1 // Ogg Vorbis audio decoder - v1.14 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking implementation
7 // sponsored by Phillip Bennefall, Marc Andersen, Aaron Baker,
8 // Elias Software, Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 //   See end of file for license information.
13 //
14 // Limitations:
15 //
16 //   - floor 0 not supported (used in old ogg vorbis files pre-2004)
17 //   - lossless sample-truncation at beginning ignored
18 //   - cannot concatenate multiple vorbis streams
19 //   - sample positions are 32-bit, limiting seekable 192Khz
20 //       files to around 6 hours (Ogg supports 64-bit)
21 //
22 // Feature contributors:
23 //    Dougall Johnson (sample-exact seeking)
24 //
25 // Bugfix/warning contributors:
26 //    Terje Mathisen     Niklas Frykholm     Andy Hill
27 //    Casey Muratori     John Bolton         Gargaj
28 //    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
29 //    Bernhard Wodo      Evan Balster        alxprd@github
30 //    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
31 //    Phillip Bennefall  Rohit               Thiago Goulart
32 //    manxorist@github   saga musix          github:infatum
33 //    Timur Gagiev
34 //
35 // Partial history:
36 //    1.14    - 2018-02-11 - delete bogus dealloca usage
37 //    1.13    - 2018-01-29 - fix truncation of last frame (hopefully)
38 //    1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
39 //    1.11    - 2017-07-23 - fix MinGW compilation
40 //    1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
41 //    1.09    - 2016-04-04 - back out 'truncation of last frame' fix from previous version
42 //    1.08    - 2016-04-02 - warnings; setup memory leaks; truncation of last frame
43 //    1.07    - 2015-01-16 - fixes for crashes on invalid files; warning fixes; const
44 //    1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
45 //                           some crash fixes when out of memory or with corrupt files
46 //                           fix some inappropriately signed shifts
47 //    1.05    - 2015-04-19 - don't define __forceinline if it's redundant
48 //    1.04    - 2014-08-27 - fix missing const-correct case in API
49 //    1.03    - 2014-08-07 - warning fixes
50 //    1.02    - 2014-07-09 - declare qsort comparison as explicitly _cdecl in Windows
51 //    1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float (interleaved was correct)
52 //    1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
53 //                           (API change) report sample rate for decode-full-file funcs
54 //
55 // See end of file for full version history.
56 
57 
58 //////////////////////////////////////////////////////////////////////////////
59 //
60 //  HEADER BEGINS HERE
61 //
62 
63 #ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
64 #define STB_VORBIS_INCLUDE_STB_VORBIS_H
65 
66 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
67 #define STB_VORBIS_NO_STDIO 1
68 #endif
69 
70 #ifndef STB_VORBIS_NO_STDIO
71 #include <stdio.h>
72 #endif
73 
74 #ifdef __cplusplus
75 extern "C" {
76 #endif
77 
78 ///////////   THREAD SAFETY
79 
80 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
81 // them from multiple threads at the same time. However, you can have multiple
82 // stb_vorbis* handles and decode from them independently in multiple thrads.
83 
84 
85 ///////////   MEMORY ALLOCATION
86 
87 // normally stb_vorbis uses malloc() to allocate memory at startup,
88 // and alloca() to allocate temporary memory during a frame on the
89 // stack. (Memory consumption will depend on the amount of setup
90 // data in the file and how you set the compile flags for speed
91 // vs. size. In my test files the maximal-size usage is ~150KB.)
92 //
93 // You can modify the wrapper functions in the source (setup_malloc,
94 // setup_temp_malloc, temp_malloc) to change this behavior, or you
95 // can use a simpler allocation model: you pass in a buffer from
96 // which stb_vorbis will allocate _all_ its memory (including the
97 // temp memory). "open" may fail with a VORBIS_outofmem if you
98 // do not pass in enough data; there is no way to determine how
99 // much you do need except to succeed (at which point you can
100 // query get_info to find the exact amount required. yes I know
101 // this is lame).
102 //
103 // If you pass in a non-NULL buffer of the type below, allocation
104 // will occur from it as described above. Otherwise just pass NULL
105 // to use malloc()/alloca()
106 
107 typedef struct
108 {
109    char *alloc_buffer;
110    int   alloc_buffer_length_in_bytes;
111 } stb_vorbis_alloc;
112 
113 
114 ///////////   FUNCTIONS USEABLE WITH ALL INPUT MODES
115 
116 typedef struct stb_vorbis stb_vorbis;
117 
118 typedef struct
119 {
120    unsigned int sample_rate;
121    int channels;
122 
123    unsigned int setup_memory_required;
124    unsigned int setup_temp_memory_required;
125    unsigned int temp_memory_required;
126 
127    int max_frame_size;
128 } stb_vorbis_info;
129 
130 // get general information about the file
131 extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
132 
133 // get the last error detected (clears it, too)
134 extern int stb_vorbis_get_error(stb_vorbis *f);
135 
136 // close an ogg vorbis file and free all memory in use
137 extern void stb_vorbis_close(stb_vorbis *f);
138 
139 // this function returns the offset (in samples) from the beginning of the
140 // file that will be returned by the next decode, if it is known, or -1
141 // otherwise. after a flush_pushdata() call, this may take a while before
142 // it becomes valid again.
143 // NOT WORKING YET after a seek with PULLDATA API
144 extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
145 
146 // returns the current seek point within the file, or offset from the beginning
147 // of the memory buffer. In pushdata mode it returns 0.
148 extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
149 
150 ///////////   PUSHDATA API
151 
152 #ifndef STB_VORBIS_NO_PUSHDATA_API
153 
154 // this API allows you to get blocks of data from any source and hand
155 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
156 // you how much it used, and you have to give it the rest next time;
157 // and stb_vorbis may not have enough data to work with and you will
158 // need to give it the same data again PLUS more. Note that the Vorbis
159 // specification does not bound the size of an individual frame.
160 
161 extern stb_vorbis *stb_vorbis_open_pushdata(
162          const unsigned char * datablock, int datablock_length_in_bytes,
163          int *datablock_memory_consumed_in_bytes,
164          int *error,
165          const stb_vorbis_alloc *alloc_buffer);
166 // create a vorbis decoder by passing in the initial data block containing
167 //    the ogg&vorbis headers (you don't need to do parse them, just provide
168 //    the first N bytes of the file--you're told if it's not enough, see below)
169 // on success, returns an stb_vorbis *, does not set error, returns the amount of
170 //    data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
171 // on failure, returns NULL on error and sets *error, does not change *datablock_memory_consumed
172 // if returns NULL and *error is VORBIS_need_more_data, then the input block was
173 //       incomplete and you need to pass in a larger block from the start of the file
174 
175 extern int stb_vorbis_decode_frame_pushdata(
176          stb_vorbis *f,
177          const unsigned char *datablock, int datablock_length_in_bytes,
178          int *channels,             // place to write number of float * buffers
179          float ***output,           // place to write float ** array of float * buffers
180          int *samples               // place to write number of output samples
181      );
182 // decode a frame of audio sample data if possible from the passed-in data block
183 //
184 // return value: number of bytes we used from datablock
185 //
186 // possible cases:
187 //     0 bytes used, 0 samples output (need more data)
188 //     N bytes used, 0 samples output (resynching the stream, keep going)
189 //     N bytes used, M samples output (one frame of data)
190 // note that after opening a file, you will ALWAYS get one N-bytes,0-sample
191 // frame, because Vorbis always "discards" the first frame.
192 //
193 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
194 // instead only datablock_length_in_bytes-3 or less. This is because it wants
195 // to avoid missing parts of a page header if they cross a datablock boundary,
196 // without writing state-machiney code to record a partial detection.
197 //
198 // The number of channels returned are stored in *channels (which can be
199 // NULL--it is always the same as the number of channels reported by
200 // get_info). *output will contain an array of float* buffers, one per
201 // channel. In other words, (*output)[0][0] contains the first sample from
202 // the first channel, and (*output)[1][0] contains the first sample from
203 // the second channel.
204 
205 extern void stb_vorbis_flush_pushdata(stb_vorbis *f);
206 // inform stb_vorbis that your next datablock will not be contiguous with
207 // previous ones (e.g. you've seeked in the data); future attempts to decode
208 // frames will cause stb_vorbis to resynchronize (as noted above), and
209 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
210 // will begin decoding the _next_ frame.
211 //
212 // if you want to seek using pushdata, you need to seek in your file, then
213 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
214 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
215 // if you don't like the result, seek your file again and repeat.
216 #endif
217 
218 
219 //////////   PULLING INPUT API
220 
221 #ifndef STB_VORBIS_NO_PULLDATA_API
222 // This API assumes stb_vorbis is allowed to pull data from a source--
223 // either a block of memory containing the _entire_ vorbis stream, or a
224 // FILE * that you or it create, or possibly some other reading mechanism
225 // if you go modify the source to replace the FILE * case with some kind
226 // of callback to your code. (But if you don't support seeking, you may
227 // just want to go ahead and use pushdata.)
228 
229 #if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
230 extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
231 #endif
232 #if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
233 extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
234 #endif
235 // decode an entire file and output the data interleaved into a malloc()ed
236 // buffer stored in *output. The return value is the number of samples
237 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
238 // When you're done with it, just free() the pointer returned in *output.
239 
240 extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
241                                   int *error, const stb_vorbis_alloc *alloc_buffer);
242 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
243 // this must be the entire stream!). on failure, returns NULL and sets *error
244 
245 #ifndef STB_VORBIS_NO_STDIO
246 extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
247                                   int *error, const stb_vorbis_alloc *alloc_buffer);
248 // create an ogg vorbis decoder from a filename via fopen(). on failure,
249 // returns NULL and sets *error (possibly to VORBIS_file_open_failure).
250 
251 extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
252                                   int *error, const stb_vorbis_alloc *alloc_buffer);
253 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
254 // the _current_ seek point (ftell). on failure, returns NULL and sets *error.
255 // note that stb_vorbis must "own" this stream; if you seek it in between
256 // calls to stb_vorbis, it will become confused. Morever, if you attempt to
257 // perform stb_vorbis_seek_*() operations on this file, it will assume it
258 // owns the _entire_ rest of the file after the start point. Use the next
259 // function, stb_vorbis_open_file_section(), to limit it.
260 
261 extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
262                 int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
263 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
264 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
265 // on failure, returns NULL and sets *error. note that stb_vorbis must "own"
266 // this stream; if you seek it in between calls to stb_vorbis, it will become
267 // confused.
268 #endif
269 
270 extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
271 extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
272 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
273 // after calling seek_frame(), the next call to get_frame_*() will include
274 // the specified sample. after calling stb_vorbis_seek(), the next call to
275 // stb_vorbis_get_samples_* will start with the specified sample. If you
276 // do not need to seek to EXACTLY the target sample when using get_samples_*,
277 // you can also use seek_frame().
278 
279 extern int stb_vorbis_seek_start(stb_vorbis *f);
280 // this function is equivalent to stb_vorbis_seek(f,0)
281 
282 extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
283 extern float        stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
284 // these functions return the total length of the vorbis stream
285 
286 extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
287 // decode the next frame and return the number of samples. the number of
288 // channels returned are stored in *channels (which can be NULL--it is always
289 // the same as the number of channels reported by get_info). *output will
290 // contain an array of float* buffers, one per channel. These outputs will
291 // be overwritten on the next call to stb_vorbis_get_frame_*.
292 //
293 // You generally should not intermix calls to stb_vorbis_get_frame_*()
294 // and stb_vorbis_get_samples_*(), since the latter calls the former.
295 
296 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
297 extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
298 extern int stb_vorbis_get_frame_short            (stb_vorbis *f, int num_c, short **buffer, int num_samples);
299 #endif
300 // decode the next frame and return the number of *samples* per channel.
301 // Note that for interleaved data, you pass in the number of shorts (the
302 // size of your array), but the return value is the number of samples per
303 // channel, not the total number of samples.
304 //
305 // The data is coerced to the number of channels you request according to the
306 // channel coercion rules (see below). You must pass in the size of your
307 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
308 // The maximum buffer size needed can be gotten from get_info(); however,
309 // the Vorbis I specification implies an absolute maximum of 4096 samples
310 // per channel.
311 
312 // Channel coercion rules:
313 //    Let M be the number of channels requested, and N the number of channels present,
314 //    and Cn be the nth channel; let stereo L be the sum of all L and center channels,
315 //    and stereo R be the sum of all R and center channels (channel assignment from the
316 //    vorbis spec).
317 //        M    N       output
318 //        1    k      sum(Ck) for all k
319 //        2    *      stereo L, stereo R
320 //        k    l      k > l, the first l channels, then 0s
321 //        k    l      k <= l, the first k channels
322 //    Note that this is not _good_ surround etc. mixing at all! It's just so
323 //    you get something useful.
324 
325 extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
326 extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
327 // gets num_samples samples, not necessarily on a frame boundary--this requires
328 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
329 // Returns the number of samples stored per channel; it may be less than requested
330 // at the end of the file. If there are no more samples in the file, returns 0.
331 
332 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
333 extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
334 extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
335 #endif
336 // gets num_samples samples, not necessarily on a frame boundary--this requires
337 // buffering so you have to supply the buffers. Applies the coercion rules above
338 // to produce 'channels' channels. Returns the number of samples stored per channel;
339 // it may be less than requested at the end of the file. If there are no more
340 // samples in the file, returns 0.
341 
342 #endif
343 
344 ////////   ERROR CODES
345 
346 enum STBVorbisError
347 {
348    VORBIS__no_error,
349 
350    VORBIS_need_more_data=1,             // not a real error
351 
352    VORBIS_invalid_api_mixing,           // can't mix API modes
353    VORBIS_outofmem,                     // not enough memory
354    VORBIS_feature_not_supported,        // uses floor 0
355    VORBIS_too_many_channels,            // STB_VORBIS_MAX_CHANNELS is too small
356    VORBIS_file_open_failure,            // fopen() failed
357    VORBIS_seek_without_length,          // can't seek in unknown-length file
358 
359    VORBIS_unexpected_eof=10,            // file is truncated?
360    VORBIS_seek_invalid,                 // seek past EOF
361 
362    // decoding errors (corrupt/invalid stream) -- you probably
363    // don't care about the exact details of these
364 
365    // vorbis errors:
366    VORBIS_invalid_setup=20,
367    VORBIS_invalid_stream,
368 
369    // ogg errors:
370    VORBIS_missing_capture_pattern=30,
371    VORBIS_invalid_stream_structure_version,
372    VORBIS_continued_packet_flag_invalid,
373    VORBIS_incorrect_stream_serial_number,
374    VORBIS_invalid_first_page,
375    VORBIS_bad_packet_type,
376    VORBIS_cant_find_last_page,
377    VORBIS_seek_failed
378 };
379 
380 
381 #ifdef __cplusplus
382 }
383 #endif
384 
385 #endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
386 //
387 //  HEADER ENDS HERE
388 //
389 //////////////////////////////////////////////////////////////////////////////
390 
391 #ifndef STB_VORBIS_HEADER_ONLY
392 
393 // global configuration settings (e.g. set these in the project/makefile),
394 // or just set them in this file at the top (although ideally the first few
395 // should be visible when the header file is compiled too, although it's not
396 // crucial)
397 
398 // STB_VORBIS_NO_PUSHDATA_API
399 //     does not compile the code for the various stb_vorbis_*_pushdata()
400 //     functions
401 // #define STB_VORBIS_NO_PUSHDATA_API
402 
403 // STB_VORBIS_NO_PULLDATA_API
404 //     does not compile the code for the non-pushdata APIs
405 // #define STB_VORBIS_NO_PULLDATA_API
406 
407 // STB_VORBIS_NO_STDIO
408 //     does not compile the code for the APIs that use FILE *s internally
409 //     or externally (implied by STB_VORBIS_NO_PULLDATA_API)
410 // #define STB_VORBIS_NO_STDIO
411 
412 // STB_VORBIS_NO_INTEGER_CONVERSION
413 //     does not compile the code for converting audio sample data from
414 //     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
415 // #define STB_VORBIS_NO_INTEGER_CONVERSION
416 
417 // STB_VORBIS_NO_FAST_SCALED_FLOAT
418 //      does not use a fast float-to-int trick to accelerate float-to-int on
419 //      most platforms which requires endianness be defined correctly.
420 //#define STB_VORBIS_NO_FAST_SCALED_FLOAT
421 
422 
423 // STB_VORBIS_MAX_CHANNELS [number]
424 //     globally define this to the maximum number of channels you need.
425 //     The spec does not put a restriction on channels except that
426 //     the count is stored in a byte, so 255 is the hard limit.
427 //     Reducing this saves about 16 bytes per value, so using 16 saves
428 //     (255-16)*16 or around 4KB. Plus anything other memory usage
429 //     I forgot to account for. Can probably go as low as 8 (7.1 audio),
430 //     6 (5.1 audio), or 2 (stereo only).
431 #ifndef STB_VORBIS_MAX_CHANNELS
432 #define STB_VORBIS_MAX_CHANNELS    16  // enough for anyone?
433 #endif
434 
435 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
436 //     after a flush_pushdata(), stb_vorbis begins scanning for the
437 //     next valid page, without backtracking. when it finds something
438 //     that looks like a page, it streams through it and verifies its
439 //     CRC32. Should that validation fail, it keeps scanning. But it's
440 //     possible that _while_ streaming through to check the CRC32 of
441 //     one candidate page, it sees another candidate page. This #define
442 //     determines how many "overlapping" candidate pages it can search
443 //     at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
444 //     garbage pages could be as big as 64KB, but probably average ~16KB.
445 //     So don't hose ourselves by scanning an apparent 64KB page and
446 //     missing a ton of real ones in the interim; so minimum of 2
447 #ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
448 #define STB_VORBIS_PUSHDATA_CRC_COUNT  4
449 #endif
450 
451 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
452 //     sets the log size of the huffman-acceleration table.  Maximum
453 //     supported value is 24. with larger numbers, more decodings are O(1),
454 //     but the table size is larger so worse cache missing, so you'll have
455 //     to probe (and try multiple ogg vorbis files) to find the sweet spot.
456 #ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
457 #define STB_VORBIS_FAST_HUFFMAN_LENGTH   10
458 #endif
459 
460 // STB_VORBIS_FAST_BINARY_LENGTH [number]
461 //     sets the log size of the binary-search acceleration table. this
462 //     is used in similar fashion to the fast-huffman size to set initial
463 //     parameters for the binary search
464 
465 // STB_VORBIS_FAST_HUFFMAN_INT
466 //     The fast huffman tables are much more efficient if they can be
467 //     stored as 16-bit results instead of 32-bit results. This restricts
468 //     the codebooks to having only 65535 possible outcomes, though.
469 //     (At least, accelerated by the huffman table.)
470 #ifndef STB_VORBIS_FAST_HUFFMAN_INT
471 #define STB_VORBIS_FAST_HUFFMAN_SHORT
472 #endif
473 
474 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
475 //     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
476 //     back on binary searching for the correct one. This requires storing
477 //     extra tables with the huffman codes in sorted order. Defining this
478 //     symbol trades off space for speed by forcing a linear search in the
479 //     non-fast case, except for "sparse" codebooks.
480 // #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
481 
482 // STB_VORBIS_DIVIDES_IN_RESIDUE
483 //     stb_vorbis precomputes the result of the scalar residue decoding
484 //     that would otherwise require a divide per chunk. you can trade off
485 //     space for time by defining this symbol.
486 // #define STB_VORBIS_DIVIDES_IN_RESIDUE
487 
488 // STB_VORBIS_DIVIDES_IN_CODEBOOK
489 //     vorbis VQ codebooks can be encoded two ways: with every case explicitly
490 //     stored, or with all elements being chosen from a small range of values,
491 //     and all values possible in all elements. By default, stb_vorbis expands
492 //     this latter kind out to look like the former kind for ease of decoding,
493 //     because otherwise an integer divide-per-vector-element is required to
494 //     unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
495 //     trade off storage for speed.
496 //#define STB_VORBIS_DIVIDES_IN_CODEBOOK
497 
498 #ifdef STB_VORBIS_CODEBOOK_SHORTS
499 #error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
500 #endif
501 
502 // STB_VORBIS_DIVIDE_TABLE
503 //     this replaces small integer divides in the floor decode loop with
504 //     table lookups. made less than 1% difference, so disabled by default.
505 
506 // STB_VORBIS_NO_INLINE_DECODE
507 //     disables the inlining of the scalar codebook fast-huffman decode.
508 //     might save a little codespace; useful for debugging
509 // #define STB_VORBIS_NO_INLINE_DECODE
510 
511 // STB_VORBIS_NO_DEFER_FLOOR
512 //     Normally we only decode the floor without synthesizing the actual
513 //     full curve. We can instead synthesize the curve immediately. This
514 //     requires more memory and is very likely slower, so I don't think
515 //     you'd ever want to do it except for debugging.
516 // #define STB_VORBIS_NO_DEFER_FLOOR
517 
518 
519 
520 
521 //////////////////////////////////////////////////////////////////////////////
522 
523 #ifdef STB_VORBIS_NO_PULLDATA_API
524    #define STB_VORBIS_NO_INTEGER_CONVERSION
525    #define STB_VORBIS_NO_STDIO
526 #endif
527 
528 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
529    #define STB_VORBIS_NO_STDIO 1
530 #endif
531 
532 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
533 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
534 
535    // only need endianness for fast-float-to-int, which we don't
536    // use for pushdata
537 
538    #ifndef STB_VORBIS_BIG_ENDIAN
539      #define STB_VORBIS_ENDIAN  0
540    #else
541      #define STB_VORBIS_ENDIAN  1
542    #endif
543 
544 #endif
545 #endif
546 
547 
548 #ifndef STB_VORBIS_NO_STDIO
549 #include <stdio.h>
550 #endif
551 
552 #ifndef STB_VORBIS_NO_CRT
553    #include <stdlib.h>
554    #include <string.h>
555    #include <assert.h>
556    #include <math.h>
557 
558    // find definition of alloca if it's not in stdlib.h:
559    #if defined(_MSC_VER) || defined(__MINGW32__)
560       #include <malloc.h>
561    #endif
562    #if defined(__linux__) || defined(__linux) || defined(__EMSCRIPTEN__)
563       #include <alloca.h>
564    #endif
565 #else // STB_VORBIS_NO_CRT
566    #define NULL 0
567    #define malloc(s)   0
568    #define free(s)     ((void) 0)
569    #define realloc(s)  0
570 #endif // STB_VORBIS_NO_CRT
571 
572 #include <limits.h>
573 
574 #ifdef __MINGW32__
575    // eff you mingw:
576    //     "fixed":
577    //         http://sourceforge.net/p/mingw-w64/mailman/message/32882927/
578    //     "no that broke the build, reverted, who cares about C":
579    //         http://sourceforge.net/p/mingw-w64/mailman/message/32890381/
580    #ifdef __forceinline
581    #undef __forceinline
582    #endif
583    #define __forceinline
584    #ifndef alloca
585       #define alloca __builtin_alloca
586    #endif
587 #elif !defined(_MSC_VER)
588    #if __GNUC__
589       #define __forceinline inline
590    #else
591       #define __forceinline
592    #endif
593 #endif
594 
595 #if STB_VORBIS_MAX_CHANNELS > 256
596 #error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
597 #endif
598 
599 #if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
600 #error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
601 #endif
602 
603 
604 #if 0
605 #include <crtdbg.h>
606 #define CHECK(f)   _CrtIsValidHeapPointer(f->channel_buffers[1])
607 #else
608 #define CHECK(f)   ((void) 0)
609 #endif
610 
611 #define MAX_BLOCKSIZE_LOG  13   // from specification
612 #define MAX_BLOCKSIZE      (1 << MAX_BLOCKSIZE_LOG)
613 
614 
615 typedef unsigned char  uint8;
616 typedef   signed char   int8;
617 typedef unsigned short uint16;
618 typedef   signed short  int16;
619 typedef unsigned int   uint32;
620 typedef   signed int    int32;
621 
622 #ifndef TRUE
623 #define TRUE 1
624 #define FALSE 0
625 #endif
626 
627 typedef float codetype;
628 
629 // @NOTE
630 //
631 // Some arrays below are tagged "//varies", which means it's actually
632 // a variable-sized piece of data, but rather than malloc I assume it's
633 // small enough it's better to just allocate it all together with the
634 // main thing
635 //
636 // Most of the variables are specified with the smallest size I could pack
637 // them into. It might give better performance to make them all full-sized
638 // integers. It should be safe to freely rearrange the structures or change
639 // the sizes larger--nothing relies on silently truncating etc., nor the
640 // order of variables.
641 
642 #define FAST_HUFFMAN_TABLE_SIZE   (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
643 #define FAST_HUFFMAN_TABLE_MASK   (FAST_HUFFMAN_TABLE_SIZE - 1)
644 
645 typedef struct
646 {
647    int dimensions, entries;
648    uint8 *codeword_lengths;
649    float  minimum_value;
650    float  delta_value;
651    uint8  value_bits;
652    uint8  lookup_type;
653    uint8  sequence_p;
654    uint8  sparse;
655    uint32 lookup_values;
656    codetype *multiplicands;
657    uint32 *codewords;
658    #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
659     int16  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
660    #else
661     int32  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
662    #endif
663    uint32 *sorted_codewords;
664    int    *sorted_values;
665    int     sorted_entries;
666 } Codebook;
667 
668 typedef struct
669 {
670    uint8 order;
671    uint16 rate;
672    uint16 bark_map_size;
673    uint8 amplitude_bits;
674    uint8 amplitude_offset;
675    uint8 number_of_books;
676    uint8 book_list[16]; // varies
677 } Floor0;
678 
679 typedef struct
680 {
681    uint8 partitions;
682    uint8 partition_class_list[32]; // varies
683    uint8 class_dimensions[16]; // varies
684    uint8 class_subclasses[16]; // varies
685    uint8 class_masterbooks[16]; // varies
686    int16 subclass_books[16][8]; // varies
687    uint16 Xlist[31*8+2]; // varies
688    uint8 sorted_order[31*8+2];
689    uint8 neighbors[31*8+2][2];
690    uint8 floor1_multiplier;
691    uint8 rangebits;
692    int values;
693 } Floor1;
694 
695 typedef union
696 {
697    Floor0 floor0;
698    Floor1 floor1;
699 } Floor;
700 
701 typedef struct
702 {
703    uint32 begin, end;
704    uint32 part_size;
705    uint8 classifications;
706    uint8 classbook;
707    uint8 **classdata;
708    int16 (*residue_books)[8];
709 } Residue;
710 
711 typedef struct
712 {
713    uint8 magnitude;
714    uint8 angle;
715    uint8 mux;
716 } MappingChannel;
717 
718 typedef struct
719 {
720    uint16 coupling_steps;
721    MappingChannel *chan;
722    uint8  submaps;
723    uint8  submap_floor[15]; // varies
724    uint8  submap_residue[15]; // varies
725 } Mapping;
726 
727 typedef struct
728 {
729    uint8 blockflag;
730    uint8 mapping;
731    uint16 windowtype;
732    uint16 transformtype;
733 } Mode;
734 
735 typedef struct
736 {
737    uint32  goal_crc;    // expected crc if match
738    int     bytes_left;  // bytes left in packet
739    uint32  crc_so_far;  // running crc
740    int     bytes_done;  // bytes processed in _current_ chunk
741    uint32  sample_loc;  // granule pos encoded in page
742 } CRCscan;
743 
744 typedef struct
745 {
746    uint32 page_start, page_end;
747    uint32 last_decoded_sample;
748 } ProbedPage;
749 
750 struct stb_vorbis
751 {
752   // user-accessible info
753    unsigned int sample_rate;
754    int channels;
755 
756    unsigned int setup_memory_required;
757    unsigned int temp_memory_required;
758    unsigned int setup_temp_memory_required;
759 
760   // input config
761 #ifndef STB_VORBIS_NO_STDIO
762    FILE *f;
763    uint32 f_start;
764    int close_on_free;
765 #endif
766 
767    uint8 *stream;
768    uint8 *stream_start;
769    uint8 *stream_end;
770 
771    uint32 stream_len;
772 
773    uint8  push_mode;
774 
775    uint32 first_audio_page_offset;
776 
777    ProbedPage p_first, p_last;
778 
779   // memory management
780    stb_vorbis_alloc alloc;
781    int setup_offset;
782    int temp_offset;
783 
784   // run-time results
785    int eof;
786    enum STBVorbisError error;
787 
788   // user-useful data
789 
790   // header info
791    int blocksize[2];
792    int blocksize_0, blocksize_1;
793    int codebook_count;
794    Codebook *codebooks;
795    int floor_count;
796    uint16 floor_types[64]; // varies
797    Floor *floor_config;
798    int residue_count;
799    uint16 residue_types[64]; // varies
800    Residue *residue_config;
801    int mapping_count;
802    Mapping *mapping;
803    int mode_count;
804    Mode mode_config[64];  // varies
805 
806    uint32 total_samples;
807 
808   // decode buffer
809    float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
810    float *outputs        [STB_VORBIS_MAX_CHANNELS];
811 
812    float *previous_window[STB_VORBIS_MAX_CHANNELS];
813    int previous_length;
814 
815    #ifndef STB_VORBIS_NO_DEFER_FLOOR
816    int16 *finalY[STB_VORBIS_MAX_CHANNELS];
817    #else
818    float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
819    #endif
820 
821    uint32 current_loc; // sample location of next frame to decode
822    int    current_loc_valid;
823 
824   // per-blocksize precomputed data
825 
826    // twiddle factors
827    float *A[2],*B[2],*C[2];
828    float *window[2];
829    uint16 *bit_reverse[2];
830 
831   // current page/packet/segment streaming info
832    uint32 serial; // stream serial number for verification
833    int last_page;
834    int segment_count;
835    uint8 segments[255];
836    uint8 page_flag;
837    uint8 bytes_in_seg;
838    uint8 first_decode;
839    int next_seg;
840    int last_seg;  // flag that we're on the last segment
841    int last_seg_which; // what was the segment number of the last seg?
842    uint32 acc;
843    int valid_bits;
844    int packet_bytes;
845    int end_seg_with_known_loc;
846    uint32 known_loc_for_packet;
847    int discard_samples_deferred;
848    uint32 samples_output;
849 
850   // push mode scanning
851    int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
852 #ifndef STB_VORBIS_NO_PUSHDATA_API
853    CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
854 #endif
855 
856   // sample-access
857    int channel_buffer_start;
858    int channel_buffer_end;
859 };
860 
861 #if defined(STB_VORBIS_NO_PUSHDATA_API)
862    #define IS_PUSH_MODE(f)   FALSE
863 #elif defined(STB_VORBIS_NO_PULLDATA_API)
864    #define IS_PUSH_MODE(f)   TRUE
865 #else
866    #define IS_PUSH_MODE(f)   ((f)->push_mode)
867 #endif
868 
869 typedef struct stb_vorbis vorb;
870 
error(vorb * f,enum STBVorbisError e)871 static int error(vorb *f, enum STBVorbisError e)
872 {
873    f->error = e;
874    if (!f->eof && e != VORBIS_need_more_data) {
875       f->error=e; // breakpoint for debugging
876    }
877    return 0;
878 }
879 
880 
881 // these functions are used for allocating temporary memory
882 // while decoding. if you can afford the stack space, use
883 // alloca(); otherwise, provide a temp buffer and it will
884 // allocate out of those.
885 
886 #define array_size_required(count,size)  (count*(sizeof(void *)+(size)))
887 
888 #define temp_alloc(f,size)              setup_temp_malloc(f,size)//(f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
889 #define temp_free(f,p)                  0
890 #define temp_alloc_save(f)              ((f)->temp_offset)
891 #define temp_alloc_restore(f,p)         ((f)->temp_offset = (p))
892 
893 #define temp_block_array(f,count,size)  make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
894 
895 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
make_block_array(void * mem,int count,int size)896 static void *make_block_array(void *mem, int count, int size)
897 {
898    int i;
899    void ** p = (void **) mem;
900    char *q = (char *) (p + count);
901    for (i=0; i < count; ++i) {
902       p[i] = q;
903       q += size;
904    }
905    return p;
906 }
907 
setup_malloc(vorb * f,int sz)908 static void *setup_malloc(vorb *f, int sz)
909 {
910    sz = (sz+3) & ~3;
911    f->setup_memory_required += sz;
912    if (f->alloc.alloc_buffer) {
913       void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
914       if (f->setup_offset + sz > f->temp_offset) return NULL;
915       f->setup_offset += sz;
916       return p;
917    }
918    return sz ? malloc(sz) : NULL;
919 }
920 
setup_free(vorb * f,void * p)921 static void setup_free(vorb *f, void *p)
922 {
923    if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack
924    free(p);
925 }
926 
setup_temp_malloc(vorb * f,int sz)927 static void *setup_temp_malloc(vorb *f, int sz)
928 {
929    sz = (sz+3) & ~3;
930    if (f->alloc.alloc_buffer) {
931       if (f->temp_offset - sz < f->setup_offset) return NULL;
932       f->temp_offset -= sz;
933       return (char *) f->alloc.alloc_buffer + f->temp_offset;
934    }
935    return malloc(sz);
936 }
937 
setup_temp_free(vorb * f,void * p,int sz)938 static void setup_temp_free(vorb *f, void *p, int sz)
939 {
940    if (f->alloc.alloc_buffer) {
941       f->temp_offset += (sz+3)&~3;
942       return;
943    }
944    free(p);
945 }
946 
947 #define CRC32_POLY    0x04c11db7   // from spec
948 
949 static uint32 crc_table[256];
crc32_init(void)950 static void crc32_init(void)
951 {
952    int i,j;
953    uint32 s;
954    for(i=0; i < 256; i++) {
955       for (s=(uint32) i << 24, j=0; j < 8; ++j)
956          s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
957       crc_table[i] = s;
958    }
959 }
960 
crc32_update(uint32 crc,uint8 byte)961 static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
962 {
963    return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
964 }
965 
966 
967 // used in setup, and for huffman that doesn't go fast path
bit_reverse(unsigned int n)968 static unsigned int bit_reverse(unsigned int n)
969 {
970   n = ((n & 0xAAAAAAAA) >>  1) | ((n & 0x55555555) << 1);
971   n = ((n & 0xCCCCCCCC) >>  2) | ((n & 0x33333333) << 2);
972   n = ((n & 0xF0F0F0F0) >>  4) | ((n & 0x0F0F0F0F) << 4);
973   n = ((n & 0xFF00FF00) >>  8) | ((n & 0x00FF00FF) << 8);
974   return (n >> 16) | (n << 16);
975 }
976 
977 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
978 // as required by the specification. fast(?) implementation from stb.h
979 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
ilog(int32 n)980 static int ilog(int32 n)
981 {
982    static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
983 
984    if (n < 0) return 0; // signed n returns 0
985 
986    // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
987    if (n < (1 << 14))
988         if (n < (1 <<  4))            return  0 + log2_4[n      ];
989         else if (n < (1 <<  9))       return  5 + log2_4[n >>  5];
990              else                     return 10 + log2_4[n >> 10];
991    else if (n < (1 << 24))
992              if (n < (1 << 19))       return 15 + log2_4[n >> 15];
993              else                     return 20 + log2_4[n >> 20];
994         else if (n < (1 << 29))       return 25 + log2_4[n >> 25];
995              else                     return 30 + log2_4[n >> 30];
996 }
997 
998 #ifndef M_PI
999   #define M_PI  3.14159265358979323846264f  // from CRC
1000 #endif
1001 
1002 // code length assigned to a value with no huffman encoding
1003 #define NO_CODE   255
1004 
1005 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
1006 //
1007 // these functions are only called at setup, and only a few times
1008 // per file
1009 /*
1010 static float float32_unpack(uint32 x)
1011 {
1012    // from the specification
1013    uint32 mantissa = x & 0x1fffff;
1014    uint32 sign = x & 0x80000000;
1015    uint32 exp = (x & 0x7fe00000) >> 21;
1016    double res = sign ? -(double)mantissa : (double)mantissa;
1017    return (float) ldexp((float)res, exp-788);
1018 }
1019 */
ldexpi(int m,int e)1020 float ldexpi(int m, int e) {
1021     return (float)(m * pow(2, e));
1022 }
1023 
float32_unpack(uint32 x)1024 static float float32_unpack(uint32 x) {
1025    // from the specification
1026    uint32 s = x & 0x80000000;
1027    int32  m = x & 0x1fffff;
1028    int32  e = (x & 0x7fe00000) >> 21;
1029    return ldexpi(s ? -m : m, e - 788);
1030 }
1031 
1032 // zlib & jpeg huffman tables assume that the output symbols
1033 // can either be arbitrarily arranged, or have monotonically
1034 // increasing frequencies--they rely on the lengths being sorted;
1035 // this makes for a very simple generation algorithm.
1036 // vorbis allows a huffman table with non-sorted lengths. This
1037 // requires a more sophisticated construction, since symbols in
1038 // order do not map to huffman codes "in order".
add_entry(Codebook * c,uint32 huff_code,int symbol,int count,int len,uint32 * values)1039 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
1040 {
1041    if (!c->sparse) {
1042       c->codewords      [symbol] = huff_code;
1043    } else {
1044       c->codewords       [count] = huff_code;
1045       c->codeword_lengths[count] = len;
1046       values             [count] = symbol;
1047    }
1048 }
1049 
compute_codewords(Codebook * c,uint8 * len,int n,uint32 * values)1050 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
1051 {
1052    int i,k,m=0;
1053    uint32 available[32];
1054 
1055    memset(available, 0, sizeof(available));
1056    // find the first entry
1057    for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
1058    if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
1059    // add to the list
1060    add_entry(c, 0, k, m++, len[k], values);
1061    // add all available leaves
1062    for (i=1; i <= len[k]; ++i)
1063       available[i] = 1U << (32-i);
1064    // note that the above code treats the first case specially,
1065    // but it's really the same as the following code, so they
1066    // could probably be combined (except the initial code is 0,
1067    // and I use 0 in available[] to mean 'empty')
1068    for (i=k+1; i < n; ++i) {
1069       uint32 res;
1070       int z = len[i], y;
1071       if (z == NO_CODE) continue;
1072       // find lowest available leaf (should always be earliest,
1073       // which is what the specification calls for)
1074       // note that this property, and the fact we can never have
1075       // more than one free leaf at a given level, isn't totally
1076       // trivial to prove, but it seems true and the assert never
1077       // fires, so!
1078       while (z > 0 && !available[z]) --z;
1079       if (z == 0) { return FALSE; }
1080       res = available[z];
1081       assert(z >= 0 && z < 32);
1082       available[z] = 0;
1083       add_entry(c, bit_reverse(res), i, m++, len[i], values);
1084       // propogate availability up the tree
1085       if (z != len[i]) {
1086          assert(len[i] >= 0 && len[i] < 32);
1087          for (y=len[i]; y > z; --y) {
1088             assert(available[y] == 0);
1089             available[y] = res + (1 << (32-y));
1090          }
1091       }
1092    }
1093    return TRUE;
1094 }
1095 
1096 // accelerated huffman table allows fast O(1) match of all symbols
1097 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
compute_accelerated_huffman(Codebook * c)1098 static void compute_accelerated_huffman(Codebook *c)
1099 {
1100    int i, len;
1101    memset(c->fast_huffman, 0xFF, sizeof(c->fast_huffman)); // fill by -1
1102 
1103    len = c->sparse ? c->sorted_entries : c->entries;
1104    #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
1105    if (len > 32767) len = 32767; // largest possible value we can encode!
1106    #endif
1107    for (i=0; i < len; ++i) {
1108       if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
1109          uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
1110          // set table entries for all bit combinations in the higher bits
1111          while (z < FAST_HUFFMAN_TABLE_SIZE) {
1112              c->fast_huffman[z] = i;
1113              z += 1 << c->codeword_lengths[i];
1114          }
1115       }
1116    }
1117 }
1118 
1119 #ifdef _MSC_VER
1120 #define STBV_CDECL __cdecl
1121 #else
1122 #define STBV_CDECL
1123 #endif
1124 
uint32_compare(const void * p,const void * q)1125 static int STBV_CDECL uint32_compare(const void *p, const void *q)
1126 {
1127    uint32 x = * (uint32 *) p;
1128    uint32 y = * (uint32 *) q;
1129    return x < y ? -1 : x > y;
1130 }
1131 
include_in_sort(Codebook * c,uint8 len)1132 static int include_in_sort(Codebook *c, uint8 len)
1133 {
1134    if (c->sparse) { assert(len != NO_CODE); return TRUE; }
1135    if (len == NO_CODE) return FALSE;
1136    if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
1137    return FALSE;
1138 }
1139 
1140 // if the fast table above doesn't work, we want to binary
1141 // search them... need to reverse the bits
compute_sorted_huffman(Codebook * c,uint8 * lengths,uint32 * values)1142 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
1143 {
1144    int i, len;
1145    // build a list of all the entries
1146    // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
1147    // this is kind of a frivolous optimization--I don't see any performance improvement,
1148    // but it's like 4 extra lines of code, so.
1149    if (!c->sparse) {
1150       int k = 0;
1151       for (i=0; i < c->entries; ++i)
1152          if (include_in_sort(c, lengths[i]))
1153             c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
1154       assert(k == c->sorted_entries);
1155    } else {
1156       for (i=0; i < c->sorted_entries; ++i)
1157          c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
1158    }
1159 
1160    qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
1161    c->sorted_codewords[c->sorted_entries] = 0xffffffff;
1162 
1163    len = c->sparse ? c->sorted_entries : c->entries;
1164    // now we need to indicate how they correspond; we could either
1165    //   #1: sort a different data structure that says who they correspond to
1166    //   #2: for each sorted entry, search the original list to find who corresponds
1167    //   #3: for each original entry, find the sorted entry
1168    // #1 requires extra storage, #2 is slow, #3 can use binary search!
1169    for (i=0; i < len; ++i) {
1170       int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
1171       if (include_in_sort(c,huff_len)) {
1172          uint32 code = bit_reverse(c->codewords[i]);
1173          int x=0, n=c->sorted_entries;
1174          while (n > 1) {
1175             // invariant: sc[x] <= code < sc[x+n]
1176             int m = x + (n >> 1);
1177             if (c->sorted_codewords[m] <= code) {
1178                x = m;
1179                n -= (n>>1);
1180             } else {
1181                n >>= 1;
1182             }
1183          }
1184          assert(c->sorted_codewords[x] == code);
1185          if (c->sparse) {
1186             c->sorted_values[x] = values[i];
1187             c->codeword_lengths[x] = huff_len;
1188          } else {
1189             c->sorted_values[x] = i;
1190          }
1191       }
1192    }
1193 }
1194 
1195 // only run while parsing the header (3 times)
vorbis_validate(uint8 * data)1196 static int vorbis_validate(uint8 *data)
1197 {
1198    static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
1199    return memcmp(data, vorbis, 6) == 0;
1200 }
1201 
1202 // called from setup only, once per code book
1203 // (formula implied by specification)
lookup1_values(int entries,int dim)1204 static int lookup1_values(int entries, int dim)
1205 {
1206    int r = (int) floor(exp((float) log((float) entries) / dim));
1207    if ((int) floor(pow((float) r+1, dim)) <= entries)   // (int) cast for MinGW warning;
1208       ++r;                                              // floor() to avoid _ftol() when non-CRT
1209    assert(pow((float) r+1, dim) > entries);
1210    assert((int) floor(pow((float) r, dim)) <= entries); // (int),floor() as above
1211    return r;
1212 }
1213 
1214 #ifdef _PSP
1215 // pspmath.h
1216     extern float vfpu_sinf(float x);
1217     extern float vfpu_cosf(float x);
1218     extern void  vfpu_sincos(float r, float *s, float *c);
1219 
1220     #define sinf(x)         vfpu_sinf(x)
1221     #define cosf(x)         vfpu_cosf(x)
1222 
sincosd(double x,double * s,double * c)1223     void sincosd(double x, double *s, double *c) {
1224         float fs, fc;
1225         vfpu_sincos((float)x, &fs, &fc);
1226         *s = (double)fs;
1227         *c = (double)fc;
1228     }
1229 #else
sincosd(double x,double * s,double * c)1230     void sincosd(double x, double *s, double *c) {
1231         *s = sin(x);
1232         *c = cos(x);
1233     }
1234 #endif
1235 
1236 #define rotate(x, y, s, c) {\
1237         double t = x*c - y*s;\
1238         y = x*s + y*c;\
1239         x = t;\
1240     }
1241 
1242 // called twice per file
compute_twiddle_factors(int n,float * A,float * B,float * C)1243 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
1244 {
1245    int n4 = n >> 2, n8 = n >> 3;
1246    int k, k2;
1247 
1248    double sA, cA, sB, cB, sC, cC;
1249    double xA, yA, xB, yB, xC, yC;
1250 
1251    sincosd(4*M_PI/n, &sA, &cA);
1252    sincosd(M_PI/n/2, &sB, &cB);
1253    sincosd(2*M_PI/n, &sC, &cC);
1254 
1255    xA = 1.0;
1256    yA = 0.0;
1257    A[0] = 1.0f;
1258    A[1] = 0.0f;
1259    xB = cB;
1260    yB = sB;
1261    B[0] = (float)(xB * 0.5);
1262    B[1] = (float)(yB * 0.5);
1263    rotate(cB, sB, sB, cB);
1264 
1265    for (k=1, k2=2; k < n4; ++k,k2+=2) {
1266       rotate(xA, yA, sA, cA);
1267       A[k2  ] = (float) xA;
1268       A[k2+1] = (float)-yA;
1269       rotate(xB, yB, sB, cB);
1270       B[k2  ] = (float)(xB * 0.5);
1271       B[k2+1] = (float)(yB * 0.5);
1272    }
1273 
1274    xC = cC;
1275    yC = sC;
1276    C[0] = (float) xC;
1277    C[1] = (float)-yC;
1278 
1279    rotate(cC, sC, sC, cC);
1280 
1281    for (k=1, k2=2; k < n8; ++k,k2+=2) {
1282       rotate(xC, yC, sC, cC);
1283       C[k2  ] = (float) xC;
1284       C[k2+1] = (float)-yC;
1285    }
1286 }
1287 
compute_window(int n,float * window)1288 static void compute_window(int n, float *window)
1289 {
1290    int n2 = n >> 1, i;
1291    double s, c, x, y;
1292 
1293    sincosd(0.5f / n2 * 0.5f * M_PI, &s, &c);
1294    x = c;
1295    y = s;
1296    rotate(c, s, s, c);
1297 
1298    for (i=0; i < n2; ++i) {
1299       window[i] = (float)sin(0.5 * M_PI * (y * y));
1300       rotate(x, y, s, c);
1301    }
1302 }
1303 
compute_bitreverse(int n,uint16 * rev)1304 static void compute_bitreverse(int n, uint16 *rev)
1305 {
1306    int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
1307    int i, n8 = n >> 3;
1308    for (i=0; i < n8; ++i)
1309       rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
1310 }
1311 
init_blocksize(vorb * f,int b,int n)1312 static int init_blocksize(vorb *f, int b, int n)
1313 {
1314    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
1315    f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1316    f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1317    f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
1318    if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
1319    compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
1320    f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1321    if (!f->window[b]) return error(f, VORBIS_outofmem);
1322    compute_window(n, f->window[b]);
1323    f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
1324    if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
1325    compute_bitreverse(n, f->bit_reverse[b]);
1326    return TRUE;
1327 }
1328 
neighbors(uint16 * x,int n,int * plow,int * phigh)1329 static void neighbors(uint16 *x, int n, int *plow, int *phigh)
1330 {
1331    int low = -1;
1332    int high = 65536;
1333    int i;
1334    for (i=0; i < n; ++i) {
1335       if (x[i] > low  && x[i] < x[n]) { *plow  = i; low = x[i]; }
1336       if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
1337    }
1338 }
1339 
1340 // this has been repurposed so y is now the original index instead of y
1341 typedef struct
1342 {
1343    uint16 x,id;
1344 } stbv__floor_ordering;
1345 
point_compare(const void * p,const void * q)1346 static int STBV_CDECL point_compare(const void *p, const void *q)
1347 {
1348    stbv__floor_ordering *a = (stbv__floor_ordering *) p;
1349    stbv__floor_ordering *b = (stbv__floor_ordering *) q;
1350    return a->x < b->x ? -1 : a->x > b->x;
1351 }
1352 
1353 //
1354 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
1355 
1356 
1357 #if defined(STB_VORBIS_NO_STDIO)
1358    #define USE_MEMORY(z)    TRUE
1359 #else
1360    #define USE_MEMORY(z)    ((z)->stream)
1361 #endif
1362 
get8(vorb * z)1363 static uint8 get8(vorb *z)
1364 {
1365    if (USE_MEMORY(z)) {
1366       if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
1367       return *z->stream++;
1368    }
1369 
1370    #ifndef STB_VORBIS_NO_STDIO
1371    {
1372    int c = fgetc(z->f);
1373    if (c == EOF) { z->eof = TRUE; return 0; }
1374    return c;
1375    }
1376    #endif
1377 }
1378 
get32(vorb * f)1379 static uint32 get32(vorb *f)
1380 {
1381    uint32 x;
1382    x = get8(f);
1383    x += get8(f) << 8;
1384    x += get8(f) << 16;
1385    x += (uint32) get8(f) << 24;
1386    return x;
1387 }
1388 
getn(vorb * z,uint8 * data,int n)1389 static int getn(vorb *z, uint8 *data, int n)
1390 {
1391    if (USE_MEMORY(z)) {
1392       if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
1393       memcpy(data, z->stream, n);
1394       z->stream += n;
1395       return 1;
1396    }
1397 
1398    #ifndef STB_VORBIS_NO_STDIO
1399    if (fread(data, n, 1, z->f) == 1)
1400       return 1;
1401    else {
1402       z->eof = 1;
1403       return 0;
1404    }
1405    #endif
1406 }
1407 
skip(vorb * z,int n)1408 static void skip(vorb *z, int n)
1409 {
1410    if (USE_MEMORY(z)) {
1411       z->stream += n;
1412       if (z->stream >= z->stream_end) z->eof = 1;
1413       return;
1414    }
1415    #ifndef STB_VORBIS_NO_STDIO
1416    {
1417       long x = ftell(z->f);
1418       fseek(z->f, x+n, SEEK_SET);
1419    }
1420    #endif
1421 }
1422 
set_file_offset(stb_vorbis * f,unsigned int loc)1423 static int set_file_offset(stb_vorbis *f, unsigned int loc)
1424 {
1425    #ifndef STB_VORBIS_NO_PUSHDATA_API
1426    if (f->push_mode) return 0;
1427    #endif
1428    f->eof = 0;
1429    if (USE_MEMORY(f)) {
1430       if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
1431          f->stream = f->stream_end;
1432          f->eof = 1;
1433          return 0;
1434       } else {
1435          f->stream = f->stream_start + loc;
1436          return 1;
1437       }
1438    }
1439    #ifndef STB_VORBIS_NO_STDIO
1440    if (loc + f->f_start < loc || loc >= 0x80000000) {
1441       loc = 0x7fffffff;
1442       f->eof = 1;
1443    } else {
1444       loc += f->f_start;
1445    }
1446    if (!fseek(f->f, loc, SEEK_SET))
1447       return 1;
1448    f->eof = 1;
1449    fseek(f->f, f->f_start, SEEK_END);
1450    return 0;
1451    #endif
1452 }
1453 
1454 
1455 static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
1456 
capture_pattern(vorb * f)1457 static int capture_pattern(vorb *f)
1458 {
1459    if (0x4f != get8(f)) return FALSE;
1460    if (0x67 != get8(f)) return FALSE;
1461    if (0x67 != get8(f)) return FALSE;
1462    if (0x53 != get8(f)) return FALSE;
1463    return TRUE;
1464 }
1465 
1466 #define PAGEFLAG_continued_packet   1
1467 #define PAGEFLAG_first_page         2
1468 #define PAGEFLAG_last_page          4
1469 
start_page_no_capturepattern(vorb * f)1470 static int start_page_no_capturepattern(vorb *f)
1471 {
1472    uint32 loc0,loc1,n;
1473    // stream structure version
1474    if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
1475    // header flag
1476    f->page_flag = get8(f);
1477    // absolute granule position
1478    loc0 = get32(f);
1479    loc1 = get32(f);
1480    // @TODO: validate loc0,loc1 as valid positions?
1481    // stream serial number -- vorbis doesn't interleave, so discard
1482    get32(f);
1483    //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
1484    // page sequence number
1485    n = get32(f);
1486    f->last_page = n;
1487    // CRC32
1488    get32(f);
1489    // page_segments
1490    f->segment_count = get8(f);
1491    if (!getn(f, f->segments, f->segment_count))
1492       return error(f, VORBIS_unexpected_eof);
1493    // assume we _don't_ know any the sample position of any segments
1494    f->end_seg_with_known_loc = -2;
1495    if (loc0 != ~0U || loc1 != ~0U) {
1496       int i;
1497       // determine which packet is the last one that will complete
1498       for (i=f->segment_count-1; i >= 0; --i)
1499          if (f->segments[i] < 255)
1500             break;
1501       // 'i' is now the index of the _last_ segment of a packet that ends
1502       if (i >= 0) {
1503          f->end_seg_with_known_loc = i;
1504          f->known_loc_for_packet   = loc0;
1505       }
1506    }
1507    if (f->first_decode) {
1508       int i,len;
1509       ProbedPage p;
1510       len = 0;
1511       for (i=0; i < f->segment_count; ++i)
1512          len += f->segments[i];
1513       len += 27 + f->segment_count;
1514       p.page_start = f->first_audio_page_offset;
1515       p.page_end = p.page_start + len;
1516       p.last_decoded_sample = loc0;
1517       f->p_first = p;
1518    }
1519    f->next_seg = 0;
1520    return TRUE;
1521 }
1522 
start_page(vorb * f)1523 static int start_page(vorb *f)
1524 {
1525    if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
1526    return start_page_no_capturepattern(f);
1527 }
1528 
start_packet(vorb * f)1529 static int start_packet(vorb *f)
1530 {
1531    while (f->next_seg == -1) {
1532       if (!start_page(f)) return FALSE;
1533       if (f->page_flag & PAGEFLAG_continued_packet)
1534          return error(f, VORBIS_continued_packet_flag_invalid);
1535    }
1536    f->last_seg = FALSE;
1537    f->valid_bits = 0;
1538    f->packet_bytes = 0;
1539    f->bytes_in_seg = 0;
1540    // f->next_seg is now valid
1541    return TRUE;
1542 }
1543 
maybe_start_packet(vorb * f)1544 static int maybe_start_packet(vorb *f)
1545 {
1546    if (f->next_seg == -1) {
1547       int x = get8(f);
1548       if (f->eof) return FALSE; // EOF at page boundary is not an error!
1549       if (0x4f != x      ) return error(f, VORBIS_missing_capture_pattern);
1550       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1551       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1552       if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1553       if (!start_page_no_capturepattern(f)) return FALSE;
1554       if (f->page_flag & PAGEFLAG_continued_packet) {
1555          // set up enough state that we can read this packet if we want,
1556          // e.g. during recovery
1557          f->last_seg = FALSE;
1558          f->bytes_in_seg = 0;
1559          return error(f, VORBIS_continued_packet_flag_invalid);
1560       }
1561    }
1562    return start_packet(f);
1563 }
1564 
next_segment(vorb * f)1565 static int next_segment(vorb *f)
1566 {
1567    int len;
1568    if (f->last_seg) return 0;
1569    if (f->next_seg == -1) {
1570       f->last_seg_which = f->segment_count-1; // in case start_page fails
1571       if (!start_page(f)) { f->last_seg = 1; return 0; }
1572       if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1573    }
1574    len = f->segments[f->next_seg++];
1575    if (len < 255) {
1576       f->last_seg = TRUE;
1577       f->last_seg_which = f->next_seg-1;
1578    }
1579    if (f->next_seg >= f->segment_count)
1580       f->next_seg = -1;
1581    assert(f->bytes_in_seg == 0);
1582    f->bytes_in_seg = len;
1583    return len;
1584 }
1585 
1586 #define EOP    (-1)
1587 #define INVALID_BITS  (-1)
1588 
get8_packet_raw(vorb * f)1589 static int get8_packet_raw(vorb *f)
1590 {
1591    if (!f->bytes_in_seg) {  // CLANG!
1592       if (f->last_seg) return EOP;
1593       else if (!next_segment(f)) return EOP;
1594    }
1595    assert(f->bytes_in_seg > 0);
1596    --f->bytes_in_seg;
1597    ++f->packet_bytes;
1598    return get8(f);
1599 }
1600 
get8_packet(vorb * f)1601 static int get8_packet(vorb *f)
1602 {
1603    int x = get8_packet_raw(f);
1604    f->valid_bits = 0;
1605    return x;
1606 }
1607 
flush_packet(vorb * f)1608 static void flush_packet(vorb *f)
1609 {
1610    while (get8_packet_raw(f) != EOP);
1611 }
1612 
1613 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1614 // as the huffman decoder?
get_bits(vorb * f,int n)1615 static uint32 get_bits(vorb *f, int n)
1616 {
1617    uint32 z;
1618 
1619    if (f->valid_bits < 0) return 0;
1620    if (f->valid_bits < n) {
1621       if (n > 24) {
1622          // the accumulator technique below would not work correctly in this case
1623          z = get_bits(f, 24);
1624          z += get_bits(f, n-24) << 24;
1625          return z;
1626       }
1627       if (f->valid_bits == 0) f->acc = 0;
1628       while (f->valid_bits < n) {
1629          int z = get8_packet_raw(f);
1630          if (z == EOP) {
1631             f->valid_bits = INVALID_BITS;
1632             return 0;
1633          }
1634          f->acc += z << f->valid_bits;
1635          f->valid_bits += 8;
1636       }
1637    }
1638    if (f->valid_bits < 0) return 0;
1639    z = f->acc & ((1 << n)-1);
1640    f->acc >>= n;
1641    f->valid_bits -= n;
1642    return z;
1643 }
1644 
1645 // @OPTIMIZE: primary accumulator for huffman
1646 // expand the buffer to as many bits as possible without reading off end of packet
1647 // it might be nice to allow f->valid_bits and f->acc to be stored in registers,
1648 // e.g. cache them locally and decode locally
prep_huffman(vorb * f)1649 static __forceinline void prep_huffman(vorb *f)
1650 {
1651    if (f->valid_bits <= 24) {
1652       if (f->valid_bits == 0) f->acc = 0;
1653       do {
1654          int z;
1655          if (f->last_seg && !f->bytes_in_seg) return;
1656          z = get8_packet_raw(f);
1657          if (z == EOP) return;
1658          f->acc += (unsigned) z << f->valid_bits;
1659          f->valid_bits += 8;
1660       } while (f->valid_bits <= 24);
1661    }
1662 }
1663 
1664 enum
1665 {
1666    VORBIS_packet_id = 1,
1667    VORBIS_packet_comment = 3,
1668    VORBIS_packet_setup = 5
1669 };
1670 
codebook_decode_scalar_raw(vorb * f,Codebook * c)1671 static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
1672 {
1673    int i;
1674    prep_huffman(f);
1675 
1676    if (c->codewords == NULL && c->sorted_codewords == NULL)
1677       return -1;
1678 
1679    // cases to use binary search: sorted_codewords && !c->codewords
1680    //                             sorted_codewords && c->entries > 8
1681    if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
1682       // binary search
1683       uint32 code = bit_reverse(f->acc);
1684       int x=0, n=c->sorted_entries, len;
1685 
1686       while (n > 1) {
1687          // invariant: sc[x] <= code < sc[x+n]
1688          int m = x + (n >> 1);
1689          if (c->sorted_codewords[m] <= code) {
1690             x = m;
1691             n -= (n>>1);
1692          } else {
1693             n >>= 1;
1694          }
1695       }
1696       // x is now the sorted index
1697       if (!c->sparse) x = c->sorted_values[x];
1698       // x is now sorted index if sparse, or symbol otherwise
1699       len = c->codeword_lengths[x];
1700       if (f->valid_bits >= len) {
1701          f->acc >>= len;
1702          f->valid_bits -= len;
1703          return x;
1704       }
1705 
1706       f->valid_bits = 0;
1707       return -1;
1708    }
1709 
1710    // if small, linear search
1711    assert(!c->sparse);
1712    for (i=0; i < c->entries; ++i) {
1713       if (c->codeword_lengths[i] == NO_CODE) continue;
1714       if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
1715          if (f->valid_bits >= c->codeword_lengths[i]) {
1716             f->acc >>= c->codeword_lengths[i];
1717             f->valid_bits -= c->codeword_lengths[i];
1718             return i;
1719          }
1720          f->valid_bits = 0;
1721          return -1;
1722       }
1723    }
1724 
1725    error(f, VORBIS_invalid_stream);
1726    f->valid_bits = 0;
1727    return -1;
1728 }
1729 
1730 #ifndef STB_VORBIS_NO_INLINE_DECODE
1731 
1732 #define DECODE_RAW(var, f,c)                                  \
1733    if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)        \
1734       prep_huffman(f);                                        \
1735    var = f->acc & FAST_HUFFMAN_TABLE_MASK;                    \
1736    var = c->fast_huffman[var];                                \
1737    if (var >= 0) {                                            \
1738       int n = c->codeword_lengths[var];                       \
1739       f->acc >>= n;                                           \
1740       f->valid_bits -= n;                                     \
1741       if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
1742    } else {                                                   \
1743       var = codebook_decode_scalar_raw(f,c);                  \
1744    }
1745 
1746 #else
1747 
codebook_decode_scalar(vorb * f,Codebook * c)1748 static int codebook_decode_scalar(vorb *f, Codebook *c)
1749 {
1750    int i;
1751    if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
1752       prep_huffman(f);
1753    // fast huffman table lookup
1754    i = f->acc & FAST_HUFFMAN_TABLE_MASK;
1755    i = c->fast_huffman[i];
1756    if (i >= 0) {
1757       f->acc >>= c->codeword_lengths[i];
1758       f->valid_bits -= c->codeword_lengths[i];
1759       if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
1760       return i;
1761    }
1762    return codebook_decode_scalar_raw(f,c);
1763 }
1764 
1765 #define DECODE_RAW(var,f,c)    var = codebook_decode_scalar(f,c);
1766 
1767 #endif
1768 
1769 #define DECODE(var,f,c)                                       \
1770    DECODE_RAW(var,f,c)                                        \
1771    if (c->sparse) var = c->sorted_values[var];
1772 
1773 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1774   #define DECODE_VQ(var,f,c)   DECODE_RAW(var,f,c)
1775 #else
1776   #define DECODE_VQ(var,f,c)   DECODE(var,f,c)
1777 #endif
1778 
1779 
1780 
1781 
1782 
1783 
1784 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
1785 // where we avoid one addition
1786 #define CODEBOOK_ELEMENT(c,off)          (c->multiplicands[off])
1787 #define CODEBOOK_ELEMENT_FAST(c,off)     (c->multiplicands[off])
1788 #define CODEBOOK_ELEMENT_BASE(c)         (0)
1789 
codebook_decode_start(vorb * f,Codebook * c)1790 static int codebook_decode_start(vorb *f, Codebook *c)
1791 {
1792    int z = -1;
1793 
1794    // type 0 is only legal in a scalar context
1795    if (c->lookup_type == 0)
1796       error(f, VORBIS_invalid_stream);
1797    else {
1798       DECODE_VQ(z,f,c);
1799       if (c->sparse) assert(z < c->sorted_entries);
1800       if (z < 0) {  // check for EOP
1801          if (!f->bytes_in_seg)
1802             if (f->last_seg)
1803                return z;
1804          error(f, VORBIS_invalid_stream);
1805       }
1806    }
1807    return z;
1808 }
1809 
codebook_decode(vorb * f,Codebook * c,float * output,int len)1810 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1811 {
1812    int i,z = codebook_decode_start(f,c);
1813    if (z < 0) return FALSE;
1814    if (len > c->dimensions) len = c->dimensions;
1815 
1816 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1817    if (c->lookup_type == 1) {
1818       float last = CODEBOOK_ELEMENT_BASE(c);
1819       int div = 1;
1820       for (i=0; i < len; ++i) {
1821          int off = (z / div) % c->lookup_values;
1822          float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1823          output[i] += val;
1824          if (c->sequence_p) last = val + c->minimum_value;
1825          div *= c->lookup_values;
1826       }
1827       return TRUE;
1828    }
1829 #endif
1830 
1831    z *= c->dimensions;
1832    if (c->sequence_p) {
1833       float last = CODEBOOK_ELEMENT_BASE(c);
1834       for (i=0; i < len; ++i) {
1835          float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1836          output[i] += val;
1837          last = val + c->minimum_value;
1838       }
1839    } else {
1840       float last = CODEBOOK_ELEMENT_BASE(c);
1841       for (i=0; i < len; ++i) {
1842          output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1843       }
1844    }
1845 
1846    return TRUE;
1847 }
1848 
codebook_decode_step(vorb * f,Codebook * c,float * output,int len,int step)1849 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1850 {
1851    int i,z = codebook_decode_start(f,c);
1852    float last = CODEBOOK_ELEMENT_BASE(c);
1853    if (z < 0) return FALSE;
1854    if (len > c->dimensions) len = c->dimensions;
1855 
1856 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1857    if (c->lookup_type == 1) {
1858       int div = 1;
1859       for (i=0; i < len; ++i) {
1860          int off = (z / div) % c->lookup_values;
1861          float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1862          output[i*step] += val;
1863          if (c->sequence_p) last = val;
1864          div *= c->lookup_values;
1865       }
1866       return TRUE;
1867    }
1868 #endif
1869 
1870    z *= c->dimensions;
1871    for (i=0; i < len; ++i) {
1872       float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1873       output[i*step] += val;
1874       if (c->sequence_p) last = val;
1875    }
1876 
1877    return TRUE;
1878 }
1879 
codebook_decode_deinterleave_repeat(vorb * f,Codebook * c,float ** outputs,int ch,int * c_inter_p,int * p_inter_p,int len,int total_decode)1880 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1881 {
1882    int c_inter = *c_inter_p;
1883    int p_inter = *p_inter_p;
1884    int i,z, effective = c->dimensions;
1885 
1886    // type 0 is only legal in a scalar context
1887    if (c->lookup_type == 0)   return error(f, VORBIS_invalid_stream);
1888 
1889    while (total_decode > 0) {
1890       float last = CODEBOOK_ELEMENT_BASE(c);
1891       DECODE_VQ(z,f,c);
1892       #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1893       assert(!c->sparse || z < c->sorted_entries);
1894       #endif
1895       if (z < 0) {
1896          if (!f->bytes_in_seg)
1897             if (f->last_seg) return FALSE;
1898          return error(f, VORBIS_invalid_stream);
1899       }
1900 
1901       // if this will take us off the end of the buffers, stop short!
1902       // we check by computing the length of the virtual interleaved
1903       // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1904       // and the length we'll be using (effective)
1905       if (c_inter + p_inter*ch + effective > len * ch) {
1906          effective = len*ch - (p_inter*ch - c_inter);
1907       }
1908 
1909    #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1910       if (c->lookup_type == 1) {
1911          int div = 1;
1912          for (i=0; i < effective; ++i) {
1913             int off = (z / div) % c->lookup_values;
1914             float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1915             if (outputs[c_inter])
1916                outputs[c_inter][p_inter] += val;
1917             if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1918             if (c->sequence_p) last = val;
1919             div *= c->lookup_values;
1920          }
1921       } else
1922    #endif
1923       {
1924          z *= c->dimensions;
1925          if (c->sequence_p) {
1926             for (i=0; i < effective; ++i) {
1927                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1928                if (outputs[c_inter])
1929                   outputs[c_inter][p_inter] += val;
1930                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1931                last = val;
1932             }
1933          } else {
1934             for (i=0; i < effective; ++i) {
1935                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1936                if (outputs[c_inter])
1937                   outputs[c_inter][p_inter] += val;
1938                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1939             }
1940          }
1941       }
1942 
1943       total_decode -= effective;
1944    }
1945    *c_inter_p = c_inter;
1946    *p_inter_p = p_inter;
1947    return TRUE;
1948 }
1949 
predict_point(int x,int x0,int x1,int y0,int y1)1950 static int predict_point(int x, int x0, int x1, int y0, int y1)
1951 {
1952    int dy = y1 - y0;
1953    int adx = x1 - x0;
1954    // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
1955    int err = abs(dy) * (x - x0);
1956    int off = err / adx;
1957    return dy < 0 ? y0 - off : y0 + off;
1958 }
1959 
1960 // the following table is block-copied from the specification
1961 static float inverse_db_table[256] =
1962 {
1963   1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
1964   1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
1965   1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
1966   2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
1967   2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
1968   3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
1969   4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
1970   6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
1971   7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
1972   1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
1973   1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
1974   1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
1975   2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
1976   2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
1977   3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
1978   4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
1979   5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
1980   7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
1981   9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
1982   1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
1983   1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
1984   2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
1985   2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
1986   3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
1987   4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
1988   5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
1989   7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
1990   9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
1991   0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
1992   0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
1993   0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
1994   0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
1995   0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
1996   0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
1997   0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
1998   0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
1999   0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f,
2000   0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f,
2001   0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f,
2002   0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f,
2003   0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f,
2004   0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f,
2005   0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f,
2006   0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f,
2007   0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f,
2008   0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f,
2009   0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f,
2010   0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f,
2011   0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f,
2012   0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f,
2013   0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f,
2014   0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f,
2015   0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f,
2016   0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f,
2017   0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f,
2018   0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f,
2019   0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f,
2020   0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f,
2021   0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f,
2022   0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f,
2023   0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f,
2024   0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f,
2025   0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f,
2026   0.82788260f,    0.88168307f,    0.9389798f,     1.0f
2027 };
2028 
2029 
2030 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
2031 // note that you must produce bit-identical output to decode correctly;
2032 // this specific sequence of operations is specified in the spec (it's
2033 // drawing integer-quantized frequency-space lines that the encoder
2034 // expects to be exactly the same)
2035 //     ... also, isn't the whole point of Bresenham's algorithm to NOT
2036 // have to divide in the setup? sigh.
2037 #ifndef STB_VORBIS_NO_DEFER_FLOOR
2038 #define LINE_OP(a,b)   a *= b
2039 #else
2040 #define LINE_OP(a,b)   a = b
2041 #endif
2042 
2043 #ifdef STB_VORBIS_DIVIDE_TABLE
2044 #define DIVTAB_NUMER   32
2045 #define DIVTAB_DENOM   64
2046 int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
2047 #endif
2048 
draw_line(float * output,int x0,int y0,int x1,int y1,int n)2049 static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
2050 {
2051    int dy = y1 - y0;
2052    int adx = x1 - x0;
2053    int ady = abs(dy);
2054    int base;
2055    int x=x0,y=y0;
2056    int err = 0;
2057    int sy;
2058 
2059 #ifdef STB_VORBIS_DIVIDE_TABLE
2060    if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
2061       if (dy < 0) {
2062          base = -integer_divide_table[ady][adx];
2063          sy = base-1;
2064       } else {
2065          base =  integer_divide_table[ady][adx];
2066          sy = base+1;
2067       }
2068    } else {
2069       base = dy / adx;
2070       if (dy < 0)
2071          sy = base - 1;
2072       else
2073          sy = base+1;
2074    }
2075 #else
2076    base = dy / adx;
2077    if (dy < 0)
2078       sy = base - 1;
2079    else
2080       sy = base+1;
2081 #endif
2082    ady -= abs(base) * adx;
2083    if (x1 > n) x1 = n;
2084    if (x < x1) {
2085       LINE_OP(output[x], inverse_db_table[y]);
2086       for (++x; x < x1; ++x) {
2087          err += ady;
2088          if (err >= adx) {
2089             err -= adx;
2090             y += sy;
2091          } else
2092             y += base;
2093          LINE_OP(output[x], inverse_db_table[y]);
2094       }
2095    }
2096 }
2097 
residue_decode(vorb * f,Codebook * book,float * target,int offset,int n,int rtype)2098 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
2099 {
2100    int k;
2101    if (rtype == 0) {
2102       int step = n / book->dimensions;
2103       for (k=0; k < step; ++k)
2104          if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
2105             return FALSE;
2106    } else {
2107       for (k=0; k < n; ) {
2108          if (!codebook_decode(f, book, target+offset, n-k))
2109             return FALSE;
2110          k += book->dimensions;
2111          offset += book->dimensions;
2112       }
2113    }
2114    return TRUE;
2115 }
2116 
2117 // n is 1/2 of the blocksize --
2118 // specification: "Correct per-vector decode length is [n]/2"
decode_residue(vorb * f,float * residue_buffers[],int ch,int n,int rn,uint8 * do_not_decode)2119 static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
2120 {
2121    int i,j,pass;
2122    Residue *r = f->residue_config + rn;
2123    int rtype = f->residue_types[rn];
2124    int c = r->classbook;
2125    int classwords = f->codebooks[c].dimensions;
2126    unsigned int actual_size = rtype == 2 ? n*2 : n;
2127    unsigned int limit_r_begin = (r->begin < actual_size ? r->begin : actual_size);
2128    unsigned int limit_r_end   = (r->end   < actual_size ? r->end   : actual_size);
2129    int n_read = limit_r_end - limit_r_begin;
2130    int part_read = n_read / r->part_size;
2131    int temp_alloc_point = temp_alloc_save(f);
2132    #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2133    uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
2134    #else
2135    int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
2136    #endif
2137 
2138    CHECK(f);
2139 
2140    for (i=0; i < ch; ++i)
2141       if (!do_not_decode[i])
2142          memset(residue_buffers[i], 0, sizeof(float) * n);
2143 
2144    if (rtype == 2 && ch != 1) {
2145       for (j=0; j < ch; ++j)
2146          if (!do_not_decode[j])
2147             break;
2148       if (j == ch)
2149          goto done;
2150 
2151       for (pass=0; pass < 8; ++pass) {
2152          int pcount = 0, class_set = 0;
2153          if (ch == 2) {
2154             while (pcount < part_read) {
2155                int z = r->begin + pcount*r->part_size;
2156                int c_inter = (z & 1), p_inter = z>>1;
2157                if (pass == 0) {
2158                   Codebook *c = f->codebooks+r->classbook;
2159                   int q;
2160                   DECODE(q,f,c);
2161                   if (q == EOP) goto done;
2162                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2163                   part_classdata[0][class_set] = r->classdata[q];
2164                   #else
2165                   for (i=classwords-1; i >= 0; --i) {
2166                      classifications[0][i+pcount] = q % r->classifications;
2167                      q /= r->classifications;
2168                   }
2169                   #endif
2170                }
2171                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2172                   int z = r->begin + pcount*r->part_size;
2173                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2174                   int c = part_classdata[0][class_set][i];
2175                   #else
2176                   int c = classifications[0][pcount];
2177                   #endif
2178                   int b = r->residue_books[c][pass];
2179                   if (b >= 0) {
2180                      Codebook *book = f->codebooks + b;
2181                      #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
2182                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2183                         goto done;
2184                      #else
2185                      // saves 1%
2186                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2187                         goto done;
2188                      #endif
2189                   } else {
2190                      z += r->part_size;
2191                      c_inter = z & 1;
2192                      p_inter = z >> 1;
2193                   }
2194                }
2195                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2196                ++class_set;
2197                #endif
2198             }
2199          } else if (ch == 1) {
2200             while (pcount < part_read) {
2201                int z = r->begin + pcount*r->part_size;
2202                int c_inter = 0, p_inter = z;
2203                if (pass == 0) {
2204                   Codebook *c = f->codebooks+r->classbook;
2205                   int q;
2206                   DECODE(q,f,c);
2207                   if (q == EOP) goto done;
2208                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2209                   part_classdata[0][class_set] = r->classdata[q];
2210                   #else
2211                   for (i=classwords-1; i >= 0; --i) {
2212                      classifications[0][i+pcount] = q % r->classifications;
2213                      q /= r->classifications;
2214                   }
2215                   #endif
2216                }
2217                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2218                   int z = r->begin + pcount*r->part_size;
2219                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2220                   int c = part_classdata[0][class_set][i];
2221                   #else
2222                   int c = classifications[0][pcount];
2223                   #endif
2224                   int b = r->residue_books[c][pass];
2225                   if (b >= 0) {
2226                      Codebook *book = f->codebooks + b;
2227                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2228                         goto done;
2229                   } else {
2230                      z += r->part_size;
2231                      c_inter = 0;
2232                      p_inter = z;
2233                   }
2234                }
2235                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2236                ++class_set;
2237                #endif
2238             }
2239          } else {
2240             while (pcount < part_read) {
2241                int z = r->begin + pcount*r->part_size;
2242                int c_inter = z % ch, p_inter = z/ch;
2243                if (pass == 0) {
2244                   Codebook *c = f->codebooks+r->classbook;
2245                   int q;
2246                   DECODE(q,f,c);
2247                   if (q == EOP) goto done;
2248                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2249                   part_classdata[0][class_set] = r->classdata[q];
2250                   #else
2251                   for (i=classwords-1; i >= 0; --i) {
2252                      classifications[0][i+pcount] = q % r->classifications;
2253                      q /= r->classifications;
2254                   }
2255                   #endif
2256                }
2257                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2258                   int z = r->begin + pcount*r->part_size;
2259                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2260                   int c = part_classdata[0][class_set][i];
2261                   #else
2262                   int c = classifications[0][pcount];
2263                   #endif
2264                   int b = r->residue_books[c][pass];
2265                   if (b >= 0) {
2266                      Codebook *book = f->codebooks + b;
2267                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2268                         goto done;
2269                   } else {
2270                      z += r->part_size;
2271                      c_inter = z % ch;
2272                      p_inter = z / ch;
2273                   }
2274                }
2275                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2276                ++class_set;
2277                #endif
2278             }
2279          }
2280       }
2281       goto done;
2282    }
2283    CHECK(f);
2284 
2285    for (pass=0; pass < 8; ++pass) {
2286       int pcount = 0, class_set=0;
2287       while (pcount < part_read) {
2288          if (pass == 0) {
2289             for (j=0; j < ch; ++j) {
2290                if (!do_not_decode[j]) {
2291                   Codebook *c = f->codebooks+r->classbook;
2292                   int temp;
2293                   DECODE(temp,f,c);
2294                   if (temp == EOP) goto done;
2295                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2296                   part_classdata[j][class_set] = r->classdata[temp];
2297                   #else
2298                   for (i=classwords-1; i >= 0; --i) {
2299                      classifications[j][i+pcount] = temp % r->classifications;
2300                      temp /= r->classifications;
2301                   }
2302                   #endif
2303                }
2304             }
2305          }
2306          for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2307             for (j=0; j < ch; ++j) {
2308                if (!do_not_decode[j]) {
2309                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2310                   int c = part_classdata[j][class_set][i];
2311                   #else
2312                   int c = classifications[j][pcount];
2313                   #endif
2314                   int b = r->residue_books[c][pass];
2315                   if (b >= 0) {
2316                      float *target = residue_buffers[j];
2317                      int offset = r->begin + pcount * r->part_size;
2318                      int n = r->part_size;
2319                      Codebook *book = f->codebooks + b;
2320                      if (!residue_decode(f, book, target, offset, n, rtype))
2321                         goto done;
2322                   }
2323                }
2324             }
2325          }
2326          #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2327          ++class_set;
2328          #endif
2329       }
2330    }
2331   done:
2332    CHECK(f);
2333    #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2334    temp_free(f,part_classdata);
2335    #else
2336    temp_free(f,classifications);
2337    #endif
2338    temp_alloc_restore(f,temp_alloc_point);
2339 }
2340 
2341 
2342 #if 0
2343 // slow way for debugging
2344 void inverse_mdct_slow(float *buffer, int n)
2345 {
2346    int i,j;
2347    int n2 = n >> 1;
2348    float *x = (float *) malloc(sizeof(*x) * n2);
2349    memcpy(x, buffer, sizeof(*x) * n2);
2350    for (i=0; i < n; ++i) {
2351       float acc = 0;
2352       for (j=0; j < n2; ++j)
2353          // formula from paper:
2354          //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2355          // formula from wikipedia
2356          //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2357          // these are equivalent, except the formula from the paper inverts the multiplier!
2358          // however, what actually works is NO MULTIPLIER!?!
2359          //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2360          acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2361       buffer[i] = acc;
2362    }
2363    free(x);
2364 }
2365 #elif 0
2366 // same as above, but just barely able to run in real time on modern machines
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2367 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2368 {
2369    float mcos[16384];
2370    int i,j;
2371    int n2 = n >> 1, nmask = (n << 2) -1;
2372    float *x = (float *) malloc(sizeof(*x) * n2);
2373    memcpy(x, buffer, sizeof(*x) * n2);
2374    for (i=0; i < 4*n; ++i)
2375       mcos[i] = (float) cos(M_PI / 2 * i / n);
2376 
2377    for (i=0; i < n; ++i) {
2378       float acc = 0;
2379       for (j=0; j < n2; ++j)
2380          acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
2381       buffer[i] = acc;
2382    }
2383    free(x);
2384 }
2385 #elif 0
2386 // transform to use a slow dct-iv; this is STILL basically trivial,
2387 // but only requires half as many ops
dct_iv_slow(float * buffer,int n)2388 void dct_iv_slow(float *buffer, int n)
2389 {
2390    float mcos[16384];
2391    float x[2048];
2392    int i,j;
2393    int n2 = n >> 1, nmask = (n << 3) - 1;
2394    memcpy(x, buffer, sizeof(*x) * n);
2395    for (i=0; i < 8*n; ++i)
2396       mcos[i] = (float) cos(M_PI / 4 * i / n);
2397    for (i=0; i < n; ++i) {
2398       float acc = 0;
2399       for (j=0; j < n; ++j)
2400          acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
2401       buffer[i] = acc;
2402    }
2403 }
2404 
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2405 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2406 {
2407    int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
2408    float temp[4096];
2409 
2410    memcpy(temp, buffer, n2 * sizeof(float));
2411    dct_iv_slow(temp, n2);  // returns -c'-d, a-b'
2412 
2413    for (i=0; i < n4  ; ++i) buffer[i] = temp[i+n4];            // a-b'
2414    for (   ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1];   // b-a', c+d'
2415    for (   ; i < n   ; ++i) buffer[i] = -temp[i - n3_4];       // c'+d
2416 }
2417 #endif
2418 
2419 #ifndef LIBVORBIS_MDCT
2420 #define LIBVORBIS_MDCT 0
2421 #endif
2422 
2423 #if LIBVORBIS_MDCT
2424 // directly call the vorbis MDCT using an interface documented
2425 // by Jeff Roberts... useful for performance comparison
2426 typedef struct
2427 {
2428   int n;
2429   int log2n;
2430 
2431   float *trig;
2432   int   *bitrev;
2433 
2434   float scale;
2435 } mdct_lookup;
2436 
2437 extern void mdct_init(mdct_lookup *lookup, int n);
2438 extern void mdct_clear(mdct_lookup *l);
2439 extern void mdct_backward(mdct_lookup *init, float *in, float *out);
2440 
2441 mdct_lookup M1,M2;
2442 
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2443 void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2444 {
2445    mdct_lookup *M;
2446    if (M1.n == n) M = &M1;
2447    else if (M2.n == n) M = &M2;
2448    else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
2449    else {
2450       if (M2.n) __asm int 3;
2451       mdct_init(&M2, n);
2452       M = &M2;
2453    }
2454 
2455    mdct_backward(M, buffer, buffer);
2456 }
2457 #endif
2458 
2459 
2460 // the following were split out into separate functions while optimizing;
2461 // they could be pushed back up but eh. __forceinline showed no change;
2462 // they're probably already being inlined.
imdct_step3_iter0_loop(int n,float * e,int i_off,int k_off,float * A)2463 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
2464 {
2465    float *ee0 = e + i_off;
2466    float *ee2 = ee0 + k_off;
2467    int i;
2468 
2469    assert((n & 3) == 0);
2470    for (i=(n>>2); i > 0; --i) {
2471       float k00_20, k01_21;
2472       k00_20  = ee0[ 0] - ee2[ 0];
2473       k01_21  = ee0[-1] - ee2[-1];
2474       ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
2475       ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
2476       ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
2477       ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
2478       A += 8;
2479 
2480       k00_20  = ee0[-2] - ee2[-2];
2481       k01_21  = ee0[-3] - ee2[-3];
2482       ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
2483       ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
2484       ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
2485       ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
2486       A += 8;
2487 
2488       k00_20  = ee0[-4] - ee2[-4];
2489       k01_21  = ee0[-5] - ee2[-5];
2490       ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
2491       ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
2492       ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
2493       ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
2494       A += 8;
2495 
2496       k00_20  = ee0[-6] - ee2[-6];
2497       k01_21  = ee0[-7] - ee2[-7];
2498       ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
2499       ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
2500       ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
2501       ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
2502       A += 8;
2503       ee0 -= 8;
2504       ee2 -= 8;
2505    }
2506 }
2507 
imdct_step3_inner_r_loop(int lim,float * e,int d0,int k_off,float * A,int k1)2508 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
2509 {
2510    int i;
2511    float k00_20, k01_21;
2512 
2513    float *e0 = e + d0;
2514    float *e2 = e0 + k_off;
2515 
2516    for (i=lim >> 2; i > 0; --i) {
2517       k00_20 = e0[-0] - e2[-0];
2518       k01_21 = e0[-1] - e2[-1];
2519       e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
2520       e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
2521       e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
2522       e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
2523 
2524       A += k1;
2525 
2526       k00_20 = e0[-2] - e2[-2];
2527       k01_21 = e0[-3] - e2[-3];
2528       e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
2529       e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
2530       e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
2531       e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
2532 
2533       A += k1;
2534 
2535       k00_20 = e0[-4] - e2[-4];
2536       k01_21 = e0[-5] - e2[-5];
2537       e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
2538       e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
2539       e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
2540       e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
2541 
2542       A += k1;
2543 
2544       k00_20 = e0[-6] - e2[-6];
2545       k01_21 = e0[-7] - e2[-7];
2546       e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
2547       e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
2548       e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
2549       e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
2550 
2551       e0 -= 8;
2552       e2 -= 8;
2553 
2554       A += k1;
2555    }
2556 }
2557 
imdct_step3_inner_s_loop(int n,float * e,int i_off,int k_off,float * A,int a_off,int k0)2558 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
2559 {
2560    int i;
2561    float A0 = A[0];
2562    float A1 = A[0+1];
2563    float A2 = A[0+a_off];
2564    float A3 = A[0+a_off+1];
2565    float A4 = A[0+a_off*2+0];
2566    float A5 = A[0+a_off*2+1];
2567    float A6 = A[0+a_off*3+0];
2568    float A7 = A[0+a_off*3+1];
2569 
2570    float k00,k11;
2571 
2572    float *ee0 = e  +i_off;
2573    float *ee2 = ee0+k_off;
2574 
2575    for (i=n; i > 0; --i) {
2576       k00     = ee0[ 0] - ee2[ 0];
2577       k11     = ee0[-1] - ee2[-1];
2578       ee0[ 0] =  ee0[ 0] + ee2[ 0];
2579       ee0[-1] =  ee0[-1] + ee2[-1];
2580       ee2[ 0] = (k00) * A0 - (k11) * A1;
2581       ee2[-1] = (k11) * A0 + (k00) * A1;
2582 
2583       k00     = ee0[-2] - ee2[-2];
2584       k11     = ee0[-3] - ee2[-3];
2585       ee0[-2] =  ee0[-2] + ee2[-2];
2586       ee0[-3] =  ee0[-3] + ee2[-3];
2587       ee2[-2] = (k00) * A2 - (k11) * A3;
2588       ee2[-3] = (k11) * A2 + (k00) * A3;
2589 
2590       k00     = ee0[-4] - ee2[-4];
2591       k11     = ee0[-5] - ee2[-5];
2592       ee0[-4] =  ee0[-4] + ee2[-4];
2593       ee0[-5] =  ee0[-5] + ee2[-5];
2594       ee2[-4] = (k00) * A4 - (k11) * A5;
2595       ee2[-5] = (k11) * A4 + (k00) * A5;
2596 
2597       k00     = ee0[-6] - ee2[-6];
2598       k11     = ee0[-7] - ee2[-7];
2599       ee0[-6] =  ee0[-6] + ee2[-6];
2600       ee0[-7] =  ee0[-7] + ee2[-7];
2601       ee2[-6] = (k00) * A6 - (k11) * A7;
2602       ee2[-7] = (k11) * A6 + (k00) * A7;
2603 
2604       ee0 -= k0;
2605       ee2 -= k0;
2606    }
2607 }
2608 
iter_54(float * z)2609 static __forceinline void iter_54(float *z)
2610 {
2611    float k00,k11,k22,k33;
2612    float y0,y1,y2,y3;
2613 
2614    k00  = z[ 0] - z[-4];
2615    y0   = z[ 0] + z[-4];
2616    y2   = z[-2] + z[-6];
2617    k22  = z[-2] - z[-6];
2618 
2619    z[-0] = y0 + y2;      // z0 + z4 + z2 + z6
2620    z[-2] = y0 - y2;      // z0 + z4 - z2 - z6
2621 
2622    // done with y0,y2
2623 
2624    k33  = z[-3] - z[-7];
2625 
2626    z[-4] = k00 + k33;    // z0 - z4 + z3 - z7
2627    z[-6] = k00 - k33;    // z0 - z4 - z3 + z7
2628 
2629    // done with k33
2630 
2631    k11  = z[-1] - z[-5];
2632    y1   = z[-1] + z[-5];
2633    y3   = z[-3] + z[-7];
2634 
2635    z[-1] = y1 + y3;      // z1 + z5 + z3 + z7
2636    z[-3] = y1 - y3;      // z1 + z5 - z3 - z7
2637    z[-5] = k11 - k22;    // z1 - z5 + z2 - z6
2638    z[-7] = k11 + k22;    // z1 - z5 - z2 + z6
2639 }
2640 
imdct_step3_inner_s_loop_ld654(int n,float * e,int i_off,float * A,int base_n)2641 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
2642 {
2643    int a_off = base_n >> 3;
2644    float A2 = A[0+a_off];
2645    float *z = e + i_off;
2646    float *base = z - 16 * n;
2647 
2648    while (z > base) {
2649       float k00,k11;
2650 
2651       k00   = z[-0] - z[-8];
2652       k11   = z[-1] - z[-9];
2653       z[-0] = z[-0] + z[-8];
2654       z[-1] = z[-1] + z[-9];
2655       z[-8] =  k00;
2656       z[-9] =  k11 ;
2657 
2658       k00    = z[ -2] - z[-10];
2659       k11    = z[ -3] - z[-11];
2660       z[ -2] = z[ -2] + z[-10];
2661       z[ -3] = z[ -3] + z[-11];
2662       z[-10] = (k00+k11) * A2;
2663       z[-11] = (k11-k00) * A2;
2664 
2665       k00    = z[-12] - z[ -4];  // reverse to avoid a unary negation
2666       k11    = z[ -5] - z[-13];
2667       z[ -4] = z[ -4] + z[-12];
2668       z[ -5] = z[ -5] + z[-13];
2669       z[-12] = k11;
2670       z[-13] = k00;
2671 
2672       k00    = z[-14] - z[ -6];  // reverse to avoid a unary negation
2673       k11    = z[ -7] - z[-15];
2674       z[ -6] = z[ -6] + z[-14];
2675       z[ -7] = z[ -7] + z[-15];
2676       z[-14] = (k00+k11) * A2;
2677       z[-15] = (k00-k11) * A2;
2678 
2679       iter_54(z);
2680       iter_54(z-8);
2681       z -= 16;
2682    }
2683 }
2684 
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2685 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2686 {
2687    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2688    int ld;
2689    // @OPTIMIZE: reduce register pressure by using fewer variables?
2690    int save_point = temp_alloc_save(f);
2691    float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
2692    float *u=NULL,*v=NULL;
2693    // twiddle factors
2694    float *A = f->A[blocktype];
2695 
2696    // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2697    // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
2698 
2699    // kernel from paper
2700 
2701 
2702    // merged:
2703    //   copy and reflect spectral data
2704    //   step 0
2705 
2706    // note that it turns out that the items added together during
2707    // this step are, in fact, being added to themselves (as reflected
2708    // by step 0). inexplicable inefficiency! this became obvious
2709    // once I combined the passes.
2710 
2711    // so there's a missing 'times 2' here (for adding X to itself).
2712    // this propogates through linearly to the end, where the numbers
2713    // are 1/2 too small, and need to be compensated for.
2714 
2715    {
2716       float *d,*e, *AA, *e_stop;
2717       d = &buf2[n2-2];
2718       AA = A;
2719       e = &buffer[0];
2720       e_stop = &buffer[n2];
2721       while (e != e_stop) {
2722          d[1] = (e[0] * AA[0] - e[2]*AA[1]);
2723          d[0] = (e[0] * AA[1] + e[2]*AA[0]);
2724          d -= 2;
2725          AA += 2;
2726          e += 4;
2727       }
2728 
2729       e = &buffer[n2-3];
2730       while (d >= buf2) {
2731          d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
2732          d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
2733          d -= 2;
2734          AA += 2;
2735          e -= 4;
2736       }
2737    }
2738 
2739    // now we use symbolic names for these, so that we can
2740    // possibly swap their meaning as we change which operations
2741    // are in place
2742 
2743    u = buffer;
2744    v = buf2;
2745 
2746    // step 2    (paper output is w, now u)
2747    // this could be in place, but the data ends up in the wrong
2748    // place... _somebody_'s got to swap it, so this is nominated
2749    {
2750       float *AA = &A[n2-8];
2751       float *d0,*d1, *e0, *e1;
2752 
2753       e0 = &v[n4];
2754       e1 = &v[0];
2755 
2756       d0 = &u[n4];
2757       d1 = &u[0];
2758 
2759       while (AA >= A) {
2760          float v40_20, v41_21;
2761 
2762          v41_21 = e0[1] - e1[1];
2763          v40_20 = e0[0] - e1[0];
2764          d0[1]  = e0[1] + e1[1];
2765          d0[0]  = e0[0] + e1[0];
2766          d1[1]  = v41_21*AA[4] - v40_20*AA[5];
2767          d1[0]  = v40_20*AA[4] + v41_21*AA[5];
2768 
2769          v41_21 = e0[3] - e1[3];
2770          v40_20 = e0[2] - e1[2];
2771          d0[3]  = e0[3] + e1[3];
2772          d0[2]  = e0[2] + e1[2];
2773          d1[3]  = v41_21*AA[0] - v40_20*AA[1];
2774          d1[2]  = v40_20*AA[0] + v41_21*AA[1];
2775 
2776          AA -= 8;
2777 
2778          d0 += 4;
2779          d1 += 4;
2780          e0 += 4;
2781          e1 += 4;
2782       }
2783    }
2784 
2785    // step 3
2786    ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2787 
2788    // optimized step 3:
2789 
2790    // the original step3 loop can be nested r inside s or s inside r;
2791    // it's written originally as s inside r, but this is dumb when r
2792    // iterates many times, and s few. So I have two copies of it and
2793    // switch between them halfway.
2794 
2795    // this is iteration 0 of step 3
2796    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2797    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2798 
2799    // this is iteration 1 of step 3
2800    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2801    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2802    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2803    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2804 
2805    l=2;
2806    for (; l < (ld-3)>>1; ++l) {
2807       int k0 = n >> (l+2), k0_2 = k0>>1;
2808       int lim = 1 << (l+1);
2809       int i;
2810       for (i=0; i < lim; ++i)
2811          imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2812    }
2813 
2814    for (; l < ld-6; ++l) {
2815       int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2816       int rlim = n >> (l+6), r;
2817       int lim = 1 << (l+1);
2818       int i_off;
2819       float *A0 = A;
2820       i_off = n2-1;
2821       for (r=rlim; r > 0; --r) {
2822          imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2823          A0 += k1*4;
2824          i_off -= 8;
2825       }
2826    }
2827 
2828    // iterations with count:
2829    //   ld-6,-5,-4 all interleaved together
2830    //       the big win comes from getting rid of needless flops
2831    //         due to the constants on pass 5 & 4 being all 1 and 0;
2832    //       combining them to be simultaneous to improve cache made little difference
2833    imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2834 
2835    // output is u
2836 
2837    // step 4, 5, and 6
2838    // cannot be in-place because of step 5
2839    {
2840       uint16 *bitrev = f->bit_reverse[blocktype];
2841       // weirdly, I'd have thought reading sequentially and writing
2842       // erratically would have been better than vice-versa, but in
2843       // fact that's not what my testing showed. (That is, with
2844       // j = bitreverse(i), do you read i and write j, or read j and write i.)
2845 
2846       float *d0 = &v[n4-4];
2847       float *d1 = &v[n2-4];
2848       while (d0 >= v) {
2849          int k4;
2850 
2851          k4 = bitrev[0];
2852          d1[3] = u[k4+0];
2853          d1[2] = u[k4+1];
2854          d0[3] = u[k4+2];
2855          d0[2] = u[k4+3];
2856 
2857          k4 = bitrev[1];
2858          d1[1] = u[k4+0];
2859          d1[0] = u[k4+1];
2860          d0[1] = u[k4+2];
2861          d0[0] = u[k4+3];
2862 
2863          d0 -= 4;
2864          d1 -= 4;
2865          bitrev += 2;
2866       }
2867    }
2868    // (paper output is u, now v)
2869 
2870 
2871    // data must be in buf2
2872    assert(v == buf2);
2873 
2874    // step 7   (paper output is v, now v)
2875    // this is now in place
2876    {
2877       float *C = f->C[blocktype];
2878       float *d, *e;
2879 
2880       d = v;
2881       e = v + n2 - 4;
2882 
2883       while (d < e) {
2884          float a02,a11,b0,b1,b2,b3;
2885 
2886          a02 = d[0] - e[2];
2887          a11 = d[1] + e[3];
2888 
2889          b0 = C[1]*a02 + C[0]*a11;
2890          b1 = C[1]*a11 - C[0]*a02;
2891 
2892          b2 = d[0] + e[ 2];
2893          b3 = d[1] - e[ 3];
2894 
2895          d[0] = b2 + b0;
2896          d[1] = b3 + b1;
2897          e[2] = b2 - b0;
2898          e[3] = b1 - b3;
2899 
2900          a02 = d[2] - e[0];
2901          a11 = d[3] + e[1];
2902 
2903          b0 = C[3]*a02 + C[2]*a11;
2904          b1 = C[3]*a11 - C[2]*a02;
2905 
2906          b2 = d[2] + e[ 0];
2907          b3 = d[3] - e[ 1];
2908 
2909          d[2] = b2 + b0;
2910          d[3] = b3 + b1;
2911          e[0] = b2 - b0;
2912          e[1] = b1 - b3;
2913 
2914          C += 4;
2915          d += 4;
2916          e -= 4;
2917       }
2918    }
2919 
2920    // data must be in buf2
2921 
2922 
2923    // step 8+decode   (paper output is X, now buffer)
2924    // this generates pairs of data a la 8 and pushes them directly through
2925    // the decode kernel (pushing rather than pulling) to avoid having
2926    // to make another pass later
2927 
2928    // this cannot POSSIBLY be in place, so we refer to the buffers directly
2929 
2930    {
2931       float *d0,*d1,*d2,*d3;
2932 
2933       float *B = f->B[blocktype] + n2 - 8;
2934       float *e = buf2 + n2 - 8;
2935       d0 = &buffer[0];
2936       d1 = &buffer[n2-4];
2937       d2 = &buffer[n2];
2938       d3 = &buffer[n-4];
2939       while (e >= v) {
2940          float p0,p1,p2,p3;
2941 
2942          p3 =  e[6]*B[7] - e[7]*B[6];
2943          p2 = -e[6]*B[6] - e[7]*B[7];
2944 
2945          d0[0] =   p3;
2946          d1[3] = - p3;
2947          d2[0] =   p2;
2948          d3[3] =   p2;
2949 
2950          p1 =  e[4]*B[5] - e[5]*B[4];
2951          p0 = -e[4]*B[4] - e[5]*B[5];
2952 
2953          d0[1] =   p1;
2954          d1[2] = - p1;
2955          d2[1] =   p0;
2956          d3[2] =   p0;
2957 
2958          p3 =  e[2]*B[3] - e[3]*B[2];
2959          p2 = -e[2]*B[2] - e[3]*B[3];
2960 
2961          d0[2] =   p3;
2962          d1[1] = - p3;
2963          d2[2] =   p2;
2964          d3[1] =   p2;
2965 
2966          p1 =  e[0]*B[1] - e[1]*B[0];
2967          p0 = -e[0]*B[0] - e[1]*B[1];
2968 
2969          d0[3] =   p1;
2970          d1[0] = - p1;
2971          d2[3] =   p0;
2972          d3[0] =   p0;
2973 
2974          B -= 8;
2975          e -= 8;
2976          d0 += 4;
2977          d2 += 4;
2978          d1 -= 4;
2979          d3 -= 4;
2980       }
2981    }
2982 
2983    temp_free(f,buf2);
2984    temp_alloc_restore(f,save_point);
2985 }
2986 
2987 #if 0
2988 // this is the original version of the above code, if you want to optimize it from scratch
2989 void inverse_mdct_naive(float *buffer, int n)
2990 {
2991    float s;
2992    float A[1 << 12], B[1 << 12], C[1 << 11];
2993    int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2994    int n3_4 = n - n4, ld;
2995    // how can they claim this only uses N words?!
2996    // oh, because they're only used sparsely, whoops
2997    float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];
2998    // set up twiddle factors
2999 
3000    for (k=k2=0; k < n4; ++k,k2+=2) {
3001       A[k2  ] = (float)  cos(4*k*M_PI/n);
3002       A[k2+1] = (float) -sin(4*k*M_PI/n);
3003       B[k2  ] = (float)  cos((k2+1)*M_PI/n/2);
3004       B[k2+1] = (float)  sin((k2+1)*M_PI/n/2);
3005    }
3006    for (k=k2=0; k < n8; ++k,k2+=2) {
3007       C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
3008       C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
3009    }
3010 
3011    // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
3012    // Note there are bugs in that pseudocode, presumably due to them attempting
3013    // to rename the arrays nicely rather than representing the way their actual
3014    // implementation bounces buffers back and forth. As a result, even in the
3015    // "some formulars corrected" version, a direct implementation fails. These
3016    // are noted below as "paper bug".
3017 
3018    // copy and reflect spectral data
3019    for (k=0; k < n2; ++k) u[k] = buffer[k];
3020    for (   ; k < n ; ++k) u[k] = -buffer[n - k - 1];
3021    // kernel from paper
3022    // step 1
3023    for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
3024       v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2]   - (u[k4+2] - u[n-k4-3])*A[k2+1];
3025       v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
3026    }
3027    // step 2
3028    for (k=k4=0; k < n8; k+=1, k4+=4) {
3029       w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
3030       w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
3031       w[k4+3]    = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
3032       w[k4+1]    = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
3033    }
3034    // step 3
3035    ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
3036    for (l=0; l < ld-3; ++l) {
3037       int k0 = n >> (l+2), k1 = 1 << (l+3);
3038       int rlim = n >> (l+4), r4, r;
3039       int s2lim = 1 << (l+2), s2;
3040       for (r=r4=0; r < rlim; r4+=4,++r) {
3041          for (s2=0; s2 < s2lim; s2+=2) {
3042             u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
3043             u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
3044             u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
3045                                 - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
3046             u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
3047                                 + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
3048          }
3049       }
3050       if (l+1 < ld-3) {
3051          // paper bug: ping-ponging of u&w here is omitted
3052          memcpy(w, u, sizeof(u));
3053       }
3054    }
3055 
3056    // step 4
3057    for (i=0; i < n8; ++i) {
3058       int j = bit_reverse(i) >> (32-ld+3);
3059       assert(j < n8);
3060       if (i == j) {
3061          // paper bug: original code probably swapped in place; if copying,
3062          //            need to directly copy in this case
3063          int i8 = i << 3;
3064          v[i8+1] = u[i8+1];
3065          v[i8+3] = u[i8+3];
3066          v[i8+5] = u[i8+5];
3067          v[i8+7] = u[i8+7];
3068       } else if (i < j) {
3069          int i8 = i << 3, j8 = j << 3;
3070          v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
3071          v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
3072          v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
3073          v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
3074       }
3075    }
3076    // step 5
3077    for (k=0; k < n2; ++k) {
3078       w[k] = v[k*2+1];
3079    }
3080    // step 6
3081    for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
3082       u[n-1-k2] = w[k4];
3083       u[n-2-k2] = w[k4+1];
3084       u[n3_4 - 1 - k2] = w[k4+2];
3085       u[n3_4 - 2 - k2] = w[k4+3];
3086    }
3087    // step 7
3088    for (k=k2=0; k < n8; ++k, k2 += 2) {
3089       v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3090       v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3091       v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3092       v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3093    }
3094    // step 8
3095    for (k=k2=0; k < n4; ++k,k2 += 2) {
3096       X[k]      = v[k2+n2]*B[k2  ] + v[k2+1+n2]*B[k2+1];
3097       X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2  ];
3098    }
3099 
3100    // decode kernel to output
3101    // determined the following value experimentally
3102    // (by first figuring out what made inverse_mdct_slow work); then matching that here
3103    // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
3104    s = 0.5; // theoretically would be n4
3105 
3106    // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
3107    //     so it needs to use the "old" B values to behave correctly, or else
3108    //     set s to 1.0 ]]]
3109    for (i=0; i < n4  ; ++i) buffer[i] = s * X[i+n4];
3110    for (   ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
3111    for (   ; i < n   ; ++i) buffer[i] = -s * X[i - n3_4];
3112 }
3113 #endif
3114 
get_window(vorb * f,int len)3115 static float *get_window(vorb *f, int len)
3116 {
3117    len <<= 1;
3118    if (len == f->blocksize_0) return f->window[0];
3119    if (len == f->blocksize_1) return f->window[1];
3120    assert(0);
3121    return NULL;
3122 }
3123 
3124 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3125 typedef int16 YTYPE;
3126 #else
3127 typedef int YTYPE;
3128 #endif
do_floor(vorb * f,Mapping * map,int i,int n,float * target,YTYPE * finalY,uint8 * step2_flag)3129 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
3130 {
3131    int n2 = n >> 1;
3132    int s = map->chan[i].mux, floor;
3133    floor = map->submap_floor[s];
3134    if (f->floor_types[floor] == 0) {
3135       return error(f, VORBIS_invalid_stream);
3136    } else {
3137       Floor1 *g = &f->floor_config[floor].floor1;
3138       int j,q;
3139       int lx = 0, ly = finalY[0] * g->floor1_multiplier;
3140       for (q=1; q < g->values; ++q) {
3141          j = g->sorted_order[q];
3142          #ifndef STB_VORBIS_NO_DEFER_FLOOR
3143          if (finalY[j] >= 0)
3144          #else
3145          if (step2_flag[j])
3146          #endif
3147          {
3148             int hy = finalY[j] * g->floor1_multiplier;
3149             int hx = g->Xlist[j];
3150             if (lx != hx)
3151                draw_line(target, lx,ly, hx,hy, n2);
3152             CHECK(f);
3153             lx = hx, ly = hy;
3154          }
3155       }
3156       if (lx < n2) {
3157          // optimization of: draw_line(target, lx,ly, n,ly, n2);
3158          for (j=lx; j < n2; ++j)
3159             LINE_OP(target[j], inverse_db_table[ly]);
3160          CHECK(f);
3161       }
3162    }
3163    return TRUE;
3164 }
3165 
3166 // The meaning of "left" and "right"
3167 //
3168 // For a given frame:
3169 //     we compute samples from 0..n
3170 //     window_center is n/2
3171 //     we'll window and mix the samples from left_start to left_end with data from the previous frame
3172 //     all of the samples from left_end to right_start can be output without mixing; however,
3173 //        this interval is 0-length except when transitioning between short and long frames
3174 //     all of the samples from right_start to right_end need to be mixed with the next frame,
3175 //        which we don't have, so those get saved in a buffer
3176 //     frame N's right_end-right_start, the number of samples to mix with the next frame,
3177 //        has to be the same as frame N+1's left_end-left_start (which they are by
3178 //        construction)
3179 
vorbis_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)3180 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
3181 {
3182    Mode *m;
3183    int i, n, prev, next, window_center;
3184    f->channel_buffer_start = f->channel_buffer_end = 0;
3185 
3186   retry:
3187    if (f->eof) return FALSE;
3188    if (!maybe_start_packet(f))
3189       return FALSE;
3190    // check packet type
3191    if (get_bits(f,1) != 0) {
3192       if (IS_PUSH_MODE(f))
3193          return error(f,VORBIS_bad_packet_type);
3194       while (EOP != get8_packet(f));
3195       goto retry;
3196    }
3197 
3198    if (f->alloc.alloc_buffer)
3199       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3200 
3201    i = get_bits(f, ilog(f->mode_count-1));
3202    if (i == EOP) return FALSE;
3203    if (i >= f->mode_count) return FALSE;
3204    *mode = i;
3205    m = f->mode_config + i;
3206    if (m->blockflag) {
3207       n = f->blocksize_1;
3208       prev = get_bits(f,1);
3209       next = get_bits(f,1);
3210    } else {
3211       prev = next = 0;
3212       n = f->blocksize_0;
3213    }
3214 
3215 // WINDOWING
3216 
3217    window_center = n >> 1;
3218    if (m->blockflag && !prev) {
3219       *p_left_start = (n - f->blocksize_0) >> 2;
3220       *p_left_end   = (n + f->blocksize_0) >> 2;
3221    } else {
3222       *p_left_start = 0;
3223       *p_left_end   = window_center;
3224    }
3225    if (m->blockflag && !next) {
3226       *p_right_start = (n*3 - f->blocksize_0) >> 2;
3227       *p_right_end   = (n*3 + f->blocksize_0) >> 2;
3228    } else {
3229       *p_right_start = window_center;
3230       *p_right_end   = n;
3231    }
3232 
3233    return TRUE;
3234 }
3235 
vorbis_decode_packet_rest(vorb * f,int * len,Mode * m,int left_start,int left_end,int right_start,int right_end,int * p_left)3236 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
3237 {
3238    Mapping *map;
3239    int i,j,k,n,n2;
3240    int zero_channel[256];
3241    int really_zero_channel[256];
3242 
3243 // WINDOWING
3244 
3245    n = f->blocksize[m->blockflag];
3246    map = &f->mapping[m->mapping];
3247 
3248 // FLOORS
3249    n2 = n >> 1;
3250 
3251    CHECK(f);
3252 
3253    for (i=0; i < f->channels; ++i) {
3254       int s = map->chan[i].mux, floor;
3255       zero_channel[i] = FALSE;
3256       floor = map->submap_floor[s];
3257       if (f->floor_types[floor] == 0) {
3258          return error(f, VORBIS_invalid_stream);
3259       } else {
3260          Floor1 *g = &f->floor_config[floor].floor1;
3261          if (get_bits(f, 1)) {
3262             short *finalY;
3263             uint8 step2_flag[256];
3264             static int range_list[4] = { 256, 128, 86, 64 };
3265             int range = range_list[g->floor1_multiplier-1];
3266             int offset = 2;
3267             finalY = f->finalY[i];
3268             finalY[0] = get_bits(f, ilog(range)-1);
3269             finalY[1] = get_bits(f, ilog(range)-1);
3270             for (j=0; j < g->partitions; ++j) {
3271                int pclass = g->partition_class_list[j];
3272                int cdim = g->class_dimensions[pclass];
3273                int cbits = g->class_subclasses[pclass];
3274                int csub = (1 << cbits)-1;
3275                int cval = 0;
3276                if (cbits) {
3277                   Codebook *c = f->codebooks + g->class_masterbooks[pclass];
3278                   DECODE(cval,f,c);
3279                }
3280                for (k=0; k < cdim; ++k) {
3281                   int book = g->subclass_books[pclass][cval & csub];
3282                   cval = cval >> cbits;
3283                   if (book >= 0) {
3284                      int temp;
3285                      Codebook *c = f->codebooks + book;
3286                      DECODE(temp,f,c);
3287                      finalY[offset++] = temp;
3288                   } else
3289                      finalY[offset++] = 0;
3290                }
3291             }
3292             if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
3293             step2_flag[0] = step2_flag[1] = 1;
3294             for (j=2; j < g->values; ++j) {
3295                int low, high, pred, highroom, lowroom, room, val;
3296                low = g->neighbors[j][0];
3297                high = g->neighbors[j][1];
3298                //neighbors(g->Xlist, j, &low, &high);
3299                pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
3300                val = finalY[j];
3301                highroom = range - pred;
3302                lowroom = pred;
3303                if (highroom < lowroom)
3304                   room = highroom * 2;
3305                else
3306                   room = lowroom * 2;
3307                if (val) {
3308                   step2_flag[low] = step2_flag[high] = 1;
3309                   step2_flag[j] = 1;
3310                   if (val >= room)
3311                      if (highroom > lowroom)
3312                         finalY[j] = val - lowroom + pred;
3313                      else
3314                         finalY[j] = pred - val + highroom - 1;
3315                   else
3316                      if (val & 1)
3317                         finalY[j] = pred - ((val+1)>>1);
3318                      else
3319                         finalY[j] = pred + (val>>1);
3320                } else {
3321                   step2_flag[j] = 0;
3322                   finalY[j] = pred;
3323                }
3324             }
3325 
3326 #ifdef STB_VORBIS_NO_DEFER_FLOOR
3327             do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
3328 #else
3329             // defer final floor computation until _after_ residue
3330             for (j=0; j < g->values; ++j) {
3331                if (!step2_flag[j])
3332                   finalY[j] = -1;
3333             }
3334 #endif
3335          } else {
3336            error:
3337             zero_channel[i] = TRUE;
3338          }
3339          // So we just defer everything else to later
3340 
3341          // at this point we've decoded the floor into buffer
3342       }
3343    }
3344    CHECK(f);
3345    // at this point we've decoded all floors
3346 
3347    if (f->alloc.alloc_buffer)
3348       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3349 
3350    // re-enable coupled channels if necessary
3351    memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
3352    for (i=0; i < map->coupling_steps; ++i)
3353       if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
3354          zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
3355       }
3356 
3357    CHECK(f);
3358 // RESIDUE DECODE
3359    for (i=0; i < map->submaps; ++i) {
3360       float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
3361       int r;
3362       uint8 do_not_decode[256];
3363       int ch = 0;
3364       for (j=0; j < f->channels; ++j) {
3365          if (map->chan[j].mux == i) {
3366             if (zero_channel[j]) {
3367                do_not_decode[ch] = TRUE;
3368                residue_buffers[ch] = NULL;
3369             } else {
3370                do_not_decode[ch] = FALSE;
3371                residue_buffers[ch] = f->channel_buffers[j];
3372             }
3373             ++ch;
3374          }
3375       }
3376       r = map->submap_residue[i];
3377       decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
3378    }
3379 
3380    if (f->alloc.alloc_buffer)
3381       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3382    CHECK(f);
3383 
3384 // INVERSE COUPLING
3385    for (i = map->coupling_steps-1; i >= 0; --i) {
3386       int n2 = n >> 1;
3387       float *m = f->channel_buffers[map->chan[i].magnitude];
3388       float *a = f->channel_buffers[map->chan[i].angle    ];
3389       for (j=0; j < n2; ++j) {
3390          float a2,m2;
3391          if (m[j] > 0)
3392             if (a[j] > 0)
3393                m2 = m[j], a2 = m[j] - a[j];
3394             else
3395                a2 = m[j], m2 = m[j] + a[j];
3396          else
3397             if (a[j] > 0)
3398                m2 = m[j], a2 = m[j] + a[j];
3399             else
3400                a2 = m[j], m2 = m[j] - a[j];
3401          m[j] = m2;
3402          a[j] = a2;
3403       }
3404    }
3405    CHECK(f);
3406 
3407    // finish decoding the floors
3408 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3409    for (i=0; i < f->channels; ++i) {
3410       if (really_zero_channel[i]) {
3411          memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3412       } else {
3413          do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
3414       }
3415    }
3416 #else
3417    for (i=0; i < f->channels; ++i) {
3418       if (really_zero_channel[i]) {
3419          memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3420       } else {
3421          for (j=0; j < n2; ++j)
3422             f->channel_buffers[i][j] *= f->floor_buffers[i][j];
3423       }
3424    }
3425 #endif
3426 
3427 // INVERSE MDCT
3428    CHECK(f);
3429    for (i=0; i < f->channels; ++i)
3430       inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
3431    CHECK(f);
3432 
3433    // this shouldn't be necessary, unless we exited on an error
3434    // and want to flush to get to the next packet
3435    flush_packet(f);
3436 
3437    if (f->first_decode) {
3438       // assume we start so first non-discarded sample is sample 0
3439       // this isn't to spec, but spec would require us to read ahead
3440       // and decode the size of all current frames--could be done,
3441       // but presumably it's not a commonly used feature
3442       f->current_loc = -n2; // start of first frame is positioned for discard
3443       // we might have to discard samples "from" the next frame too,
3444       // if we're lapping a large block then a small at the start?
3445       f->discard_samples_deferred = n - right_end;
3446       f->current_loc_valid = TRUE;
3447       f->first_decode = FALSE;
3448    } else if (f->discard_samples_deferred) {
3449       if (f->discard_samples_deferred >= right_start - left_start) {
3450          f->discard_samples_deferred -= (right_start - left_start);
3451          left_start = right_start;
3452          *p_left = left_start;
3453       } else {
3454          left_start += f->discard_samples_deferred;
3455          *p_left = left_start;
3456          f->discard_samples_deferred = 0;
3457       }
3458    } else if (f->previous_length == 0 && f->current_loc_valid) {
3459       // we're recovering from a seek... that means we're going to discard
3460       // the samples from this packet even though we know our position from
3461       // the last page header, so we need to update the position based on
3462       // the discarded samples here
3463       // but wait, the code below is going to add this in itself even
3464       // on a discard, so we don't need to do it here...
3465    }
3466 
3467    // check if we have ogg information about the sample # for this packet
3468    if (f->last_seg_which == f->end_seg_with_known_loc) {
3469       // if we have a valid current loc, and this is final:
3470       if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
3471          uint32 current_end = f->known_loc_for_packet;
3472          // then let's infer the size of the (probably) short final frame
3473          if (current_end < f->current_loc + (right_end-left_start)) {
3474             if (current_end < f->current_loc) {
3475                // negative truncation, that's impossible!
3476                *len = 0;
3477             } else {
3478                *len = current_end - f->current_loc;
3479             }
3480             *len += left_start; // this doesn't seem right, but has no ill effect on my test files
3481             if (*len > right_end) *len = right_end; // this should never happen
3482             f->current_loc += *len;
3483             return TRUE;
3484          }
3485       }
3486       // otherwise, just set our sample loc
3487       // guess that the ogg granule pos refers to the _middle_ of the
3488       // last frame?
3489       // set f->current_loc to the position of left_start
3490       f->current_loc = f->known_loc_for_packet - (n2-left_start);
3491       f->current_loc_valid = TRUE;
3492    }
3493    if (f->current_loc_valid)
3494       f->current_loc += (right_start - left_start);
3495 
3496    if (f->alloc.alloc_buffer)
3497       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3498    *len = right_end;  // ignore samples after the window goes to 0
3499    CHECK(f);
3500 
3501    return TRUE;
3502 }
3503 
vorbis_decode_packet(vorb * f,int * len,int * p_left,int * p_right)3504 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
3505 {
3506    int mode, left_end, right_end;
3507    if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
3508    return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
3509 }
3510 
vorbis_finish_frame(stb_vorbis * f,int len,int left,int right)3511 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
3512 {
3513    int prev,i,j;
3514    // we use right&left (the start of the right- and left-window sin()-regions)
3515    // to determine how much to return, rather than inferring from the rules
3516    // (same result, clearer code); 'left' indicates where our sin() window
3517    // starts, therefore where the previous window's right edge starts, and
3518    // therefore where to start mixing from the previous buffer. 'right'
3519    // indicates where our sin() ending-window starts, therefore that's where
3520    // we start saving, and where our returned-data ends.
3521 
3522    // mixin from previous window
3523    if (f->previous_length) {
3524       int i,j, n = f->previous_length;
3525       float *w = get_window(f, n);
3526       for (i=0; i < f->channels; ++i) {
3527          for (j=0; j < n; ++j)
3528             f->channel_buffers[i][left+j] =
3529                f->channel_buffers[i][left+j]*w[    j] +
3530                f->previous_window[i][     j]*w[n-1-j];
3531       }
3532    }
3533 
3534    prev = f->previous_length;
3535 
3536    // last half of this data becomes previous window
3537    f->previous_length = len - right;
3538 
3539    // @OPTIMIZE: could avoid this copy by double-buffering the
3540    // output (flipping previous_window with channel_buffers), but
3541    // then previous_window would have to be 2x as large, and
3542    // channel_buffers couldn't be temp mem (although they're NOT
3543    // currently temp mem, they could be (unless we want to level
3544    // performance by spreading out the computation))
3545    for (i=0; i < f->channels; ++i)
3546       for (j=0; right+j < len; ++j)
3547          f->previous_window[i][j] = f->channel_buffers[i][right+j];
3548 
3549    if (!prev)
3550       // there was no previous packet, so this data isn't valid...
3551       // this isn't entirely true, only the would-have-overlapped data
3552       // isn't valid, but this seems to be what the spec requires
3553       return 0;
3554 
3555    // truncate a short frame
3556    if (len < right) right = len;
3557 
3558    f->samples_output += right-left;
3559 
3560    return right - left;
3561 }
3562 
vorbis_pump_first_frame(stb_vorbis * f)3563 static int vorbis_pump_first_frame(stb_vorbis *f)
3564 {
3565    int len, right, left, res;
3566    res = vorbis_decode_packet(f, &len, &left, &right);
3567    if (res)
3568       vorbis_finish_frame(f, len, left, right);
3569    return res;
3570 }
3571 
3572 #ifndef STB_VORBIS_NO_PUSHDATA_API
is_whole_packet_present(stb_vorbis * f,int end_page)3573 static int is_whole_packet_present(stb_vorbis *f, int end_page)
3574 {
3575    // make sure that we have the packet available before continuing...
3576    // this requires a full ogg parse, but we know we can fetch from f->stream
3577 
3578    // instead of coding this out explicitly, we could save the current read state,
3579    // read the next packet with get8() until end-of-packet, check f->eof, then
3580    // reset the state? but that would be slower, esp. since we'd have over 256 bytes
3581    // of state to restore (primarily the page segment table)
3582 
3583    int s = f->next_seg, first = TRUE;
3584    uint8 *p = f->stream;
3585 
3586    if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
3587       for (; s < f->segment_count; ++s) {
3588          p += f->segments[s];
3589          if (f->segments[s] < 255)               // stop at first short segment
3590             break;
3591       }
3592       // either this continues, or it ends it...
3593       if (end_page)
3594          if (s < f->segment_count-1)             return error(f, VORBIS_invalid_stream);
3595       if (s == f->segment_count)
3596          s = -1; // set 'crosses page' flag
3597       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
3598       first = FALSE;
3599    }
3600    for (; s == -1;) {
3601       uint8 *q;
3602       int n;
3603 
3604       // check that we have the page header ready
3605       if (p + 26 >= f->stream_end)               return error(f, VORBIS_need_more_data);
3606       // validate the page
3607       if (memcmp(p, ogg_page_header, 4))         return error(f, VORBIS_invalid_stream);
3608       if (p[4] != 0)                             return error(f, VORBIS_invalid_stream);
3609       if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
3610          if (f->previous_length)
3611             if ((p[5] & PAGEFLAG_continued_packet))  return error(f, VORBIS_invalid_stream);
3612          // if no previous length, we're resynching, so we can come in on a continued-packet,
3613          // which we'll just drop
3614       } else {
3615          if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3616       }
3617       n = p[26]; // segment counts
3618       q = p+27;  // q points to segment table
3619       p = q + n; // advance past header
3620       // make sure we've read the segment table
3621       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
3622       for (s=0; s < n; ++s) {
3623          p += q[s];
3624          if (q[s] < 255)
3625             break;
3626       }
3627       if (end_page)
3628          if (s < n-1)                            return error(f, VORBIS_invalid_stream);
3629       if (s == n)
3630          s = -1; // set 'crosses page' flag
3631       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
3632       first = FALSE;
3633    }
3634    return TRUE;
3635 }
3636 #endif // !STB_VORBIS_NO_PUSHDATA_API
3637 
start_decoder(vorb * f)3638 static int start_decoder(vorb *f)
3639 {
3640    uint8 header[6], x,y;
3641    int len,i,j,k, max_submaps = 0;
3642    int longest_floorlist=0;
3643 
3644    // first page, first packet
3645 
3646    if (!start_page(f))                              return FALSE;
3647    // validate page flag
3648    if (!(f->page_flag & PAGEFLAG_first_page))       return error(f, VORBIS_invalid_first_page);
3649    if (f->page_flag & PAGEFLAG_last_page)           return error(f, VORBIS_invalid_first_page);
3650    if (f->page_flag & PAGEFLAG_continued_packet)    return error(f, VORBIS_invalid_first_page);
3651    // check for expected packet length
3652    if (f->segment_count != 1)                       return error(f, VORBIS_invalid_first_page);
3653    if (f->segments[0] != 30)                        return error(f, VORBIS_invalid_first_page);
3654    // read packet
3655    // check packet header
3656    if (get8(f) != VORBIS_packet_id)                 return error(f, VORBIS_invalid_first_page);
3657    if (!getn(f, header, 6))                         return error(f, VORBIS_unexpected_eof);
3658    if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_first_page);
3659    // vorbis_version
3660    if (get32(f) != 0)                               return error(f, VORBIS_invalid_first_page);
3661    f->channels = get8(f); if (!f->channels)         return error(f, VORBIS_invalid_first_page);
3662    if (f->channels > STB_VORBIS_MAX_CHANNELS)       return error(f, VORBIS_too_many_channels);
3663    f->sample_rate = get32(f); if (!f->sample_rate)  return error(f, VORBIS_invalid_first_page);
3664    get32(f); // bitrate_maximum
3665    get32(f); // bitrate_nominal
3666    get32(f); // bitrate_minimum
3667    x = get8(f);
3668    {
3669       int log0,log1;
3670       log0 = x & 15;
3671       log1 = x >> 4;
3672       f->blocksize_0 = 1 << log0;
3673       f->blocksize_1 = 1 << log1;
3674       if (log0 < 6 || log0 > 13)                       return error(f, VORBIS_invalid_setup);
3675       if (log1 < 6 || log1 > 13)                       return error(f, VORBIS_invalid_setup);
3676       if (log0 > log1)                                 return error(f, VORBIS_invalid_setup);
3677    }
3678 
3679    // framing_flag
3680    x = get8(f);
3681    if (!(x & 1))                                    return error(f, VORBIS_invalid_first_page);
3682 
3683    // second packet!
3684    if (!start_page(f))                              return FALSE;
3685 
3686    if (!start_packet(f))                            return FALSE;
3687    do {
3688       len = next_segment(f);
3689       skip(f, len);
3690       f->bytes_in_seg = 0;
3691    } while (len);
3692 
3693    // third packet!
3694    if (!start_packet(f))                            return FALSE;
3695 
3696    #ifndef STB_VORBIS_NO_PUSHDATA_API
3697    if (IS_PUSH_MODE(f)) {
3698       if (!is_whole_packet_present(f, TRUE)) {
3699          // convert error in ogg header to write type
3700          if (f->error == VORBIS_invalid_stream)
3701             f->error = VORBIS_invalid_setup;
3702          return FALSE;
3703       }
3704    }
3705    #endif
3706 
3707    crc32_init(); // always init it, to avoid multithread race conditions
3708 
3709    if (get8_packet(f) != VORBIS_packet_setup)       return error(f, VORBIS_invalid_setup);
3710    for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3711    if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_setup);
3712 
3713    // codebooks
3714 
3715    f->codebook_count = get_bits(f,8) + 1;
3716    f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
3717    if (f->codebooks == NULL)                        return error(f, VORBIS_outofmem);
3718    memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
3719    for (i=0; i < f->codebook_count; ++i) {
3720       uint32 *values;
3721       int ordered, sorted_count;
3722       int total=0;
3723       uint8 *lengths;
3724       Codebook *c = f->codebooks+i;
3725       CHECK(f);
3726       x = get_bits(f, 8); if (x != 0x42)            return error(f, VORBIS_invalid_setup);
3727       x = get_bits(f, 8); if (x != 0x43)            return error(f, VORBIS_invalid_setup);
3728       x = get_bits(f, 8); if (x != 0x56)            return error(f, VORBIS_invalid_setup);
3729       x = get_bits(f, 8);
3730       c->dimensions = (get_bits(f, 8)<<8) + x;
3731       x = get_bits(f, 8);
3732       y = get_bits(f, 8);
3733       c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
3734       ordered = get_bits(f,1);
3735       c->sparse = ordered ? 0 : get_bits(f,1);
3736 
3737       if (c->dimensions == 0 && c->entries != 0)    return error(f, VORBIS_invalid_setup);
3738 
3739       if (c->sparse)
3740          lengths = (uint8 *) setup_temp_malloc(f, c->entries);
3741       else
3742          lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3743 
3744       if (!lengths) return error(f, VORBIS_outofmem);
3745 
3746       if (ordered) {
3747          int current_entry = 0;
3748          int current_length = get_bits(f,5) + 1;
3749          while (current_entry < c->entries) {
3750             int limit = c->entries - current_entry;
3751             int n = get_bits(f, ilog(limit));
3752             if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
3753             memset(lengths + current_entry, current_length, n);
3754             current_entry += n;
3755             ++current_length;
3756          }
3757       } else {
3758          for (j=0; j < c->entries; ++j) {
3759             int present = c->sparse ? get_bits(f,1) : 1;
3760             if (present) {
3761                lengths[j] = get_bits(f, 5) + 1;
3762                ++total;
3763                if (lengths[j] == 32)
3764                   return error(f, VORBIS_invalid_setup);
3765             } else {
3766                lengths[j] = NO_CODE;
3767             }
3768          }
3769       }
3770 
3771       if (c->sparse && total >= c->entries >> 2) {
3772          // convert sparse items to non-sparse!
3773          if (c->entries > (int) f->setup_temp_memory_required)
3774             f->setup_temp_memory_required = c->entries;
3775 
3776          c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3777          if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem);
3778          memcpy(c->codeword_lengths, lengths, c->entries);
3779          setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
3780          lengths = c->codeword_lengths;
3781          c->sparse = 0;
3782       }
3783 
3784       // compute the size of the sorted tables
3785       if (c->sparse) {
3786          sorted_count = total;
3787       } else {
3788          sorted_count = 0;
3789          #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
3790          for (j=0; j < c->entries; ++j)
3791             if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
3792                ++sorted_count;
3793          #endif
3794       }
3795 
3796       c->sorted_entries = sorted_count;
3797       values = NULL;
3798 
3799       CHECK(f);
3800       if (!c->sparse) {
3801          c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
3802          if (!c->codewords)                  return error(f, VORBIS_outofmem);
3803       } else {
3804          unsigned int size;
3805          if (c->sorted_entries) {
3806             c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries);
3807             if (!c->codeword_lengths)           return error(f, VORBIS_outofmem);
3808             c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
3809             if (!c->codewords)                  return error(f, VORBIS_outofmem);
3810             values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
3811             if (!values)                        return error(f, VORBIS_outofmem);
3812          }
3813          size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
3814          if (size > f->setup_temp_memory_required)
3815             f->setup_temp_memory_required = size;
3816       }
3817 
3818       if (!compute_codewords(c, lengths, c->entries, values)) {
3819          if (c->sparse) setup_temp_free(f, values, 0);
3820          return error(f, VORBIS_invalid_setup);
3821       }
3822 
3823       if (c->sorted_entries) {
3824          // allocate an extra slot for sentinels
3825          c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
3826          if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem);
3827          // allocate an extra slot at the front so that c->sorted_values[-1] is defined
3828          // so that we can catch that case without an extra if
3829          c->sorted_values    = ( int   *) setup_malloc(f, sizeof(*c->sorted_values   ) * (c->sorted_entries+1));
3830          if (c->sorted_values == NULL) return error(f, VORBIS_outofmem);
3831          ++c->sorted_values;
3832          c->sorted_values[-1] = -1;
3833          compute_sorted_huffman(c, lengths, values);
3834       }
3835 
3836       if (c->sparse) {
3837          setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
3838          setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
3839          setup_temp_free(f, lengths, c->entries);
3840          c->codewords = NULL;
3841       }
3842 
3843       compute_accelerated_huffman(c);
3844 
3845       CHECK(f);
3846       c->lookup_type = get_bits(f, 4);
3847       if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
3848       if (c->lookup_type > 0) {
3849          uint16 *mults;
3850          c->minimum_value = float32_unpack(get_bits(f, 32));
3851          c->delta_value = float32_unpack(get_bits(f, 32));
3852          c->value_bits = get_bits(f, 4)+1;
3853          c->sequence_p = get_bits(f,1);
3854          if (c->lookup_type == 1) {
3855             c->lookup_values = lookup1_values(c->entries, c->dimensions);
3856          } else {
3857             c->lookup_values = c->entries * c->dimensions;
3858          }
3859          if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup);
3860          mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
3861          if (mults == NULL) return error(f, VORBIS_outofmem);
3862          for (j=0; j < (int) c->lookup_values; ++j) {
3863             int q = get_bits(f, c->value_bits);
3864             if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
3865             mults[j] = q;
3866          }
3867 
3868 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3869          if (c->lookup_type == 1) {
3870             int len, sparse = c->sparse;
3871             float last=0;
3872             // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3873             if (sparse) {
3874                if (c->sorted_entries == 0) goto skip;
3875                c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
3876             } else
3877                c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries        * c->dimensions);
3878             if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3879             len = sparse ? c->sorted_entries : c->entries;
3880             for (j=0; j < len; ++j) {
3881                unsigned int z = sparse ? c->sorted_values[j] : j;
3882                unsigned int div=1;
3883                for (k=0; k < c->dimensions; ++k) {
3884                   int off = (z / div) % c->lookup_values;
3885                   float val = mults[off];
3886                   val = mults[off]*c->delta_value + c->minimum_value + last;
3887                   c->multiplicands[j*c->dimensions + k] = val;
3888                   if (c->sequence_p)
3889                      last = val;
3890                   if (k+1 < c->dimensions) {
3891                      if (div > UINT_MAX / (unsigned int) c->lookup_values) {
3892                         setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
3893                         return error(f, VORBIS_invalid_setup);
3894                      }
3895                      div *= c->lookup_values;
3896                   }
3897                }
3898             }
3899             c->lookup_type = 2;
3900          }
3901          else
3902 #endif
3903          {
3904             float last=0;
3905             CHECK(f);
3906             c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
3907             if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3908             for (j=0; j < (int) c->lookup_values; ++j) {
3909                float val = mults[j] * c->delta_value + c->minimum_value + last;
3910                c->multiplicands[j] = val;
3911                if (c->sequence_p)
3912                   last = val;
3913             }
3914          }
3915 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3916         skip:;
3917 #endif
3918          setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values);
3919 
3920          CHECK(f);
3921       }
3922       CHECK(f);
3923    }
3924 
3925    // time domain transfers (notused)
3926 
3927    x = get_bits(f, 6) + 1;
3928    for (i=0; i < x; ++i) {
3929       uint32 z = get_bits(f, 16);
3930       if (z != 0) return error(f, VORBIS_invalid_setup);
3931    }
3932 
3933    // Floors
3934    f->floor_count = get_bits(f, 6)+1;
3935    f->floor_config = (Floor *)  setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
3936    if (f->floor_config == NULL) return error(f, VORBIS_outofmem);
3937    for (i=0; i < f->floor_count; ++i) {
3938       f->floor_types[i] = get_bits(f, 16);
3939       if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
3940       if (f->floor_types[i] == 0) {
3941          Floor0 *g = &f->floor_config[i].floor0;
3942          g->order = get_bits(f,8);
3943          g->rate = get_bits(f,16);
3944          g->bark_map_size = get_bits(f,16);
3945          g->amplitude_bits = get_bits(f,6);
3946          g->amplitude_offset = get_bits(f,8);
3947          g->number_of_books = get_bits(f,4) + 1;
3948          for (j=0; j < g->number_of_books; ++j)
3949             g->book_list[j] = get_bits(f,8);
3950          return error(f, VORBIS_feature_not_supported);
3951       } else {
3952          stbv__floor_ordering p[31*8+2];
3953          Floor1 *g = &f->floor_config[i].floor1;
3954          int max_class = -1;
3955          g->partitions = get_bits(f, 5);
3956          for (j=0; j < g->partitions; ++j) {
3957             g->partition_class_list[j] = get_bits(f, 4);
3958             if (g->partition_class_list[j] > max_class)
3959                max_class = g->partition_class_list[j];
3960          }
3961          for (j=0; j <= max_class; ++j) {
3962             g->class_dimensions[j] = get_bits(f, 3)+1;
3963             g->class_subclasses[j] = get_bits(f, 2);
3964             if (g->class_subclasses[j]) {
3965                g->class_masterbooks[j] = get_bits(f, 8);
3966                if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3967             }
3968             for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
3969                g->subclass_books[j][k] = get_bits(f,8)-1;
3970                if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3971             }
3972          }
3973          g->floor1_multiplier = get_bits(f,2)+1;
3974          g->rangebits = get_bits(f,4);
3975          g->Xlist[0] = 0;
3976          g->Xlist[1] = 1 << g->rangebits;
3977          g->values = 2;
3978          for (j=0; j < g->partitions; ++j) {
3979             int c = g->partition_class_list[j];
3980             for (k=0; k < g->class_dimensions[c]; ++k) {
3981                g->Xlist[g->values] = get_bits(f, g->rangebits);
3982                ++g->values;
3983             }
3984          }
3985          // precompute the sorting
3986          for (j=0; j < g->values; ++j) {
3987             p[j].x = g->Xlist[j];
3988             p[j].id = j;
3989          }
3990          qsort(p, g->values, sizeof(p[0]), point_compare);
3991          for (j=0; j < g->values; ++j)
3992             g->sorted_order[j] = (uint8) p[j].id;
3993          // precompute the neighbors
3994          for (j=2; j < g->values; ++j) {
3995             int low,hi;
3996             neighbors(g->Xlist, j, &low,&hi);
3997             g->neighbors[j][0] = low;
3998             g->neighbors[j][1] = hi;
3999          }
4000 
4001          if (g->values > longest_floorlist)
4002             longest_floorlist = g->values;
4003       }
4004    }
4005 
4006    // Residue
4007    f->residue_count = get_bits(f, 6)+1;
4008    f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0]));
4009    if (f->residue_config == NULL) return error(f, VORBIS_outofmem);
4010    memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0]));
4011    for (i=0; i < f->residue_count; ++i) {
4012       uint8 residue_cascade[64];
4013       Residue *r = f->residue_config+i;
4014       f->residue_types[i] = get_bits(f, 16);
4015       if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
4016       r->begin = get_bits(f, 24);
4017       r->end = get_bits(f, 24);
4018       if (r->end < r->begin) return error(f, VORBIS_invalid_setup);
4019       r->part_size = get_bits(f,24)+1;
4020       r->classifications = get_bits(f,6)+1;
4021       r->classbook = get_bits(f,8);
4022       if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup);
4023       for (j=0; j < r->classifications; ++j) {
4024          uint8 high_bits=0;
4025          uint8 low_bits=get_bits(f,3);
4026          if (get_bits(f,1))
4027             high_bits = get_bits(f,5);
4028          residue_cascade[j] = high_bits*8 + low_bits;
4029       }
4030       r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
4031       if (r->residue_books == NULL) return error(f, VORBIS_outofmem);
4032       for (j=0; j < r->classifications; ++j) {
4033          for (k=0; k < 8; ++k) {
4034             if (residue_cascade[j] & (1 << k)) {
4035                r->residue_books[j][k] = get_bits(f, 8);
4036                if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
4037             } else {
4038                r->residue_books[j][k] = -1;
4039             }
4040          }
4041       }
4042       // precompute the classifications[] array to avoid inner-loop mod/divide
4043       // call it 'classdata' since we already have r->classifications
4044       r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
4045       if (!r->classdata) return error(f, VORBIS_outofmem);
4046       memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
4047       for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
4048          int classwords = f->codebooks[r->classbook].dimensions;
4049          int temp = j;
4050          r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
4051          if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem);
4052          for (k=classwords-1; k >= 0; --k) {
4053             r->classdata[j][k] = temp % r->classifications;
4054             temp /= r->classifications;
4055          }
4056       }
4057    }
4058 
4059    f->mapping_count = get_bits(f,6)+1;
4060    f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
4061    if (f->mapping == NULL) return error(f, VORBIS_outofmem);
4062    memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping));
4063    for (i=0; i < f->mapping_count; ++i) {
4064       Mapping *m = f->mapping + i;
4065       int mapping_type = get_bits(f,16);
4066       if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
4067       m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
4068       if (m->chan == NULL) return error(f, VORBIS_outofmem);
4069       if (get_bits(f,1))
4070          m->submaps = get_bits(f,4)+1;
4071       else
4072          m->submaps = 1;
4073       if (m->submaps > max_submaps)
4074          max_submaps = m->submaps;
4075       if (get_bits(f,1)) {
4076          m->coupling_steps = get_bits(f,8)+1;
4077          for (k=0; k < m->coupling_steps; ++k) {
4078             m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
4079             m->chan[k].angle = get_bits(f, ilog(f->channels-1));
4080             if (m->chan[k].magnitude >= f->channels)        return error(f, VORBIS_invalid_setup);
4081             if (m->chan[k].angle     >= f->channels)        return error(f, VORBIS_invalid_setup);
4082             if (m->chan[k].magnitude == m->chan[k].angle)   return error(f, VORBIS_invalid_setup);
4083          }
4084       } else
4085          m->coupling_steps = 0;
4086 
4087       // reserved field
4088       if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
4089       if (m->submaps > 1) {
4090          for (j=0; j < f->channels; ++j) {
4091             m->chan[j].mux = get_bits(f, 4);
4092             if (m->chan[j].mux >= m->submaps)                return error(f, VORBIS_invalid_setup);
4093          }
4094       } else
4095          // @SPECIFICATION: this case is missing from the spec
4096          for (j=0; j < f->channels; ++j)
4097             m->chan[j].mux = 0;
4098 
4099       for (j=0; j < m->submaps; ++j) {
4100          get_bits(f,8); // discard
4101          m->submap_floor[j] = get_bits(f,8);
4102          m->submap_residue[j] = get_bits(f,8);
4103          if (m->submap_floor[j] >= f->floor_count)      return error(f, VORBIS_invalid_setup);
4104          if (m->submap_residue[j] >= f->residue_count)  return error(f, VORBIS_invalid_setup);
4105       }
4106    }
4107 
4108    // Modes
4109    f->mode_count = get_bits(f, 6)+1;
4110    for (i=0; i < f->mode_count; ++i) {
4111       Mode *m = f->mode_config+i;
4112       m->blockflag = get_bits(f,1);
4113       m->windowtype = get_bits(f,16);
4114       m->transformtype = get_bits(f,16);
4115       m->mapping = get_bits(f,8);
4116       if (m->windowtype != 0)                 return error(f, VORBIS_invalid_setup);
4117       if (m->transformtype != 0)              return error(f, VORBIS_invalid_setup);
4118       if (m->mapping >= f->mapping_count)     return error(f, VORBIS_invalid_setup);
4119    }
4120 
4121    flush_packet(f);
4122 
4123    f->previous_length = 0;
4124 
4125    for (i=0; i < f->channels; ++i) {
4126       f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
4127       f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4128       f->finalY[i]          = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
4129       if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem);
4130       memset(f->channel_buffers[i], 0, sizeof(float) * f->blocksize_1);
4131       #ifdef STB_VORBIS_NO_DEFER_FLOOR
4132       f->floor_buffers[i]   = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4133       if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem);
4134       #endif
4135    }
4136 
4137    if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
4138    if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
4139    f->blocksize[0] = f->blocksize_0;
4140    f->blocksize[1] = f->blocksize_1;
4141 
4142 #ifdef STB_VORBIS_DIVIDE_TABLE
4143    if (integer_divide_table[1][1]==0)
4144       for (i=0; i < DIVTAB_NUMER; ++i)
4145          for (j=1; j < DIVTAB_DENOM; ++j)
4146             integer_divide_table[i][j] = i / j;
4147 #endif
4148 
4149    // compute how much temporary memory is needed
4150 
4151    // 1.
4152    {
4153       uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
4154       uint32 classify_mem;
4155       int i,max_part_read=0;
4156       for (i=0; i < f->residue_count; ++i) {
4157          Residue *r = f->residue_config + i;
4158          unsigned int actual_size = f->blocksize_1 / 2;
4159          unsigned int limit_r_begin = r->begin < actual_size ? r->begin : actual_size;
4160          unsigned int limit_r_end   = r->end   < actual_size ? r->end   : actual_size;
4161          int n_read = limit_r_end - limit_r_begin;
4162          int part_read = n_read / r->part_size;
4163          if (part_read > max_part_read)
4164             max_part_read = part_read;
4165       }
4166       #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
4167       classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
4168       #else
4169       classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
4170       #endif
4171 
4172       // maximum reasonable partition size is f->blocksize_1
4173 
4174       f->temp_memory_required = classify_mem;
4175       if (imdct_mem > f->temp_memory_required)
4176          f->temp_memory_required = imdct_mem;
4177    }
4178 
4179    f->first_decode = TRUE;
4180 
4181    if (f->alloc.alloc_buffer) {
4182       assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
4183       // check if there's enough temp memory so we don't error later
4184       if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
4185          return error(f, VORBIS_outofmem);
4186    }
4187 
4188    f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
4189 
4190    return TRUE;
4191 }
4192 
vorbis_deinit(stb_vorbis * p)4193 static void vorbis_deinit(stb_vorbis *p)
4194 {
4195    int i,j;
4196    if (p->residue_config) {
4197       for (i=0; i < p->residue_count; ++i) {
4198          Residue *r = p->residue_config+i;
4199          if (r->classdata) {
4200             for (j=0; j < p->codebooks[r->classbook].entries; ++j)
4201                setup_free(p, r->classdata[j]);
4202             setup_free(p, r->classdata);
4203          }
4204          setup_free(p, r->residue_books);
4205       }
4206    }
4207 
4208    if (p->codebooks) {
4209       CHECK(p);
4210       for (i=0; i < p->codebook_count; ++i) {
4211          Codebook *c = p->codebooks + i;
4212          setup_free(p, c->codeword_lengths);
4213          setup_free(p, c->multiplicands);
4214          setup_free(p, c->codewords);
4215          setup_free(p, c->sorted_codewords);
4216          // c->sorted_values[-1] is the first entry in the array
4217          setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
4218       }
4219       setup_free(p, p->codebooks);
4220    }
4221    setup_free(p, p->floor_config);
4222    setup_free(p, p->residue_config);
4223    if (p->mapping) {
4224       for (i=0; i < p->mapping_count; ++i)
4225          setup_free(p, p->mapping[i].chan);
4226       setup_free(p, p->mapping);
4227    }
4228    CHECK(p);
4229    for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
4230       setup_free(p, p->channel_buffers[i]);
4231       setup_free(p, p->previous_window[i]);
4232       #ifdef STB_VORBIS_NO_DEFER_FLOOR
4233       setup_free(p, p->floor_buffers[i]);
4234       #endif
4235       setup_free(p, p->finalY[i]);
4236    }
4237    for (i=0; i < 2; ++i) {
4238       setup_free(p, p->A[i]);
4239       setup_free(p, p->B[i]);
4240       setup_free(p, p->C[i]);
4241       setup_free(p, p->window[i]);
4242       setup_free(p, p->bit_reverse[i]);
4243    }
4244    #ifndef STB_VORBIS_NO_STDIO
4245    if (p->close_on_free) fclose(p->f);
4246    #endif
4247 }
4248 
stb_vorbis_close(stb_vorbis * p)4249 void stb_vorbis_close(stb_vorbis *p)
4250 {
4251    if (p == NULL) return;
4252    vorbis_deinit(p);
4253    setup_free(p,p);
4254 }
4255 
vorbis_init(stb_vorbis * p,const stb_vorbis_alloc * z)4256 static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
4257 {
4258    memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start
4259    if (z) {
4260       p->alloc = *z;
4261       p->alloc.alloc_buffer_length_in_bytes = (p->alloc.alloc_buffer_length_in_bytes+3) & ~3;
4262       p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
4263    }
4264    p->eof = 0;
4265    p->error = VORBIS__no_error;
4266    p->stream = NULL;
4267    p->codebooks = NULL;
4268    p->page_crc_tests = -1;
4269    #ifndef STB_VORBIS_NO_STDIO
4270    p->close_on_free = FALSE;
4271    p->f = NULL;
4272    #endif
4273 }
4274 
stb_vorbis_get_sample_offset(stb_vorbis * f)4275 int stb_vorbis_get_sample_offset(stb_vorbis *f)
4276 {
4277    if (f->current_loc_valid)
4278       return f->current_loc;
4279    else
4280       return -1;
4281 }
4282 
stb_vorbis_get_info(stb_vorbis * f)4283 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
4284 {
4285    stb_vorbis_info d;
4286    d.channels = f->channels;
4287    d.sample_rate = f->sample_rate;
4288    d.setup_memory_required = f->setup_memory_required;
4289    d.setup_temp_memory_required = f->setup_temp_memory_required;
4290    d.temp_memory_required = f->temp_memory_required;
4291    d.max_frame_size = f->blocksize_1 >> 1;
4292    return d;
4293 }
4294 
stb_vorbis_get_error(stb_vorbis * f)4295 int stb_vorbis_get_error(stb_vorbis *f)
4296 {
4297    int e = f->error;
4298    f->error = VORBIS__no_error;
4299    return e;
4300 }
4301 
vorbis_alloc(stb_vorbis * f)4302 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
4303 {
4304    stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
4305    return p;
4306 }
4307 
4308 #ifndef STB_VORBIS_NO_PUSHDATA_API
4309 
stb_vorbis_flush_pushdata(stb_vorbis * f)4310 void stb_vorbis_flush_pushdata(stb_vorbis *f)
4311 {
4312    f->previous_length = 0;
4313    f->page_crc_tests  = 0;
4314    f->discard_samples_deferred = 0;
4315    f->current_loc_valid = FALSE;
4316    f->first_decode = FALSE;
4317    f->samples_output = 0;
4318    f->channel_buffer_start = 0;
4319    f->channel_buffer_end = 0;
4320 }
4321 
vorbis_search_for_page_pushdata(vorb * f,uint8 * data,int data_len)4322 static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
4323 {
4324    int i,n;
4325    for (i=0; i < f->page_crc_tests; ++i)
4326       f->scan[i].bytes_done = 0;
4327 
4328    // if we have room for more scans, search for them first, because
4329    // they may cause us to stop early if their header is incomplete
4330    if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
4331       if (data_len < 4) return 0;
4332       data_len -= 3; // need to look for 4-byte sequence, so don't miss
4333                      // one that straddles a boundary
4334       for (i=0; i < data_len; ++i) {
4335          if (data[i] == 0x4f) {
4336             if (0==memcmp(data+i, ogg_page_header, 4)) {
4337                int j,len;
4338                uint32 crc;
4339                // make sure we have the whole page header
4340                if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
4341                   // only read up to this page start, so hopefully we'll
4342                   // have the whole page header start next time
4343                   data_len = i;
4344                   break;
4345                }
4346                // ok, we have it all; compute the length of the page
4347                len = 27 + data[i+26];
4348                for (j=0; j < data[i+26]; ++j)
4349                   len += data[i+27+j];
4350                // scan everything up to the embedded crc (which we must 0)
4351                crc = 0;
4352                for (j=0; j < 22; ++j)
4353                   crc = crc32_update(crc, data[i+j]);
4354                // now process 4 0-bytes
4355                for (   ; j < 26; ++j)
4356                   crc = crc32_update(crc, 0);
4357                // len is the total number of bytes we need to scan
4358                n = f->page_crc_tests++;
4359                f->scan[n].bytes_left = len-j;
4360                f->scan[n].crc_so_far = crc;
4361                f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24);
4362                // if the last frame on a page is continued to the next, then
4363                // we can't recover the sample_loc immediately
4364                if (data[i+27+data[i+26]-1] == 255)
4365                   f->scan[n].sample_loc = ~0;
4366                else
4367                   f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24);
4368                f->scan[n].bytes_done = i+j;
4369                if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT)
4370                   break;
4371                // keep going if we still have room for more
4372             }
4373          }
4374       }
4375    }
4376 
4377    for (i=0; i < f->page_crc_tests;) {
4378       uint32 crc;
4379       int j;
4380       int n = f->scan[i].bytes_done;
4381       int m = f->scan[i].bytes_left;
4382       if (m > data_len - n) m = data_len - n;
4383       // m is the bytes to scan in the current chunk
4384       crc = f->scan[i].crc_so_far;
4385       for (j=0; j < m; ++j)
4386          crc = crc32_update(crc, data[n+j]);
4387       f->scan[i].bytes_left -= m;
4388       f->scan[i].crc_so_far = crc;
4389       if (f->scan[i].bytes_left == 0) {
4390          // does it match?
4391          if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
4392             // Houston, we have page
4393             data_len = n+m; // consumption amount is wherever that scan ended
4394             f->page_crc_tests = -1; // drop out of page scan mode
4395             f->previous_length = 0; // decode-but-don't-output one frame
4396             f->next_seg = -1;       // start a new page
4397             f->current_loc = f->scan[i].sample_loc; // set the current sample location
4398                                     // to the amount we'd have decoded had we decoded this page
4399             f->current_loc_valid = f->current_loc != ~0U;
4400             return data_len;
4401          }
4402          // delete entry
4403          f->scan[i] = f->scan[--f->page_crc_tests];
4404       } else {
4405          ++i;
4406       }
4407    }
4408 
4409    return data_len;
4410 }
4411 
4412 // return value: number of bytes we used
stb_vorbis_decode_frame_pushdata(stb_vorbis * f,const uint8 * data,int data_len,int * channels,float *** output,int * samples)4413 int stb_vorbis_decode_frame_pushdata(
4414          stb_vorbis *f,                   // the file we're decoding
4415          const uint8 *data, int data_len, // the memory available for decoding
4416          int *channels,                   // place to write number of float * buffers
4417          float ***output,                 // place to write float ** array of float * buffers
4418          int *samples                     // place to write number of output samples
4419      )
4420 {
4421    int i;
4422    int len,right,left;
4423 
4424    if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4425 
4426    if (f->page_crc_tests >= 0) {
4427       *samples = 0;
4428       return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len);
4429    }
4430 
4431    f->stream     = (uint8 *) data;
4432    f->stream_end = (uint8 *) data + data_len;
4433    f->error      = VORBIS__no_error;
4434 
4435    // check that we have the entire packet in memory
4436    if (!is_whole_packet_present(f, FALSE)) {
4437       *samples = 0;
4438       return 0;
4439    }
4440 
4441    if (!vorbis_decode_packet(f, &len, &left, &right)) {
4442       // save the actual error we encountered
4443       enum STBVorbisError error = f->error;
4444       if (error == VORBIS_bad_packet_type) {
4445          // flush and resynch
4446          f->error = VORBIS__no_error;
4447          while (get8_packet(f) != EOP)
4448             if (f->eof) break;
4449          *samples = 0;
4450          return (int) (f->stream - data);
4451       }
4452       if (error == VORBIS_continued_packet_flag_invalid) {
4453          if (f->previous_length == 0) {
4454             // we may be resynching, in which case it's ok to hit one
4455             // of these; just discard the packet
4456             f->error = VORBIS__no_error;
4457             while (get8_packet(f) != EOP)
4458                if (f->eof) break;
4459             *samples = 0;
4460             return (int) (f->stream - data);
4461          }
4462       }
4463       // if we get an error while parsing, what to do?
4464       // well, it DEFINITELY won't work to continue from where we are!
4465       stb_vorbis_flush_pushdata(f);
4466       // restore the error that actually made us bail
4467       f->error = error;
4468       *samples = 0;
4469       return 1;
4470    }
4471 
4472    // success!
4473    len = vorbis_finish_frame(f, len, left, right);
4474    for (i=0; i < f->channels; ++i)
4475       f->outputs[i] = f->channel_buffers[i] + left;
4476 
4477    if (channels) *channels = f->channels;
4478    *samples = len;
4479    *output = f->outputs;
4480    return (int) (f->stream - data);
4481 }
4482 
stb_vorbis_open_pushdata(const unsigned char * data,int data_len,int * data_used,int * error,const stb_vorbis_alloc * alloc)4483 stb_vorbis *stb_vorbis_open_pushdata(
4484          const unsigned char *data, int data_len, // the memory available for decoding
4485          int *data_used,              // only defined if result is not NULL
4486          int *error, const stb_vorbis_alloc *alloc)
4487 {
4488    stb_vorbis *f, p;
4489    vorbis_init(&p, alloc);
4490    p.stream     = (uint8 *) data;
4491    p.stream_end = (uint8 *) data + data_len;
4492    p.push_mode  = TRUE;
4493    if (!start_decoder(&p)) {
4494       if (p.eof)
4495          *error = VORBIS_need_more_data;
4496       else
4497          *error = p.error;
4498       return NULL;
4499    }
4500    f = vorbis_alloc(&p);
4501    if (f) {
4502       *f = p;
4503       *data_used = (int) (f->stream - data);
4504       *error = 0;
4505       return f;
4506    } else {
4507       vorbis_deinit(&p);
4508       return NULL;
4509    }
4510 }
4511 #endif // STB_VORBIS_NO_PUSHDATA_API
4512 
stb_vorbis_get_file_offset(stb_vorbis * f)4513 unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
4514 {
4515    #ifndef STB_VORBIS_NO_PUSHDATA_API
4516    if (f->push_mode) return 0;
4517    #endif
4518    if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start);
4519    #ifndef STB_VORBIS_NO_STDIO
4520    return (unsigned int) (ftell(f->f) - f->f_start);
4521    #endif
4522 }
4523 
4524 #ifndef STB_VORBIS_NO_PULLDATA_API
4525 //
4526 // DATA-PULLING API
4527 //
4528 
vorbis_find_page(stb_vorbis * f,uint32 * end,uint32 * last)4529 static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
4530 {
4531    for(;;) {
4532       int n;
4533       if (f->eof) return 0;
4534       n = get8(f);
4535       if (n == 0x4f) { // page header candidate
4536          unsigned int retry_loc = stb_vorbis_get_file_offset(f);
4537          int i;
4538          // check if we're off the end of a file_section stream
4539          if (retry_loc - 25 > f->stream_len)
4540             return 0;
4541          // check the rest of the header
4542          for (i=1; i < 4; ++i)
4543             if (get8(f) != ogg_page_header[i])
4544                break;
4545          if (f->eof) return 0;
4546          if (i == 4) {
4547             uint8 header[27];
4548             uint32 i, crc, goal, len;
4549             for (i=0; i < 4; ++i)
4550                header[i] = ogg_page_header[i];
4551             for (; i < 27; ++i)
4552                header[i] = get8(f);
4553             if (f->eof) return 0;
4554             if (header[4] != 0) goto invalid;
4555             goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24);
4556             for (i=22; i < 26; ++i)
4557                header[i] = 0;
4558             crc = 0;
4559             for (i=0; i < 27; ++i)
4560                crc = crc32_update(crc, header[i]);
4561             len = 0;
4562             for (i=0; i < header[26]; ++i) {
4563                int s = get8(f);
4564                crc = crc32_update(crc, s);
4565                len += s;
4566             }
4567             if (len && f->eof) return 0;
4568             for (i=0; i < len; ++i)
4569                crc = crc32_update(crc, get8(f));
4570             // finished parsing probable page
4571             if (crc == goal) {
4572                // we could now check that it's either got the last
4573                // page flag set, OR it's followed by the capture
4574                // pattern, but I guess TECHNICALLY you could have
4575                // a file with garbage between each ogg page and recover
4576                // from it automatically? So even though that paranoia
4577                // might decrease the chance of an invalid decode by
4578                // another 2^32, not worth it since it would hose those
4579                // invalid-but-useful files?
4580                if (end)
4581                   *end = stb_vorbis_get_file_offset(f);
4582                if (last) {
4583                   if (header[5] & 0x04)
4584                      *last = 1;
4585                   else
4586                      *last = 0;
4587                }
4588                set_file_offset(f, retry_loc-1);
4589                return 1;
4590             }
4591          }
4592         invalid:
4593          // not a valid page, so rewind and look for next one
4594          set_file_offset(f, retry_loc);
4595       }
4596    }
4597 }
4598 
4599 
4600 #define SAMPLE_unknown  0xffffffff
4601 
4602 // seeking is implemented with a binary search, which narrows down the range to
4603 // 64K, before using a linear search (because finding the synchronization
4604 // pattern can be expensive, and the chance we'd find the end page again is
4605 // relatively high for small ranges)
4606 //
4607 // two initial interpolation-style probes are used at the start of the search
4608 // to try to bound either side of the binary search sensibly, while still
4609 // working in O(log n) time if they fail.
4610 
get_seek_page_info(stb_vorbis * f,ProbedPage * z)4611 static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
4612 {
4613    uint8 header[27], lacing[255];
4614    int i,len;
4615 
4616    // record where the page starts
4617    z->page_start = stb_vorbis_get_file_offset(f);
4618 
4619    // parse the header
4620    getn(f, header, 27);
4621    if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
4622       return 0;
4623    getn(f, lacing, header[26]);
4624 
4625    // determine the length of the payload
4626    len = 0;
4627    for (i=0; i < header[26]; ++i)
4628       len += lacing[i];
4629 
4630    // this implies where the page ends
4631    z->page_end = z->page_start + 27 + header[26] + len;
4632 
4633    // read the last-decoded sample out of the data
4634    z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
4635 
4636    // restore file state to where we were
4637    set_file_offset(f, z->page_start);
4638    return 1;
4639 }
4640 
4641 // rarely used function to seek back to the preceeding page while finding the
4642 // start of a packet
go_to_page_before(stb_vorbis * f,unsigned int limit_offset)4643 static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
4644 {
4645    unsigned int previous_safe, end;
4646 
4647    // now we want to seek back 64K from the limit
4648    if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset)
4649       previous_safe = limit_offset - 65536;
4650    else
4651       previous_safe = f->first_audio_page_offset;
4652 
4653    set_file_offset(f, previous_safe);
4654 
4655    while (vorbis_find_page(f, &end, NULL)) {
4656       if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
4657          return 1;
4658       set_file_offset(f, end);
4659    }
4660 
4661    return 0;
4662 }
4663 
4664 // implements the search logic for finding a page and starting decoding. if
4665 // the function succeeds, current_loc_valid will be true and current_loc will
4666 // be less than or equal to the provided sample number (the closer the
4667 // better).
seek_to_sample_coarse(stb_vorbis * f,uint32 sample_number)4668 static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
4669 {
4670    ProbedPage left, right, mid;
4671    int i, start_seg_with_known_loc, end_pos, page_start;
4672    uint32 delta, stream_length, padding;
4673    double offset, bytes_per_sample;
4674    int probe = 0;
4675 
4676    // find the last page and validate the target sample
4677    stream_length = stb_vorbis_stream_length_in_samples(f);
4678    if (stream_length == 0)            return error(f, VORBIS_seek_without_length);
4679    if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
4680 
4681    // this is the maximum difference between the window-center (which is the
4682    // actual granule position value), and the right-start (which the spec
4683    // indicates should be the granule position (give or take one)).
4684    padding = ((f->blocksize_1 - f->blocksize_0) >> 2);
4685    if (sample_number < padding)
4686       sample_number = 0;
4687    else
4688       sample_number -= padding;
4689 
4690    left = f->p_first;
4691    while (left.last_decoded_sample == ~0U) {
4692       // (untested) the first page does not have a 'last_decoded_sample'
4693       set_file_offset(f, left.page_end);
4694       if (!get_seek_page_info(f, &left)) goto error;
4695    }
4696 
4697    right = f->p_last;
4698    assert(right.last_decoded_sample != ~0U);
4699 
4700    // starting from the start is handled differently
4701    if (sample_number <= left.last_decoded_sample) {
4702       if (stb_vorbis_seek_start(f))
4703          return 1;
4704       return 0;
4705    }
4706 
4707    while (left.page_end != right.page_start) {
4708       assert(left.page_end < right.page_start);
4709       // search range in bytes
4710       delta = right.page_start - left.page_end;
4711       if (delta <= 65536) {
4712          // there's only 64K left to search - handle it linearly
4713          set_file_offset(f, left.page_end);
4714       } else {
4715          if (probe < 2) {
4716             if (probe == 0) {
4717                // first probe (interpolate)
4718                double data_bytes = right.page_end - left.page_start;
4719                bytes_per_sample = data_bytes / right.last_decoded_sample;
4720                offset = left.page_start + bytes_per_sample * (sample_number - left.last_decoded_sample);
4721             } else {
4722                // second probe (try to bound the other side)
4723                double error = ((double) sample_number - mid.last_decoded_sample) * bytes_per_sample;
4724                if (error >= 0 && error <  8000) error =  8000;
4725                if (error <  0 && error > -8000) error = -8000;
4726                offset += error * 2;
4727             }
4728 
4729             // ensure the offset is valid
4730             if (offset < left.page_end)
4731                offset = left.page_end;
4732             if (offset > right.page_start - 65536)
4733                offset = right.page_start - 65536;
4734 
4735             set_file_offset(f, (unsigned int) offset);
4736          } else {
4737             // binary search for large ranges (offset by 32K to ensure
4738             // we don't hit the right page)
4739             set_file_offset(f, left.page_end + (delta / 2) - 32768);
4740          }
4741 
4742          if (!vorbis_find_page(f, NULL, NULL)) goto error;
4743       }
4744 
4745       for (;;) {
4746          if (!get_seek_page_info(f, &mid)) goto error;
4747          if (mid.last_decoded_sample != ~0U) break;
4748          // (untested) no frames end on this page
4749          set_file_offset(f, mid.page_end);
4750          assert(mid.page_start < right.page_start);
4751       }
4752 
4753       // if we've just found the last page again then we're in a tricky file,
4754       // and we're close enough.
4755       if (mid.page_start == right.page_start)
4756          break;
4757 
4758       if (sample_number < mid.last_decoded_sample)
4759          right = mid;
4760       else
4761          left = mid;
4762 
4763       ++probe;
4764    }
4765 
4766    // seek back to start of the last packet
4767    page_start = left.page_start;
4768    set_file_offset(f, page_start);
4769    if (!start_page(f)) return error(f, VORBIS_seek_failed);
4770    end_pos = f->end_seg_with_known_loc;
4771    assert(end_pos >= 0);
4772 
4773    for (;;) {
4774       for (i = end_pos; i > 0; --i)
4775          if (f->segments[i-1] != 255)
4776             break;
4777 
4778       start_seg_with_known_loc = i;
4779 
4780       if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet))
4781          break;
4782 
4783       // (untested) the final packet begins on an earlier page
4784       if (!go_to_page_before(f, page_start))
4785          goto error;
4786 
4787       page_start = stb_vorbis_get_file_offset(f);
4788       if (!start_page(f)) goto error;
4789       end_pos = f->segment_count - 1;
4790    }
4791 
4792    // prepare to start decoding
4793    f->current_loc_valid = FALSE;
4794    f->last_seg = FALSE;
4795    f->valid_bits = 0;
4796    f->packet_bytes = 0;
4797    f->bytes_in_seg = 0;
4798    f->previous_length = 0;
4799    f->next_seg = start_seg_with_known_loc;
4800 
4801    for (i = 0; i < start_seg_with_known_loc; i++)
4802       skip(f, f->segments[i]);
4803 
4804    // start decoding (optimizable - this frame is generally discarded)
4805    if (!vorbis_pump_first_frame(f))
4806       return 0;
4807    if (f->current_loc > sample_number)
4808       return error(f, VORBIS_seek_failed);
4809    return 1;
4810 
4811 error:
4812    // try to restore the file to a valid state
4813    stb_vorbis_seek_start(f);
4814    return error(f, VORBIS_seek_failed);
4815 }
4816 
4817 // the same as vorbis_decode_initial, but without advancing
peek_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)4818 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
4819 {
4820    int bits_read, bytes_read;
4821 
4822    if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
4823       return 0;
4824 
4825    // either 1 or 2 bytes were read, figure out which so we can rewind
4826    bits_read = 1 + ilog(f->mode_count-1);
4827    if (f->mode_config[*mode].blockflag)
4828       bits_read += 2;
4829    bytes_read = (bits_read + 7) / 8;
4830 
4831    f->bytes_in_seg += bytes_read;
4832    f->packet_bytes -= bytes_read;
4833    skip(f, -bytes_read);
4834    if (f->next_seg == -1)
4835       f->next_seg = f->segment_count - 1;
4836    else
4837       f->next_seg--;
4838    f->valid_bits = 0;
4839 
4840    return 1;
4841 }
4842 
stb_vorbis_seek_frame(stb_vorbis * f,unsigned int sample_number)4843 int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
4844 {
4845    uint32 max_frame_samples;
4846 
4847    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4848 
4849    // fast page-level search
4850    if (!seek_to_sample_coarse(f, sample_number))
4851       return 0;
4852 
4853    assert(f->current_loc_valid);
4854    assert(f->current_loc <= sample_number);
4855 
4856    // linear search for the relevant packet
4857    max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2;
4858    while (f->current_loc < sample_number) {
4859       int left_start, left_end, right_start, right_end, mode, frame_samples;
4860       if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
4861          return error(f, VORBIS_seek_failed);
4862       // calculate the number of samples returned by the next frame
4863       frame_samples = right_start - left_start;
4864       if (f->current_loc + frame_samples > sample_number) {
4865          return 1; // the next frame will contain the sample
4866       } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) {
4867          // there's a chance the frame after this could contain the sample
4868          vorbis_pump_first_frame(f);
4869       } else {
4870          // this frame is too early to be relevant
4871          f->current_loc += frame_samples;
4872          f->previous_length = 0;
4873          maybe_start_packet(f);
4874          flush_packet(f);
4875       }
4876    }
4877    // the next frame will start with the sample
4878    assert(f->current_loc == sample_number);
4879    return 1;
4880 }
4881 
stb_vorbis_seek(stb_vorbis * f,unsigned int sample_number)4882 int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
4883 {
4884    if (!stb_vorbis_seek_frame(f, sample_number))
4885       return 0;
4886 
4887    if (sample_number != f->current_loc) {
4888       int n;
4889       uint32 frame_start = f->current_loc;
4890       stb_vorbis_get_frame_float(f, &n, NULL);
4891       assert(sample_number > frame_start);
4892       assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end);
4893       f->channel_buffer_start += (sample_number - frame_start);
4894    }
4895 
4896    return 1;
4897 }
4898 
stb_vorbis_seek_start(stb_vorbis * f)4899 int stb_vorbis_seek_start(stb_vorbis *f)
4900 {
4901    if (IS_PUSH_MODE(f)) { return error(f, VORBIS_invalid_api_mixing); }
4902    set_file_offset(f, f->first_audio_page_offset);
4903    f->previous_length = 0;
4904    f->first_decode = TRUE;
4905    f->next_seg = -1;
4906    return vorbis_pump_first_frame(f);
4907 }
4908 
stb_vorbis_stream_length_in_samples(stb_vorbis * f)4909 unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
4910 {
4911    unsigned int restore_offset, previous_safe;
4912    unsigned int end, last_page_loc;
4913 
4914    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4915    if (!f->total_samples) {
4916       unsigned int last;
4917       uint32 lo,hi;
4918       char header[6];
4919 
4920       // first, store the current decode position so we can restore it
4921       restore_offset = stb_vorbis_get_file_offset(f);
4922 
4923       // now we want to seek back 64K from the end (the last page must
4924       // be at most a little less than 64K, but let's allow a little slop)
4925       if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
4926          previous_safe = f->stream_len - 65536;
4927       else
4928          previous_safe = f->first_audio_page_offset;
4929 
4930       set_file_offset(f, previous_safe);
4931       // previous_safe is now our candidate 'earliest known place that seeking
4932       // to will lead to the final page'
4933 
4934       if (!vorbis_find_page(f, &end, &last)) {
4935          // if we can't find a page, we're hosed!
4936          f->error = VORBIS_cant_find_last_page;
4937          f->total_samples = 0xffffffff;
4938          goto done;
4939       }
4940 
4941       // check if there are more pages
4942       last_page_loc = stb_vorbis_get_file_offset(f);
4943 
4944       // stop when the last_page flag is set, not when we reach eof;
4945       // this allows us to stop short of a 'file_section' end without
4946       // explicitly checking the length of the section
4947       while (!last) {
4948          set_file_offset(f, end);
4949          if (!vorbis_find_page(f, &end, &last)) {
4950             // the last page we found didn't have the 'last page' flag
4951             // set. whoops!
4952             break;
4953          }
4954          previous_safe = last_page_loc+1;
4955          last_page_loc = stb_vorbis_get_file_offset(f);
4956       }
4957 
4958       set_file_offset(f, last_page_loc);
4959 
4960       // parse the header
4961       getn(f, (unsigned char *)header, 6);
4962       // extract the absolute granule position
4963       lo = get32(f);
4964       hi = get32(f);
4965       if (lo == 0xffffffff && hi == 0xffffffff) {
4966          f->error = VORBIS_cant_find_last_page;
4967          f->total_samples = SAMPLE_unknown;
4968          goto done;
4969       }
4970       if (hi)
4971          lo = 0xfffffffe; // saturate
4972       f->total_samples = lo;
4973 
4974       f->p_last.page_start = last_page_loc;
4975       f->p_last.page_end   = end;
4976       f->p_last.last_decoded_sample = lo;
4977 
4978      done:
4979       set_file_offset(f, restore_offset);
4980    }
4981    return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
4982 }
4983 
stb_vorbis_stream_length_in_seconds(stb_vorbis * f)4984 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
4985 {
4986    return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
4987 }
4988 
4989 
4990 
stb_vorbis_get_frame_float(stb_vorbis * f,int * channels,float *** output)4991 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
4992 {
4993    int len, right,left,i;
4994    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4995 
4996    if (!vorbis_decode_packet(f, &len, &left, &right)) {
4997       f->channel_buffer_start = f->channel_buffer_end = 0;
4998       return 0;
4999    }
5000 
5001    len = vorbis_finish_frame(f, len, left, right);
5002    for (i=0; i < f->channels; ++i)
5003       f->outputs[i] = f->channel_buffers[i] + left;
5004 
5005    f->channel_buffer_start = left;
5006    f->channel_buffer_end   = left+len;
5007 
5008    if (channels) *channels = f->channels;
5009    if (output)   *output = f->outputs;
5010    return len;
5011 }
5012 
5013 #ifndef STB_VORBIS_NO_STDIO
5014 
stb_vorbis_open_file_section(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc,unsigned int length)5015 stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length)
5016 {
5017    stb_vorbis *f, p;
5018    vorbis_init(&p, alloc);
5019    p.f = file;
5020    p.f_start = (uint32) ftell(file);
5021    p.stream_len   = length;
5022    p.close_on_free = close_on_free;
5023    if (start_decoder(&p)) {
5024       f = vorbis_alloc(&p);
5025       if (f) {
5026          *f = p;
5027          vorbis_pump_first_frame(f);
5028          return f;
5029       }
5030    }
5031    if (error) *error = p.error;
5032    vorbis_deinit(&p);
5033    return NULL;
5034 }
5035 
stb_vorbis_open_file(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc)5036 stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc)
5037 {
5038    unsigned int len, start;
5039    start = (unsigned int) ftell(file);
5040    fseek(file, 0, SEEK_END);
5041    len = (unsigned int) (ftell(file) - start);
5042    fseek(file, start, SEEK_SET);
5043    return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
5044 }
5045 
stb_vorbis_open_filename(const char * filename,int * error,const stb_vorbis_alloc * alloc)5046 stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
5047 {
5048    FILE *f = fopen(filename, "rb");
5049    if (f)
5050       return stb_vorbis_open_file(f, TRUE, error, alloc);
5051    if (error) *error = VORBIS_file_open_failure;
5052    return NULL;
5053 }
5054 #endif // STB_VORBIS_NO_STDIO
5055 
stb_vorbis_open_memory(const unsigned char * data,int len,int * error,const stb_vorbis_alloc * alloc)5056 stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc)
5057 {
5058    stb_vorbis *f, p;
5059    if (data == NULL) return NULL;
5060    vorbis_init(&p, alloc);
5061    p.stream = (uint8 *) data;
5062    p.stream_end = (uint8 *) data + len;
5063    p.stream_start = (uint8 *) p.stream;
5064    p.stream_len = len;
5065    p.push_mode = FALSE;
5066    if (start_decoder(&p)) {
5067       f = vorbis_alloc(&p);
5068       if (f) {
5069          *f = p;
5070          vorbis_pump_first_frame(f);
5071          if (error) *error = VORBIS__no_error;
5072          return f;
5073       }
5074    }
5075    if (error) *error = p.error;
5076    vorbis_deinit(&p);
5077    return NULL;
5078 }
5079 
5080 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
5081 #define PLAYBACK_MONO     1
5082 #define PLAYBACK_LEFT     2
5083 #define PLAYBACK_RIGHT    4
5084 
5085 #define L  (PLAYBACK_LEFT  | PLAYBACK_MONO)
5086 #define C  (PLAYBACK_LEFT  | PLAYBACK_RIGHT | PLAYBACK_MONO)
5087 #define R  (PLAYBACK_RIGHT | PLAYBACK_MONO)
5088 
5089 static int8 channel_position[7][6] =
5090 {
5091    { 0 },
5092    { C },
5093    { L, R },
5094    { L, C, R },
5095    { L, R, L, R },
5096    { L, C, R, L, R },
5097    { L, C, R, L, R, C },
5098 };
5099 
5100 
5101 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
5102    typedef union {
5103       float f;
5104       int i;
5105    } float_conv;
5106    typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4];
5107    #define FASTDEF(x) float_conv x
5108    // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
5109    #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT))
5110    #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22))
5111    #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s))
5112    #define check_endianness()
5113 #else
5114    #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s))))
5115    #define check_endianness()
5116    #define FASTDEF(x)
5117 #endif
5118 
copy_samples(short * dest,float * src,int len)5119 static void copy_samples(short *dest, float *src, int len)
5120 {
5121    int i;
5122    check_endianness();
5123    for (i=0; i < len; ++i) {
5124       FASTDEF(temp);
5125       int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i],15);
5126       if ((unsigned int) (v + 32768) > 65535)
5127          v = v < 0 ? -32768 : 32767;
5128       dest[i] = v;
5129    }
5130 }
5131 
compute_samples(int mask,short * output,int num_c,float ** data,int d_offset,int len)5132 static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
5133 {
5134    #define BUFFER_SIZE  32
5135    float buffer[BUFFER_SIZE];
5136    int i,j,o,n = BUFFER_SIZE;
5137    check_endianness();
5138    for (o = 0; o < len; o += BUFFER_SIZE) {
5139       memset(buffer, 0, sizeof(buffer));
5140       if (o + n > len) n = len - o;
5141       for (j=0; j < num_c; ++j) {
5142          if (channel_position[num_c][j] & mask) {
5143             for (i=0; i < n; ++i)
5144                buffer[i] += data[j][d_offset+o+i];
5145          }
5146       }
5147       for (i=0; i < n; ++i) {
5148          FASTDEF(temp);
5149          int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5150          if ((unsigned int) (v + 32768) > 65535)
5151             v = v < 0 ? -32768 : 32767;
5152          output[o+i] = v;
5153       }
5154    }
5155 }
5156 
compute_stereo_samples(short * output,int num_c,float ** data,int d_offset,int len)5157 static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
5158 {
5159    #define BUFFER_SIZE  32
5160    float buffer[BUFFER_SIZE];
5161    int i,j,o,n = BUFFER_SIZE >> 1;
5162    // o is the offset in the source data
5163    check_endianness();
5164    for (o = 0; o < len; o += BUFFER_SIZE >> 1) {
5165       // o2 is the offset in the output data
5166       int o2 = o << 1;
5167       memset(buffer, 0, sizeof(buffer));
5168       if (o + n > len) n = len - o;
5169       for (j=0; j < num_c; ++j) {
5170          int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);
5171          if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) {
5172             for (i=0; i < n; ++i) {
5173                buffer[i*2+0] += data[j][d_offset+o+i];
5174                buffer[i*2+1] += data[j][d_offset+o+i];
5175             }
5176          } else if (m == PLAYBACK_LEFT) {
5177             for (i=0; i < n; ++i) {
5178                buffer[i*2+0] += data[j][d_offset+o+i];
5179             }
5180          } else if (m == PLAYBACK_RIGHT) {
5181             for (i=0; i < n; ++i) {
5182                buffer[i*2+1] += data[j][d_offset+o+i];
5183             }
5184          }
5185       }
5186       for (i=0; i < (n<<1); ++i) {
5187          FASTDEF(temp);
5188          int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5189          if ((unsigned int) (v + 32768) > 65535)
5190             v = v < 0 ? -32768 : 32767;
5191          output[o2+i] = v;
5192       }
5193    }
5194 }
5195 
convert_samples_short(int buf_c,short ** buffer,int b_offset,int data_c,float ** data,int d_offset,int samples)5196 static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
5197 {
5198    int i;
5199    if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5200       static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
5201       for (i=0; i < buf_c; ++i)
5202          compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples);
5203    } else {
5204       int limit = buf_c < data_c ? buf_c : data_c;
5205       for (i=0; i < limit; ++i)
5206          copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
5207       for (   ; i < buf_c; ++i)
5208          memset(buffer[i]+b_offset, 0, sizeof(short) * samples);
5209    }
5210 }
5211 
stb_vorbis_get_frame_short(stb_vorbis * f,int num_c,short ** buffer,int num_samples)5212 int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
5213 {
5214    float **output;
5215    int len = stb_vorbis_get_frame_float(f, NULL, &output);
5216    if (len > num_samples) len = num_samples;
5217    if (len)
5218       convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len);
5219    return len;
5220 }
5221 
convert_channels_short_interleaved(int buf_c,short * buffer,int data_c,float ** data,int d_offset,int len)5222 static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
5223 {
5224    int i;
5225    check_endianness();
5226    if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5227       assert(buf_c == 2);
5228       for (i=0; i < buf_c; ++i)
5229          compute_stereo_samples(buffer, data_c, data, d_offset, len);
5230    } else {
5231       int limit = buf_c < data_c ? buf_c : data_c;
5232       int j;
5233       for (j=0; j < len; ++j) {
5234          for (i=0; i < limit; ++i) {
5235             FASTDEF(temp);
5236             float f = data[i][d_offset+j];
5237             int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15);
5238             if ((unsigned int) (v + 32768) > 65535)
5239                v = v < 0 ? -32768 : 32767;
5240             *buffer++ = v;
5241          }
5242          for (   ; i < buf_c; ++i)
5243             *buffer++ = 0;
5244       }
5245    }
5246 }
5247 
stb_vorbis_get_frame_short_interleaved(stb_vorbis * f,int num_c,short * buffer,int num_shorts)5248 int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
5249 {
5250    float **output;
5251    int len;
5252    if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
5253    len = stb_vorbis_get_frame_float(f, NULL, &output);
5254    if (len) {
5255       if (len*num_c > num_shorts) len = num_shorts / num_c;
5256       convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len);
5257    }
5258    return len;
5259 }
5260 
stb_vorbis_get_samples_short_interleaved(stb_vorbis * f,int channels,short * buffer,int num_shorts)5261 int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
5262 {
5263    float **outputs;
5264    int len = num_shorts / channels;
5265    int n=0;
5266    int z = f->channels;
5267    if (z > channels) z = channels;
5268    while (n < len) {
5269       int k = f->channel_buffer_end - f->channel_buffer_start;
5270       if (n+k >= len) k = len - n;
5271       if (k)
5272          convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5273       buffer += k*channels;
5274       n += k;
5275       f->channel_buffer_start += k;
5276       if (n == len) break;
5277       if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5278    }
5279    return n;
5280 }
5281 
stb_vorbis_get_samples_short(stb_vorbis * f,int channels,short ** buffer,int len)5282 int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
5283 {
5284    float **outputs;
5285    int n=0;
5286    int z = f->channels;
5287    if (z > channels) z = channels;
5288    while (n < len) {
5289       int k = f->channel_buffer_end - f->channel_buffer_start;
5290       if (n+k >= len) k = len - n;
5291       if (k)
5292          convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5293       n += k;
5294       f->channel_buffer_start += k;
5295       if (n == len) break;
5296       if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5297    }
5298    return n;
5299 }
5300 
5301 #ifndef STB_VORBIS_NO_STDIO
stb_vorbis_decode_filename(const char * filename,int * channels,int * sample_rate,short ** output)5302 int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output)
5303 {
5304    int data_len, offset, total, limit, error;
5305    short *data;
5306    stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
5307    if (v == NULL) return -1;
5308    limit = v->channels * 4096;
5309    *channels = v->channels;
5310    if (sample_rate)
5311       *sample_rate = v->sample_rate;
5312    offset = data_len = 0;
5313    total = limit;
5314    data = (short *) malloc(total * sizeof(*data));
5315    if (data == NULL) {
5316       stb_vorbis_close(v);
5317       return -2;
5318    }
5319    for (;;) {
5320       int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5321       if (n == 0) break;
5322       data_len += n;
5323       offset += n * v->channels;
5324       if (offset + limit > total) {
5325          short *data2;
5326          total *= 2;
5327          data2 = (short *) realloc(data, total * sizeof(*data));
5328          if (data2 == NULL) {
5329             free(data);
5330             stb_vorbis_close(v);
5331             return -2;
5332          }
5333          data = data2;
5334       }
5335    }
5336    *output = data;
5337    stb_vorbis_close(v);
5338    return data_len;
5339 }
5340 #endif // NO_STDIO
5341 
stb_vorbis_decode_memory(const uint8 * mem,int len,int * channels,int * sample_rate,short ** output)5342 int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output)
5343 {
5344    int data_len, offset, total, limit, error;
5345    short *data;
5346    stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
5347    if (v == NULL) return -1;
5348    limit = v->channels * 4096;
5349    *channels = v->channels;
5350    if (sample_rate)
5351       *sample_rate = v->sample_rate;
5352    offset = data_len = 0;
5353    total = limit;
5354    data = (short *) malloc(total * sizeof(*data));
5355    if (data == NULL) {
5356       stb_vorbis_close(v);
5357       return -2;
5358    }
5359    for (;;) {
5360       int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5361       if (n == 0) break;
5362       data_len += n;
5363       offset += n * v->channels;
5364       if (offset + limit > total) {
5365          short *data2;
5366          total *= 2;
5367          data2 = (short *) realloc(data, total * sizeof(*data));
5368          if (data2 == NULL) {
5369             free(data);
5370             stb_vorbis_close(v);
5371             return -2;
5372          }
5373          data = data2;
5374       }
5375    }
5376    *output = data;
5377    stb_vorbis_close(v);
5378    return data_len;
5379 }
5380 #endif // STB_VORBIS_NO_INTEGER_CONVERSION
5381 
stb_vorbis_get_samples_float_interleaved(stb_vorbis * f,int channels,float * buffer,int num_floats)5382 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
5383 {
5384    float **outputs;
5385    int len = num_floats / channels;
5386    int n=0;
5387    int z = f->channels;
5388    if (z > channels) z = channels;
5389    while (n < len) {
5390       int i,j;
5391       int k = f->channel_buffer_end - f->channel_buffer_start;
5392       if (n+k >= len) k = len - n;
5393       for (j=0; j < k; ++j) {
5394          for (i=0; i < z; ++i)
5395             *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
5396          for (   ; i < channels; ++i)
5397             *buffer++ = 0;
5398       }
5399       n += k;
5400       f->channel_buffer_start += k;
5401       if (n == len)
5402          break;
5403       if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5404          break;
5405    }
5406    return n;
5407 }
5408 
stb_vorbis_get_samples_float(stb_vorbis * f,int channels,float ** buffer,int num_samples)5409 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
5410 {
5411    float **outputs;
5412    int n=0;
5413    int z = f->channels;
5414    if (z > channels) z = channels;
5415    while (n < num_samples) {
5416       int i;
5417       int k = f->channel_buffer_end - f->channel_buffer_start;
5418       if (n+k >= num_samples) k = num_samples - n;
5419       if (k) {
5420          for (i=0; i < z; ++i)
5421             memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k);
5422          for (   ; i < channels; ++i)
5423             memset(buffer[i]+n, 0, sizeof(float) * k);
5424       }
5425       n += k;
5426       f->channel_buffer_start += k;
5427       if (n == num_samples)
5428          break;
5429       if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5430          break;
5431    }
5432    return n;
5433 }
5434 #endif // STB_VORBIS_NO_PULLDATA_API
5435 
5436 /* Version history
5437     1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
5438     1.11    - 2017-07-23 - fix MinGW compilation
5439     1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
5440     1.09    - 2016-04-04 - back out 'avoid discarding last frame' fix from previous version
5441     1.08    - 2016-04-02 - fixed multiple warnings; fix setup memory leaks;
5442                            avoid discarding last frame of audio data
5443     1.07    - 2015-01-16 - fixed some warnings, fix mingw, const-correct API
5444                            some more crash fixes when out of memory or with corrupt files
5445     1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
5446                            some crash fixes when out of memory or with corrupt files
5447     1.05    - 2015-04-19 - don't define __forceinline if it's redundant
5448     1.04    - 2014-08-27 - fix missing const-correct case in API
5449     1.03    - 2014-08-07 - Warning fixes
5450     1.02    - 2014-07-09 - Declare qsort compare function _cdecl on windows
5451     1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float
5452     1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in multichannel
5453                            (API change) report sample rate for decode-full-file funcs
5454     0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5455     0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5456     0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5457     0.99993 - remove assert that fired on legal files with empty tables
5458     0.99992 - rewind-to-start
5459     0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5460     0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5461     0.9998 - add a full-decode function with a memory source
5462     0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5463     0.9996 - query length of vorbis stream in samples/seconds
5464     0.9995 - bugfix to another optimization that only happened in certain files
5465     0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5466     0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5467     0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5468     0.9991 - performance improvement of IMDCT
5469     0.999 - (should have been 0.9990) performance improvement of IMDCT
5470     0.998 - no-CRT support from Casey Muratori
5471     0.997 - bugfixes for bugs found by Terje Mathisen
5472     0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5473     0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5474     0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5475     0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5476     0.992 - fixes for MinGW warning
5477     0.991 - turn fast-float-conversion on by default
5478     0.990 - fix push-mode seek recovery if you seek into the headers
5479     0.98b - fix to bad release of 0.98
5480     0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5481     0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5482     0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5483     0.95 - clamping code for 16-bit functions
5484     0.94 - not publically released
5485     0.93 - fixed all-zero-floor case (was decoding garbage)
5486     0.92 - fixed a memory leak
5487     0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5488     0.90 - first public release
5489 */
5490 
5491 #endif // STB_VORBIS_HEADER_ONLY
5492 
5493 
5494 /*
5495 ------------------------------------------------------------------------------
5496 This software is available under 2 licenses -- choose whichever you prefer.
5497 ------------------------------------------------------------------------------
5498 ALTERNATIVE A - MIT License
5499 Copyright (c) 2017 Sean Barrett
5500 Permission is hereby granted, free of charge, to any person obtaining a copy of
5501 this software and associated documentation files (the "Software"), to deal in
5502 the Software without restriction, including without limitation the rights to
5503 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
5504 of the Software, and to permit persons to whom the Software is furnished to do
5505 so, subject to the following conditions:
5506 The above copyright notice and this permission notice shall be included in all
5507 copies or substantial portions of the Software.
5508 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5509 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5510 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5511 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
5512 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
5513 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
5514 SOFTWARE.
5515 ------------------------------------------------------------------------------
5516 ALTERNATIVE B - Public Domain (www.unlicense.org)
5517 This is free and unencumbered software released into the public domain.
5518 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
5519 software, either in source code form or as a compiled binary, for any purpose,
5520 commercial or non-commercial, and by any means.
5521 In jurisdictions that recognize copyright laws, the author or authors of this
5522 software dedicate any and all copyright interest in the software to the public
5523 domain. We make this dedication for the benefit of the public at large and to
5524 the detriment of our heirs and successors. We intend this dedication to be an
5525 overt act of relinquishment in perpetuity of all present and future rights to
5526 this software under copyright law.
5527 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5528 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5529 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5530 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
5531 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
5532 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
5533 ------------------------------------------------------------------------------
5534 */
5535