1 // Ogg Vorbis audio decoder - v1.16 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking implementation
7 // sponsored by Phillip Bennefall, Marc Andersen, Aaron Baker,
8 // Elias Software, Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 //   See end of file for license information.
13 //
14 // Limitations:
15 //
16 //   - floor 0 not supported (used in old ogg vorbis files pre-2004)
17 //   - lossless sample-truncation at beginning ignored
18 //   - cannot concatenate multiple vorbis streams
19 //   - sample positions are 32-bit, limiting seekable 192Khz
20 //       files to around 6 hours (Ogg supports 64-bit)
21 //
22 // Feature contributors:
23 //    Dougall Johnson (sample-exact seeking)
24 //
25 // Bugfix/warning contributors:
26 //    Terje Mathisen     Niklas Frykholm     Andy Hill
27 //    Casey Muratori     John Bolton         Gargaj
28 //    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
29 //    Bernhard Wodo      Evan Balster        alxprd@github
30 //    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
31 //    Phillip Bennefall  Rohit               Thiago Goulart
32 //    manxorist@github   saga musix          github:infatum
33 //    Timur Gagiev
34 //
35 // Partial history:
36 //    1.16    - 2019-03-04 - fix warnings
37 //    1.15    - 2019-02-07 - explicit failure if Ogg Skeleton data is found
38 //    1.14    - 2018-02-11 - delete bogus dealloca usage
39 //    1.13    - 2018-01-29 - fix truncation of last frame (hopefully)
40 //    1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
41 //    1.11    - 2017-07-23 - fix MinGW compilation
42 //    1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
43 //    1.09    - 2016-04-04 - back out 'truncation of last frame' fix from previous version
44 //    1.08    - 2016-04-02 - warnings; setup memory leaks; truncation of last frame
45 //    1.07    - 2015-01-16 - fixes for crashes on invalid files; warning fixes; const
46 //    1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
47 //                           some crash fixes when out of memory or with corrupt files
48 //                           fix some inappropriately signed shifts
49 //    1.05    - 2015-04-19 - don't define __forceinline if it's redundant
50 //    1.04    - 2014-08-27 - fix missing const-correct case in API
51 //    1.03    - 2014-08-07 - warning fixes
52 //    1.02    - 2014-07-09 - declare qsort comparison as explicitly _cdecl in Windows
53 //    1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float (interleaved was correct)
54 //    1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
55 //                           (API change) report sample rate for decode-full-file funcs
56 //
57 // See end of file for full version history.
58 
59 
60 //////////////////////////////////////////////////////////////////////////////
61 //
62 //  HEADER BEGINS HERE
63 //
64 
65 #ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
66 #define STB_VORBIS_INCLUDE_STB_VORBIS_H
67 
68 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
69 #define STB_VORBIS_NO_STDIO 1
70 #endif
71 
72 #ifndef STB_VORBIS_NO_STDIO
73 #include <stdio.h>
74 #endif
75 
76 #ifdef __cplusplus
77 extern "C" {
78 #endif
79 
80 ///////////   THREAD SAFETY
81 
82 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
83 // them from multiple threads at the same time. However, you can have multiple
84 // stb_vorbis* handles and decode from them independently in multiple thrads.
85 
86 
87 ///////////   MEMORY ALLOCATION
88 
89 // normally stb_vorbis uses malloc() to allocate memory at startup,
90 // and alloca() to allocate temporary memory during a frame on the
91 // stack. (Memory consumption will depend on the amount of setup
92 // data in the file and how you set the compile flags for speed
93 // vs. size. In my test files the maximal-size usage is ~150KB.)
94 //
95 // You can modify the wrapper functions in the source (setup_malloc,
96 // setup_temp_malloc, temp_malloc) to change this behavior, or you
97 // can use a simpler allocation model: you pass in a buffer from
98 // which stb_vorbis will allocate _all_ its memory (including the
99 // temp memory). "open" may fail with a VORBIS_outofmem if you
100 // do not pass in enough data; there is no way to determine how
101 // much you do need except to succeed (at which point you can
102 // query get_info to find the exact amount required. yes I know
103 // this is lame).
104 //
105 // If you pass in a non-NULL buffer of the type below, allocation
106 // will occur from it as described above. Otherwise just pass NULL
107 // to use malloc()/alloca()
108 
109 typedef struct
110 {
111    char *alloc_buffer;
112    int   alloc_buffer_length_in_bytes;
113 } stb_vorbis_alloc;
114 
115 
116 ///////////   FUNCTIONS USEABLE WITH ALL INPUT MODES
117 
118 typedef struct stb_vorbis stb_vorbis;
119 
120 typedef struct
121 {
122    unsigned int sample_rate;
123    int channels;
124 
125    unsigned int setup_memory_required;
126    unsigned int setup_temp_memory_required;
127    unsigned int temp_memory_required;
128 
129    int max_frame_size;
130 } stb_vorbis_info;
131 
132 // get general information about the file
133 extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
134 
135 // get the last error detected (clears it, too)
136 extern int stb_vorbis_get_error(stb_vorbis *f);
137 
138 // close an ogg vorbis file and free all memory in use
139 extern void stb_vorbis_close(stb_vorbis *f);
140 
141 // this function returns the offset (in samples) from the beginning of the
142 // file that will be returned by the next decode, if it is known, or -1
143 // otherwise. after a flush_pushdata() call, this may take a while before
144 // it becomes valid again.
145 // NOT WORKING YET after a seek with PULLDATA API
146 extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
147 
148 // returns the current seek point within the file, or offset from the beginning
149 // of the memory buffer. In pushdata mode it returns 0.
150 extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
151 
152 ///////////   PUSHDATA API
153 
154 #ifndef STB_VORBIS_NO_PUSHDATA_API
155 
156 // this API allows you to get blocks of data from any source and hand
157 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
158 // you how much it used, and you have to give it the rest next time;
159 // and stb_vorbis may not have enough data to work with and you will
160 // need to give it the same data again PLUS more. Note that the Vorbis
161 // specification does not bound the size of an individual frame.
162 
163 extern stb_vorbis *stb_vorbis_open_pushdata(
164          const unsigned char * datablock, int datablock_length_in_bytes,
165          int *datablock_memory_consumed_in_bytes,
166          int *error,
167          const stb_vorbis_alloc *alloc_buffer);
168 // create a vorbis decoder by passing in the initial data block containing
169 //    the ogg&vorbis headers (you don't need to do parse them, just provide
170 //    the first N bytes of the file--you're told if it's not enough, see below)
171 // on success, returns an stb_vorbis *, does not set error, returns the amount of
172 //    data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
173 // on failure, returns NULL on error and sets *error, does not change *datablock_memory_consumed
174 // if returns NULL and *error is VORBIS_need_more_data, then the input block was
175 //       incomplete and you need to pass in a larger block from the start of the file
176 
177 extern int stb_vorbis_decode_frame_pushdata(
178          stb_vorbis *f,
179          const unsigned char *datablock, int datablock_length_in_bytes,
180          int *channels,             // place to write number of float * buffers
181          float ***output,           // place to write float ** array of float * buffers
182          int *samples               // place to write number of output samples
183      );
184 // decode a frame of audio sample data if possible from the passed-in data block
185 //
186 // return value: number of bytes we used from datablock
187 //
188 // possible cases:
189 //     0 bytes used, 0 samples output (need more data)
190 //     N bytes used, 0 samples output (resynching the stream, keep going)
191 //     N bytes used, M samples output (one frame of data)
192 // note that after opening a file, you will ALWAYS get one N-bytes,0-sample
193 // frame, because Vorbis always "discards" the first frame.
194 //
195 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
196 // instead only datablock_length_in_bytes-3 or less. This is because it wants
197 // to avoid missing parts of a page header if they cross a datablock boundary,
198 // without writing state-machiney code to record a partial detection.
199 //
200 // The number of channels returned are stored in *channels (which can be
201 // NULL--it is always the same as the number of channels reported by
202 // get_info). *output will contain an array of float* buffers, one per
203 // channel. In other words, (*output)[0][0] contains the first sample from
204 // the first channel, and (*output)[1][0] contains the first sample from
205 // the second channel.
206 
207 extern void stb_vorbis_flush_pushdata(stb_vorbis *f);
208 // inform stb_vorbis that your next datablock will not be contiguous with
209 // previous ones (e.g. you've seeked in the data); future attempts to decode
210 // frames will cause stb_vorbis to resynchronize (as noted above), and
211 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
212 // will begin decoding the _next_ frame.
213 //
214 // if you want to seek using pushdata, you need to seek in your file, then
215 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
216 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
217 // if you don't like the result, seek your file again and repeat.
218 #endif
219 
220 
221 //////////   PULLING INPUT API
222 
223 #ifndef STB_VORBIS_NO_PULLDATA_API
224 // This API assumes stb_vorbis is allowed to pull data from a source--
225 // either a block of memory containing the _entire_ vorbis stream, or a
226 // FILE * that you or it create, or possibly some other reading mechanism
227 // if you go modify the source to replace the FILE * case with some kind
228 // of callback to your code. (But if you don't support seeking, you may
229 // just want to go ahead and use pushdata.)
230 
231 #if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
232 extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
233 #endif
234 #if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
235 extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
236 #endif
237 // decode an entire file and output the data interleaved into a malloc()ed
238 // buffer stored in *output. The return value is the number of samples
239 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
240 // When you're done with it, just free() the pointer returned in *output.
241 
242 extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
243                                   int *error, const stb_vorbis_alloc *alloc_buffer);
244 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
245 // this must be the entire stream!). on failure, returns NULL and sets *error
246 
247 #ifndef STB_VORBIS_NO_STDIO
248 extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
249                                   int *error, const stb_vorbis_alloc *alloc_buffer);
250 // create an ogg vorbis decoder from a filename via fopen(). on failure,
251 // returns NULL and sets *error (possibly to VORBIS_file_open_failure).
252 
253 extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
254                                   int *error, const stb_vorbis_alloc *alloc_buffer);
255 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
256 // the _current_ seek point (ftell). on failure, returns NULL and sets *error.
257 // note that stb_vorbis must "own" this stream; if you seek it in between
258 // calls to stb_vorbis, it will become confused. Moreover, if you attempt to
259 // perform stb_vorbis_seek_*() operations on this file, it will assume it
260 // owns the _entire_ rest of the file after the start point. Use the next
261 // function, stb_vorbis_open_file_section(), to limit it.
262 
263 extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
264                 int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
265 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
266 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
267 // on failure, returns NULL and sets *error. note that stb_vorbis must "own"
268 // this stream; if you seek it in between calls to stb_vorbis, it will become
269 // confused.
270 #endif
271 
272 extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
273 extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
274 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
275 // after calling seek_frame(), the next call to get_frame_*() will include
276 // the specified sample. after calling stb_vorbis_seek(), the next call to
277 // stb_vorbis_get_samples_* will start with the specified sample. If you
278 // do not need to seek to EXACTLY the target sample when using get_samples_*,
279 // you can also use seek_frame().
280 
281 extern int stb_vorbis_seek_start(stb_vorbis *f);
282 // this function is equivalent to stb_vorbis_seek(f,0)
283 
284 extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
285 extern float        stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
286 // these functions return the total length of the vorbis stream
287 
288 extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
289 // decode the next frame and return the number of samples. the number of
290 // channels returned are stored in *channels (which can be NULL--it is always
291 // the same as the number of channels reported by get_info). *output will
292 // contain an array of float* buffers, one per channel. These outputs will
293 // be overwritten on the next call to stb_vorbis_get_frame_*.
294 //
295 // You generally should not intermix calls to stb_vorbis_get_frame_*()
296 // and stb_vorbis_get_samples_*(), since the latter calls the former.
297 
298 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
299 extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
300 extern int stb_vorbis_get_frame_short            (stb_vorbis *f, int num_c, short **buffer, int num_samples);
301 #endif
302 // decode the next frame and return the number of *samples* per channel.
303 // Note that for interleaved data, you pass in the number of shorts (the
304 // size of your array), but the return value is the number of samples per
305 // channel, not the total number of samples.
306 //
307 // The data is coerced to the number of channels you request according to the
308 // channel coercion rules (see below). You must pass in the size of your
309 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
310 // The maximum buffer size needed can be gotten from get_info(); however,
311 // the Vorbis I specification implies an absolute maximum of 4096 samples
312 // per channel.
313 
314 // Channel coercion rules:
315 //    Let M be the number of channels requested, and N the number of channels present,
316 //    and Cn be the nth channel; let stereo L be the sum of all L and center channels,
317 //    and stereo R be the sum of all R and center channels (channel assignment from the
318 //    vorbis spec).
319 //        M    N       output
320 //        1    k      sum(Ck) for all k
321 //        2    *      stereo L, stereo R
322 //        k    l      k > l, the first l channels, then 0s
323 //        k    l      k <= l, the first k channels
324 //    Note that this is not _good_ surround etc. mixing at all! It's just so
325 //    you get something useful.
326 
327 extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
328 extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
329 // gets num_samples samples, not necessarily on a frame boundary--this requires
330 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
331 // Returns the number of samples stored per channel; it may be less than requested
332 // at the end of the file. If there are no more samples in the file, returns 0.
333 
334 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
335 extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
336 extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
337 #endif
338 // gets num_samples samples, not necessarily on a frame boundary--this requires
339 // buffering so you have to supply the buffers. Applies the coercion rules above
340 // to produce 'channels' channels. Returns the number of samples stored per channel;
341 // it may be less than requested at the end of the file. If there are no more
342 // samples in the file, returns 0.
343 
344 #endif
345 
346 ////////   ERROR CODES
347 
348 enum STBVorbisError
349 {
350    VORBIS__no_error,
351 
352    VORBIS_need_more_data=1,             // not a real error
353 
354    VORBIS_invalid_api_mixing,           // can't mix API modes
355    VORBIS_outofmem,                     // not enough memory
356    VORBIS_feature_not_supported,        // uses floor 0
357    VORBIS_too_many_channels,            // STB_VORBIS_MAX_CHANNELS is too small
358    VORBIS_file_open_failure,            // fopen() failed
359    VORBIS_seek_without_length,          // can't seek in unknown-length file
360 
361    VORBIS_unexpected_eof=10,            // file is truncated?
362    VORBIS_seek_invalid,                 // seek past EOF
363 
364    // decoding errors (corrupt/invalid stream) -- you probably
365    // don't care about the exact details of these
366 
367    // vorbis errors:
368    VORBIS_invalid_setup=20,
369    VORBIS_invalid_stream,
370 
371    // ogg errors:
372    VORBIS_missing_capture_pattern=30,
373    VORBIS_invalid_stream_structure_version,
374    VORBIS_continued_packet_flag_invalid,
375    VORBIS_incorrect_stream_serial_number,
376    VORBIS_invalid_first_page,
377    VORBIS_bad_packet_type,
378    VORBIS_cant_find_last_page,
379    VORBIS_seek_failed,
380    VORBIS_ogg_skeleton_not_supported
381 };
382 
383 
384 #ifdef __cplusplus
385 }
386 #endif
387 
388 #endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
389 //
390 //  HEADER ENDS HERE
391 //
392 //////////////////////////////////////////////////////////////////////////////
393 
394 #ifndef STB_VORBIS_HEADER_ONLY
395 
396 // global configuration settings (e.g. set these in the project/makefile),
397 // or just set them in this file at the top (although ideally the first few
398 // should be visible when the header file is compiled too, although it's not
399 // crucial)
400 
401 // STB_VORBIS_NO_PUSHDATA_API
402 //     does not compile the code for the various stb_vorbis_*_pushdata()
403 //     functions
404 // #define STB_VORBIS_NO_PUSHDATA_API
405 
406 // STB_VORBIS_NO_PULLDATA_API
407 //     does not compile the code for the non-pushdata APIs
408 // #define STB_VORBIS_NO_PULLDATA_API
409 
410 // STB_VORBIS_NO_STDIO
411 //     does not compile the code for the APIs that use FILE *s internally
412 //     or externally (implied by STB_VORBIS_NO_PULLDATA_API)
413 // #define STB_VORBIS_NO_STDIO
414 
415 // STB_VORBIS_NO_INTEGER_CONVERSION
416 //     does not compile the code for converting audio sample data from
417 //     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
418 // #define STB_VORBIS_NO_INTEGER_CONVERSION
419 
420 // STB_VORBIS_NO_FAST_SCALED_FLOAT
421 //      does not use a fast float-to-int trick to accelerate float-to-int on
422 //      most platforms which requires endianness be defined correctly.
423 //#define STB_VORBIS_NO_FAST_SCALED_FLOAT
424 
425 
426 // STB_VORBIS_MAX_CHANNELS [number]
427 //     globally define this to the maximum number of channels you need.
428 //     The spec does not put a restriction on channels except that
429 //     the count is stored in a byte, so 255 is the hard limit.
430 //     Reducing this saves about 16 bytes per value, so using 16 saves
431 //     (255-16)*16 or around 4KB. Plus anything other memory usage
432 //     I forgot to account for. Can probably go as low as 8 (7.1 audio),
433 //     6 (5.1 audio), or 2 (stereo only).
434 #ifndef STB_VORBIS_MAX_CHANNELS
435 #define STB_VORBIS_MAX_CHANNELS    16  // enough for anyone?
436 #endif
437 
438 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
439 //     after a flush_pushdata(), stb_vorbis begins scanning for the
440 //     next valid page, without backtracking. when it finds something
441 //     that looks like a page, it streams through it and verifies its
442 //     CRC32. Should that validation fail, it keeps scanning. But it's
443 //     possible that _while_ streaming through to check the CRC32 of
444 //     one candidate page, it sees another candidate page. This #define
445 //     determines how many "overlapping" candidate pages it can search
446 //     at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
447 //     garbage pages could be as big as 64KB, but probably average ~16KB.
448 //     So don't hose ourselves by scanning an apparent 64KB page and
449 //     missing a ton of real ones in the interim; so minimum of 2
450 #ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
451 #define STB_VORBIS_PUSHDATA_CRC_COUNT  4
452 #endif
453 
454 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
455 //     sets the log size of the huffman-acceleration table.  Maximum
456 //     supported value is 24. with larger numbers, more decodings are O(1),
457 //     but the table size is larger so worse cache missing, so you'll have
458 //     to probe (and try multiple ogg vorbis files) to find the sweet spot.
459 #ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
460 #define STB_VORBIS_FAST_HUFFMAN_LENGTH   10
461 #endif
462 
463 // STB_VORBIS_FAST_BINARY_LENGTH [number]
464 //     sets the log size of the binary-search acceleration table. this
465 //     is used in similar fashion to the fast-huffman size to set initial
466 //     parameters for the binary search
467 
468 // STB_VORBIS_FAST_HUFFMAN_INT
469 //     The fast huffman tables are much more efficient if they can be
470 //     stored as 16-bit results instead of 32-bit results. This restricts
471 //     the codebooks to having only 65535 possible outcomes, though.
472 //     (At least, accelerated by the huffman table.)
473 #ifndef STB_VORBIS_FAST_HUFFMAN_INT
474 #define STB_VORBIS_FAST_HUFFMAN_SHORT
475 #endif
476 
477 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
478 //     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
479 //     back on binary searching for the correct one. This requires storing
480 //     extra tables with the huffman codes in sorted order. Defining this
481 //     symbol trades off space for speed by forcing a linear search in the
482 //     non-fast case, except for "sparse" codebooks.
483 // #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
484 
485 // STB_VORBIS_DIVIDES_IN_RESIDUE
486 //     stb_vorbis precomputes the result of the scalar residue decoding
487 //     that would otherwise require a divide per chunk. you can trade off
488 //     space for time by defining this symbol.
489 // #define STB_VORBIS_DIVIDES_IN_RESIDUE
490 
491 // STB_VORBIS_DIVIDES_IN_CODEBOOK
492 //     vorbis VQ codebooks can be encoded two ways: with every case explicitly
493 //     stored, or with all elements being chosen from a small range of values,
494 //     and all values possible in all elements. By default, stb_vorbis expands
495 //     this latter kind out to look like the former kind for ease of decoding,
496 //     because otherwise an integer divide-per-vector-element is required to
497 //     unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
498 //     trade off storage for speed.
499 //#define STB_VORBIS_DIVIDES_IN_CODEBOOK
500 
501 #ifdef STB_VORBIS_CODEBOOK_SHORTS
502 #error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
503 #endif
504 
505 // STB_VORBIS_DIVIDE_TABLE
506 //     this replaces small integer divides in the floor decode loop with
507 //     table lookups. made less than 1% difference, so disabled by default.
508 
509 // STB_VORBIS_NO_INLINE_DECODE
510 //     disables the inlining of the scalar codebook fast-huffman decode.
511 //     might save a little codespace; useful for debugging
512 // #define STB_VORBIS_NO_INLINE_DECODE
513 
514 // STB_VORBIS_NO_DEFER_FLOOR
515 //     Normally we only decode the floor without synthesizing the actual
516 //     full curve. We can instead synthesize the curve immediately. This
517 //     requires more memory and is very likely slower, so I don't think
518 //     you'd ever want to do it except for debugging.
519 // #define STB_VORBIS_NO_DEFER_FLOOR
520 
521 
522 
523 
524 //////////////////////////////////////////////////////////////////////////////
525 
526 #ifdef STB_VORBIS_NO_PULLDATA_API
527    #define STB_VORBIS_NO_INTEGER_CONVERSION
528    #define STB_VORBIS_NO_STDIO
529 #endif
530 
531 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
532    #define STB_VORBIS_NO_STDIO 1
533 #endif
534 
535 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
536 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
537 
538    // only need endianness for fast-float-to-int, which we don't
539    // use for pushdata
540 
541    #ifndef STB_VORBIS_BIG_ENDIAN
542      #define STB_VORBIS_ENDIAN  0
543    #else
544      #define STB_VORBIS_ENDIAN  1
545    #endif
546 
547 #endif
548 #endif
549 
550 
551 #ifndef STB_VORBIS_NO_STDIO
552 #include <stdio.h>
553 #endif
554 
555 #ifndef STB_VORBIS_NO_CRT
556    #include <stdlib.h>
557    #include <string.h>
558    #include <assert.h>
559    #include <math.h>
560 
561    // find definition of alloca if it's not in stdlib.h:
562    #if defined(_MSC_VER) || defined(__MINGW32__)
563       #include <malloc.h>
564    #endif
565    #if defined(__linux__) || defined(__linux) || defined(__EMSCRIPTEN__)
566       #include <alloca.h>
567    #endif
568 #else // STB_VORBIS_NO_CRT
569    #define NULL 0
570    #define malloc(s)   0
571    #define free(s)     ((void) 0)
572    #define realloc(s)  0
573 #endif // STB_VORBIS_NO_CRT
574 
575 #include <limits.h>
576 
577 #ifdef __MINGW32__
578    // eff you mingw:
579    //     "fixed":
580    //         http://sourceforge.net/p/mingw-w64/mailman/message/32882927/
581    //     "no that broke the build, reverted, who cares about C":
582    //         http://sourceforge.net/p/mingw-w64/mailman/message/32890381/
583    #ifdef __forceinline
584    #undef __forceinline
585    #endif
586    #define __forceinline
587    #define alloca __builtin_alloca
588 #elif !defined(_MSC_VER)
589    #if __GNUC__
590       #define __forceinline inline
591    #else
592       #define __forceinline
593    #endif
594 #endif
595 
596 #if STB_VORBIS_MAX_CHANNELS > 256
597 #error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
598 #endif
599 
600 #if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
601 #error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
602 #endif
603 
604 
605 #if 0
606 #include <crtdbg.h>
607 #define CHECK(f)   _CrtIsValidHeapPointer(f->channel_buffers[1])
608 #else
609 #define CHECK(f)   ((void) 0)
610 #endif
611 
612 #define MAX_BLOCKSIZE_LOG  13   // from specification
613 #define MAX_BLOCKSIZE      (1 << MAX_BLOCKSIZE_LOG)
614 
615 
616 typedef unsigned char  uint8;
617 typedef   signed char   int8;
618 typedef unsigned short uint16;
619 typedef   signed short  int16;
620 typedef unsigned int   uint32;
621 typedef   signed int    int32;
622 
623 #ifndef TRUE
624 #define TRUE 1
625 #define FALSE 0
626 #endif
627 
628 typedef float codetype;
629 
630 // @NOTE
631 //
632 // Some arrays below are tagged "//varies", which means it's actually
633 // a variable-sized piece of data, but rather than malloc I assume it's
634 // small enough it's better to just allocate it all together with the
635 // main thing
636 //
637 // Most of the variables are specified with the smallest size I could pack
638 // them into. It might give better performance to make them all full-sized
639 // integers. It should be safe to freely rearrange the structures or change
640 // the sizes larger--nothing relies on silently truncating etc., nor the
641 // order of variables.
642 
643 #define FAST_HUFFMAN_TABLE_SIZE   (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
644 #define FAST_HUFFMAN_TABLE_MASK   (FAST_HUFFMAN_TABLE_SIZE - 1)
645 
646 typedef struct
647 {
648    int dimensions, entries;
649    uint8 *codeword_lengths;
650    float  minimum_value;
651    float  delta_value;
652    uint8  value_bits;
653    uint8  lookup_type;
654    uint8  sequence_p;
655    uint8  sparse;
656    uint32 lookup_values;
657    codetype *multiplicands;
658    uint32 *codewords;
659    #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
660     int16  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
661    #else
662     int32  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
663    #endif
664    uint32 *sorted_codewords;
665    int    *sorted_values;
666    int     sorted_entries;
667 } Codebook;
668 
669 typedef struct
670 {
671    uint8 order;
672    uint16 rate;
673    uint16 bark_map_size;
674    uint8 amplitude_bits;
675    uint8 amplitude_offset;
676    uint8 number_of_books;
677    uint8 book_list[16]; // varies
678 } Floor0;
679 
680 typedef struct
681 {
682    uint8 partitions;
683    uint8 partition_class_list[32]; // varies
684    uint8 class_dimensions[16]; // varies
685    uint8 class_subclasses[16]; // varies
686    uint8 class_masterbooks[16]; // varies
687    int16 subclass_books[16][8]; // varies
688    uint16 Xlist[31*8+2]; // varies
689    uint8 sorted_order[31*8+2];
690    uint8 neighbors[31*8+2][2];
691    uint8 floor1_multiplier;
692    uint8 rangebits;
693    int values;
694 } Floor1;
695 
696 typedef union
697 {
698    Floor0 floor0;
699    Floor1 floor1;
700 } Floor;
701 
702 typedef struct
703 {
704    uint32 begin, end;
705    uint32 part_size;
706    uint8 classifications;
707    uint8 classbook;
708    uint8 **classdata;
709    int16 (*residue_books)[8];
710 } Residue;
711 
712 typedef struct
713 {
714    uint8 magnitude;
715    uint8 angle;
716    uint8 mux;
717 } MappingChannel;
718 
719 typedef struct
720 {
721    uint16 coupling_steps;
722    MappingChannel *chan;
723    uint8  submaps;
724    uint8  submap_floor[15]; // varies
725    uint8  submap_residue[15]; // varies
726 } Mapping;
727 
728 typedef struct
729 {
730    uint8 blockflag;
731    uint8 mapping;
732    uint16 windowtype;
733    uint16 transformtype;
734 } Mode;
735 
736 typedef struct
737 {
738    uint32  goal_crc;    // expected crc if match
739    int     bytes_left;  // bytes left in packet
740    uint32  crc_so_far;  // running crc
741    int     bytes_done;  // bytes processed in _current_ chunk
742    uint32  sample_loc;  // granule pos encoded in page
743 } CRCscan;
744 
745 typedef struct
746 {
747    uint32 page_start, page_end;
748    uint32 last_decoded_sample;
749 } ProbedPage;
750 
751 struct stb_vorbis
752 {
753   // user-accessible info
754    unsigned int sample_rate;
755    int channels;
756 
757    unsigned int setup_memory_required;
758    unsigned int temp_memory_required;
759    unsigned int setup_temp_memory_required;
760 
761   // input config
762 #ifndef STB_VORBIS_NO_STDIO
763    FILE *f;
764    uint32 f_start;
765    int close_on_free;
766 #endif
767 
768    uint8 *stream;
769    uint8 *stream_start;
770    uint8 *stream_end;
771 
772    uint32 stream_len;
773 
774    uint8  push_mode;
775 
776    uint32 first_audio_page_offset;
777 
778    ProbedPage p_first, p_last;
779 
780   // memory management
781    stb_vorbis_alloc alloc;
782    int setup_offset;
783    int temp_offset;
784 
785   // run-time results
786    int eof;
787    enum STBVorbisError error;
788 
789   // user-useful data
790 
791   // header info
792    int blocksize[2];
793    int blocksize_0, blocksize_1;
794    int codebook_count;
795    Codebook *codebooks;
796    int floor_count;
797    uint16 floor_types[64]; // varies
798    Floor *floor_config;
799    int residue_count;
800    uint16 residue_types[64]; // varies
801    Residue *residue_config;
802    int mapping_count;
803    Mapping *mapping;
804    int mode_count;
805    Mode mode_config[64];  // varies
806 
807    uint32 total_samples;
808 
809   // decode buffer
810    float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
811    float *outputs        [STB_VORBIS_MAX_CHANNELS];
812 
813    float *previous_window[STB_VORBIS_MAX_CHANNELS];
814    int previous_length;
815 
816    #ifndef STB_VORBIS_NO_DEFER_FLOOR
817    int16 *finalY[STB_VORBIS_MAX_CHANNELS];
818    #else
819    float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
820    #endif
821 
822    uint32 current_loc; // sample location of next frame to decode
823    int    current_loc_valid;
824 
825   // per-blocksize precomputed data
826 
827    // twiddle factors
828    float *A[2],*B[2],*C[2];
829    float *window[2];
830    uint16 *bit_reverse[2];
831 
832   // current page/packet/segment streaming info
833    uint32 serial; // stream serial number for verification
834    int last_page;
835    int segment_count;
836    uint8 segments[255];
837    uint8 page_flag;
838    uint8 bytes_in_seg;
839    uint8 first_decode;
840    int next_seg;
841    int last_seg;  // flag that we're on the last segment
842    int last_seg_which; // what was the segment number of the last seg?
843    uint32 acc;
844    int valid_bits;
845    int packet_bytes;
846    int end_seg_with_known_loc;
847    uint32 known_loc_for_packet;
848    int discard_samples_deferred;
849    uint32 samples_output;
850 
851   // push mode scanning
852    int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
853 #ifndef STB_VORBIS_NO_PUSHDATA_API
854    CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
855 #endif
856 
857   // sample-access
858    int channel_buffer_start;
859    int channel_buffer_end;
860 };
861 
862 #if defined(STB_VORBIS_NO_PUSHDATA_API)
863    #define IS_PUSH_MODE(f)   FALSE
864 #elif defined(STB_VORBIS_NO_PULLDATA_API)
865    #define IS_PUSH_MODE(f)   TRUE
866 #else
867    #define IS_PUSH_MODE(f)   ((f)->push_mode)
868 #endif
869 
870 typedef struct stb_vorbis vorb;
871 
error(vorb * f,enum STBVorbisError e)872 static int error(vorb *f, enum STBVorbisError e)
873 {
874    f->error = e;
875    if (!f->eof && e != VORBIS_need_more_data) {
876       f->error=e; // breakpoint for debugging
877    }
878    return 0;
879 }
880 
881 
882 // these functions are used for allocating temporary memory
883 // while decoding. if you can afford the stack space, use
884 // alloca(); otherwise, provide a temp buffer and it will
885 // allocate out of those.
886 
887 #define array_size_required(count,size)  (count*(sizeof(void *)+(size)))
888 
889 #define temp_alloc(f,size)              (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
890 #define temp_free(f,p)                  0
891 #define temp_alloc_save(f)              ((f)->temp_offset)
892 #define temp_alloc_restore(f,p)         ((f)->temp_offset = (p))
893 
894 #define temp_block_array(f,count,size)  make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
895 
896 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
make_block_array(void * mem,int count,int size)897 static void *make_block_array(void *mem, int count, int size)
898 {
899    int i;
900    void ** p = (void **) mem;
901    char *q = (char *) (p + count);
902    for (i=0; i < count; ++i) {
903       p[i] = q;
904       q += size;
905    }
906    return p;
907 }
908 
setup_malloc(vorb * f,int sz)909 static void *setup_malloc(vorb *f, int sz)
910 {
911    sz = (sz+3) & ~3;
912    f->setup_memory_required += sz;
913    if (f->alloc.alloc_buffer) {
914       void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
915       if (f->setup_offset + sz > f->temp_offset) return NULL;
916       f->setup_offset += sz;
917       return p;
918    }
919    return sz ? malloc(sz) : NULL;
920 }
921 
setup_free(vorb * f,void * p)922 static void setup_free(vorb *f, void *p)
923 {
924    if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack
925    free(p);
926 }
927 
setup_temp_malloc(vorb * f,int sz)928 static void *setup_temp_malloc(vorb *f, int sz)
929 {
930    sz = (sz+3) & ~3;
931    if (f->alloc.alloc_buffer) {
932       if (f->temp_offset - sz < f->setup_offset) return NULL;
933       f->temp_offset -= sz;
934       return (char *) f->alloc.alloc_buffer + f->temp_offset;
935    }
936    return malloc(sz);
937 }
938 
setup_temp_free(vorb * f,void * p,int sz)939 static void setup_temp_free(vorb *f, void *p, int sz)
940 {
941    if (f->alloc.alloc_buffer) {
942       f->temp_offset += (sz+3)&~3;
943       return;
944    }
945    free(p);
946 }
947 
948 #define CRC32_POLY    0x04c11db7   // from spec
949 
950 static uint32 crc_table[256];
crc32_init(void)951 static void crc32_init(void)
952 {
953    int i,j;
954    uint32 s;
955    for(i=0; i < 256; i++) {
956       for (s=(uint32) i << 24, j=0; j < 8; ++j)
957          s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
958       crc_table[i] = s;
959    }
960 }
961 
crc32_update(uint32 crc,uint8 byte)962 static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
963 {
964    return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
965 }
966 
967 
968 // used in setup, and for huffman that doesn't go fast path
bit_reverse(unsigned int n)969 static unsigned int bit_reverse(unsigned int n)
970 {
971   n = ((n & 0xAAAAAAAA) >>  1) | ((n & 0x55555555) << 1);
972   n = ((n & 0xCCCCCCCC) >>  2) | ((n & 0x33333333) << 2);
973   n = ((n & 0xF0F0F0F0) >>  4) | ((n & 0x0F0F0F0F) << 4);
974   n = ((n & 0xFF00FF00) >>  8) | ((n & 0x00FF00FF) << 8);
975   return (n >> 16) | (n << 16);
976 }
977 
square(float x)978 static float square(float x)
979 {
980    return x*x;
981 }
982 
983 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
984 // as required by the specification. fast(?) implementation from stb.h
985 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
ilog(int32 n)986 static int ilog(int32 n)
987 {
988    static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
989 
990    if (n < 0) return 0; // signed n returns 0
991 
992    // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
993    if (n < (1 << 14))
994         if (n < (1 <<  4))            return  0 + log2_4[n      ];
995         else if (n < (1 <<  9))       return  5 + log2_4[n >>  5];
996              else                     return 10 + log2_4[n >> 10];
997    else if (n < (1 << 24))
998              if (n < (1 << 19))       return 15 + log2_4[n >> 15];
999              else                     return 20 + log2_4[n >> 20];
1000         else if (n < (1 << 29))       return 25 + log2_4[n >> 25];
1001              else                     return 30 + log2_4[n >> 30];
1002 }
1003 
1004 #ifndef M_PI
1005   #define M_PI  3.14159265358979323846264f  // from CRC
1006 #endif
1007 
1008 // code length assigned to a value with no huffman encoding
1009 #define NO_CODE   255
1010 
1011 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
1012 //
1013 // these functions are only called at setup, and only a few times
1014 // per file
1015 
float32_unpack(uint32 x)1016 static float float32_unpack(uint32 x)
1017 {
1018    // from the specification
1019    uint32 mantissa = x & 0x1fffff;
1020    uint32 sign = x & 0x80000000;
1021    uint32 exp = (x & 0x7fe00000) >> 21;
1022    double res = sign ? -(double)mantissa : (double)mantissa;
1023    return (float) ldexp((float)res, exp-788);
1024 }
1025 
1026 
1027 // zlib & jpeg huffman tables assume that the output symbols
1028 // can either be arbitrarily arranged, or have monotonically
1029 // increasing frequencies--they rely on the lengths being sorted;
1030 // this makes for a very simple generation algorithm.
1031 // vorbis allows a huffman table with non-sorted lengths. This
1032 // requires a more sophisticated construction, since symbols in
1033 // order do not map to huffman codes "in order".
add_entry(Codebook * c,uint32 huff_code,int symbol,int count,int len,uint32 * values)1034 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
1035 {
1036    if (!c->sparse) {
1037       c->codewords      [symbol] = huff_code;
1038    } else {
1039       c->codewords       [count] = huff_code;
1040       c->codeword_lengths[count] = len;
1041       values             [count] = symbol;
1042    }
1043 }
1044 
compute_codewords(Codebook * c,uint8 * len,int n,uint32 * values)1045 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
1046 {
1047    int i,k,m=0;
1048    uint32 available[32];
1049 
1050    memset(available, 0, sizeof(available));
1051    // find the first entry
1052    for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
1053    if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
1054    // add to the list
1055    add_entry(c, 0, k, m++, len[k], values);
1056    // add all available leaves
1057    for (i=1; i <= len[k]; ++i)
1058       available[i] = 1U << (32-i);
1059    // note that the above code treats the first case specially,
1060    // but it's really the same as the following code, so they
1061    // could probably be combined (except the initial code is 0,
1062    // and I use 0 in available[] to mean 'empty')
1063    for (i=k+1; i < n; ++i) {
1064       uint32 res;
1065       int z = len[i], y;
1066       if (z == NO_CODE) continue;
1067       // find lowest available leaf (should always be earliest,
1068       // which is what the specification calls for)
1069       // note that this property, and the fact we can never have
1070       // more than one free leaf at a given level, isn't totally
1071       // trivial to prove, but it seems true and the assert never
1072       // fires, so!
1073       while (z > 0 && !available[z]) --z;
1074       if (z == 0) { return FALSE; }
1075       res = available[z];
1076       assert(z >= 0 && z < 32);
1077       available[z] = 0;
1078       add_entry(c, bit_reverse(res), i, m++, len[i], values);
1079       // propagate availability up the tree
1080       if (z != len[i]) {
1081          assert(len[i] >= 0 && len[i] < 32);
1082          for (y=len[i]; y > z; --y) {
1083             assert(available[y] == 0);
1084             available[y] = res + (1 << (32-y));
1085          }
1086       }
1087    }
1088    return TRUE;
1089 }
1090 
1091 // accelerated huffman table allows fast O(1) match of all symbols
1092 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
compute_accelerated_huffman(Codebook * c)1093 static void compute_accelerated_huffman(Codebook *c)
1094 {
1095    int i, len;
1096    for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
1097       c->fast_huffman[i] = -1;
1098 
1099    len = c->sparse ? c->sorted_entries : c->entries;
1100    #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
1101    if (len > 32767) len = 32767; // largest possible value we can encode!
1102    #endif
1103    for (i=0; i < len; ++i) {
1104       if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
1105          uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
1106          // set table entries for all bit combinations in the higher bits
1107          while (z < FAST_HUFFMAN_TABLE_SIZE) {
1108              c->fast_huffman[z] = i;
1109              z += 1 << c->codeword_lengths[i];
1110          }
1111       }
1112    }
1113 }
1114 
1115 #ifdef _MSC_VER
1116 #define STBV_CDECL __cdecl
1117 #else
1118 #define STBV_CDECL
1119 #endif
1120 
uint32_compare(const void * p,const void * q)1121 static int STBV_CDECL uint32_compare(const void *p, const void *q)
1122 {
1123    uint32 x = * (uint32 *) p;
1124    uint32 y = * (uint32 *) q;
1125    return x < y ? -1 : x > y;
1126 }
1127 
include_in_sort(Codebook * c,uint8 len)1128 static int include_in_sort(Codebook *c, uint8 len)
1129 {
1130    if (c->sparse) { assert(len != NO_CODE); return TRUE; }
1131    if (len == NO_CODE) return FALSE;
1132    if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
1133    return FALSE;
1134 }
1135 
1136 // if the fast table above doesn't work, we want to binary
1137 // search them... need to reverse the bits
compute_sorted_huffman(Codebook * c,uint8 * lengths,uint32 * values)1138 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
1139 {
1140    int i, len;
1141    // build a list of all the entries
1142    // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
1143    // this is kind of a frivolous optimization--I don't see any performance improvement,
1144    // but it's like 4 extra lines of code, so.
1145    if (!c->sparse) {
1146       int k = 0;
1147       for (i=0; i < c->entries; ++i)
1148          if (include_in_sort(c, lengths[i]))
1149             c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
1150       assert(k == c->sorted_entries);
1151    } else {
1152       for (i=0; i < c->sorted_entries; ++i)
1153          c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
1154    }
1155 
1156    qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
1157    c->sorted_codewords[c->sorted_entries] = 0xffffffff;
1158 
1159    len = c->sparse ? c->sorted_entries : c->entries;
1160    // now we need to indicate how they correspond; we could either
1161    //   #1: sort a different data structure that says who they correspond to
1162    //   #2: for each sorted entry, search the original list to find who corresponds
1163    //   #3: for each original entry, find the sorted entry
1164    // #1 requires extra storage, #2 is slow, #3 can use binary search!
1165    for (i=0; i < len; ++i) {
1166       int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
1167       if (include_in_sort(c,huff_len)) {
1168          uint32 code = bit_reverse(c->codewords[i]);
1169          int x=0, n=c->sorted_entries;
1170          while (n > 1) {
1171             // invariant: sc[x] <= code < sc[x+n]
1172             int m = x + (n >> 1);
1173             if (c->sorted_codewords[m] <= code) {
1174                x = m;
1175                n -= (n>>1);
1176             } else {
1177                n >>= 1;
1178             }
1179          }
1180          assert(c->sorted_codewords[x] == code);
1181          if (c->sparse) {
1182             c->sorted_values[x] = values[i];
1183             c->codeword_lengths[x] = huff_len;
1184          } else {
1185             c->sorted_values[x] = i;
1186          }
1187       }
1188    }
1189 }
1190 
1191 // only run while parsing the header (3 times)
vorbis_validate(uint8 * data)1192 static int vorbis_validate(uint8 *data)
1193 {
1194    static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
1195    return memcmp(data, vorbis, 6) == 0;
1196 }
1197 
1198 // called from setup only, once per code book
1199 // (formula implied by specification)
lookup1_values(int entries,int dim)1200 static int lookup1_values(int entries, int dim)
1201 {
1202    int r = (int) floor(exp((float) log((float) entries) / dim));
1203    if ((int) floor(pow((float) r+1, dim)) <= entries)   // (int) cast for MinGW warning;
1204       ++r;                                              // floor() to avoid _ftol() when non-CRT
1205    assert(pow((float) r+1, dim) > entries);
1206    assert((int) floor(pow((float) r, dim)) <= entries); // (int),floor() as above
1207    return r;
1208 }
1209 
1210 // called twice per file
compute_twiddle_factors(int n,float * A,float * B,float * C)1211 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
1212 {
1213    int n4 = n >> 2, n8 = n >> 3;
1214    int k,k2;
1215 
1216    for (k=k2=0; k < n4; ++k,k2+=2) {
1217       A[k2  ] = (float)  cos(4*k*M_PI/n);
1218       A[k2+1] = (float) -sin(4*k*M_PI/n);
1219       B[k2  ] = (float)  cos((k2+1)*M_PI/n/2) * 0.5f;
1220       B[k2+1] = (float)  sin((k2+1)*M_PI/n/2) * 0.5f;
1221    }
1222    for (k=k2=0; k < n8; ++k,k2+=2) {
1223       C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
1224       C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
1225    }
1226 }
1227 
compute_window(int n,float * window)1228 static void compute_window(int n, float *window)
1229 {
1230    int n2 = n >> 1, i;
1231    for (i=0; i < n2; ++i)
1232       window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
1233 }
1234 
compute_bitreverse(int n,uint16 * rev)1235 static void compute_bitreverse(int n, uint16 *rev)
1236 {
1237    int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
1238    int i, n8 = n >> 3;
1239    for (i=0; i < n8; ++i)
1240       rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
1241 }
1242 
init_blocksize(vorb * f,int b,int n)1243 static int init_blocksize(vorb *f, int b, int n)
1244 {
1245    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
1246    f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1247    f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1248    f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
1249    if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
1250    compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
1251    f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1252    if (!f->window[b]) return error(f, VORBIS_outofmem);
1253    compute_window(n, f->window[b]);
1254    f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
1255    if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
1256    compute_bitreverse(n, f->bit_reverse[b]);
1257    return TRUE;
1258 }
1259 
neighbors(uint16 * x,int n,int * plow,int * phigh)1260 static void neighbors(uint16 *x, int n, int *plow, int *phigh)
1261 {
1262    int low = -1;
1263    int high = 65536;
1264    int i;
1265    for (i=0; i < n; ++i) {
1266       if (x[i] > low  && x[i] < x[n]) { *plow  = i; low = x[i]; }
1267       if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
1268    }
1269 }
1270 
1271 // this has been repurposed so y is now the original index instead of y
1272 typedef struct
1273 {
1274    uint16 x,id;
1275 } stbv__floor_ordering;
1276 
point_compare(const void * p,const void * q)1277 static int STBV_CDECL point_compare(const void *p, const void *q)
1278 {
1279    stbv__floor_ordering *a = (stbv__floor_ordering *) p;
1280    stbv__floor_ordering *b = (stbv__floor_ordering *) q;
1281    return a->x < b->x ? -1 : a->x > b->x;
1282 }
1283 
1284 //
1285 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
1286 
1287 
1288 #if defined(STB_VORBIS_NO_STDIO)
1289    #define USE_MEMORY(z)    TRUE
1290 #else
1291    #define USE_MEMORY(z)    ((z)->stream)
1292 #endif
1293 
get8(vorb * z)1294 static uint8 get8(vorb *z)
1295 {
1296    if (USE_MEMORY(z)) {
1297       if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
1298       return *z->stream++;
1299    }
1300 
1301    #ifndef STB_VORBIS_NO_STDIO
1302    {
1303    int c = fgetc(z->f);
1304    if (c == EOF) { z->eof = TRUE; return 0; }
1305    return c;
1306    }
1307    #endif
1308 }
1309 
get32(vorb * f)1310 static uint32 get32(vorb *f)
1311 {
1312    uint32 x;
1313    x = get8(f);
1314    x += get8(f) << 8;
1315    x += get8(f) << 16;
1316    x += (uint32) get8(f) << 24;
1317    return x;
1318 }
1319 
getn(vorb * z,uint8 * data,int n)1320 static int getn(vorb *z, uint8 *data, int n)
1321 {
1322    if (USE_MEMORY(z)) {
1323       if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
1324       memcpy(data, z->stream, n);
1325       z->stream += n;
1326       return 1;
1327    }
1328 
1329    #ifndef STB_VORBIS_NO_STDIO
1330    if (fread(data, n, 1, z->f) == 1)
1331       return 1;
1332    else {
1333       z->eof = 1;
1334       return 0;
1335    }
1336    #endif
1337 }
1338 
skip(vorb * z,int n)1339 static void skip(vorb *z, int n)
1340 {
1341    if (USE_MEMORY(z)) {
1342       z->stream += n;
1343       if (z->stream >= z->stream_end) z->eof = 1;
1344       return;
1345    }
1346    #ifndef STB_VORBIS_NO_STDIO
1347    {
1348       long x = ftell(z->f);
1349       fseek(z->f, x+n, SEEK_SET);
1350    }
1351    #endif
1352 }
1353 
set_file_offset(stb_vorbis * f,unsigned int loc)1354 static int set_file_offset(stb_vorbis *f, unsigned int loc)
1355 {
1356    #ifndef STB_VORBIS_NO_PUSHDATA_API
1357    if (f->push_mode) return 0;
1358    #endif
1359    f->eof = 0;
1360    if (USE_MEMORY(f)) {
1361       if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
1362          f->stream = f->stream_end;
1363          f->eof = 1;
1364          return 0;
1365       } else {
1366          f->stream = f->stream_start + loc;
1367          return 1;
1368       }
1369    }
1370    #ifndef STB_VORBIS_NO_STDIO
1371    if (loc + f->f_start < loc || loc >= 0x80000000) {
1372       loc = 0x7fffffff;
1373       f->eof = 1;
1374    } else {
1375       loc += f->f_start;
1376    }
1377    if (!fseek(f->f, loc, SEEK_SET))
1378       return 1;
1379    f->eof = 1;
1380    fseek(f->f, f->f_start, SEEK_END);
1381    return 0;
1382    #endif
1383 }
1384 
1385 
1386 static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
1387 
capture_pattern(vorb * f)1388 static int capture_pattern(vorb *f)
1389 {
1390    if (0x4f != get8(f)) return FALSE;
1391    if (0x67 != get8(f)) return FALSE;
1392    if (0x67 != get8(f)) return FALSE;
1393    if (0x53 != get8(f)) return FALSE;
1394    return TRUE;
1395 }
1396 
1397 #define PAGEFLAG_continued_packet   1
1398 #define PAGEFLAG_first_page         2
1399 #define PAGEFLAG_last_page          4
1400 
start_page_no_capturepattern(vorb * f)1401 static int start_page_no_capturepattern(vorb *f)
1402 {
1403    uint32 loc0,loc1,n;
1404    // stream structure version
1405    if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
1406    // header flag
1407    f->page_flag = get8(f);
1408    // absolute granule position
1409    loc0 = get32(f);
1410    loc1 = get32(f);
1411    // @TODO: validate loc0,loc1 as valid positions?
1412    // stream serial number -- vorbis doesn't interleave, so discard
1413    get32(f);
1414    //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
1415    // page sequence number
1416    n = get32(f);
1417    f->last_page = n;
1418    // CRC32
1419    get32(f);
1420    // page_segments
1421    f->segment_count = get8(f);
1422    if (!getn(f, f->segments, f->segment_count))
1423       return error(f, VORBIS_unexpected_eof);
1424    // assume we _don't_ know any the sample position of any segments
1425    f->end_seg_with_known_loc = -2;
1426    if (loc0 != ~0U || loc1 != ~0U) {
1427       int i;
1428       // determine which packet is the last one that will complete
1429       for (i=f->segment_count-1; i >= 0; --i)
1430          if (f->segments[i] < 255)
1431             break;
1432       // 'i' is now the index of the _last_ segment of a packet that ends
1433       if (i >= 0) {
1434          f->end_seg_with_known_loc = i;
1435          f->known_loc_for_packet   = loc0;
1436       }
1437    }
1438    if (f->first_decode) {
1439       int i,len;
1440       ProbedPage p;
1441       len = 0;
1442       for (i=0; i < f->segment_count; ++i)
1443          len += f->segments[i];
1444       len += 27 + f->segment_count;
1445       p.page_start = f->first_audio_page_offset;
1446       p.page_end = p.page_start + len;
1447       p.last_decoded_sample = loc0;
1448       f->p_first = p;
1449    }
1450    f->next_seg = 0;
1451    return TRUE;
1452 }
1453 
start_page(vorb * f)1454 static int start_page(vorb *f)
1455 {
1456    if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
1457    return start_page_no_capturepattern(f);
1458 }
1459 
start_packet(vorb * f)1460 static int start_packet(vorb *f)
1461 {
1462    while (f->next_seg == -1) {
1463       if (!start_page(f)) return FALSE;
1464       if (f->page_flag & PAGEFLAG_continued_packet)
1465          return error(f, VORBIS_continued_packet_flag_invalid);
1466    }
1467    f->last_seg = FALSE;
1468    f->valid_bits = 0;
1469    f->packet_bytes = 0;
1470    f->bytes_in_seg = 0;
1471    // f->next_seg is now valid
1472    return TRUE;
1473 }
1474 
maybe_start_packet(vorb * f)1475 static int maybe_start_packet(vorb *f)
1476 {
1477    if (f->next_seg == -1) {
1478       int x = get8(f);
1479       if (f->eof) return FALSE; // EOF at page boundary is not an error!
1480       if (0x4f != x      ) return error(f, VORBIS_missing_capture_pattern);
1481       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1482       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1483       if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1484       if (!start_page_no_capturepattern(f)) return FALSE;
1485       if (f->page_flag & PAGEFLAG_continued_packet) {
1486          // set up enough state that we can read this packet if we want,
1487          // e.g. during recovery
1488          f->last_seg = FALSE;
1489          f->bytes_in_seg = 0;
1490          return error(f, VORBIS_continued_packet_flag_invalid);
1491       }
1492    }
1493    return start_packet(f);
1494 }
1495 
next_segment(vorb * f)1496 static int next_segment(vorb *f)
1497 {
1498    int len;
1499    if (f->last_seg) return 0;
1500    if (f->next_seg == -1) {
1501       f->last_seg_which = f->segment_count-1; // in case start_page fails
1502       if (!start_page(f)) { f->last_seg = 1; return 0; }
1503       if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1504    }
1505    len = f->segments[f->next_seg++];
1506    if (len < 255) {
1507       f->last_seg = TRUE;
1508       f->last_seg_which = f->next_seg-1;
1509    }
1510    if (f->next_seg >= f->segment_count)
1511       f->next_seg = -1;
1512    assert(f->bytes_in_seg == 0);
1513    f->bytes_in_seg = len;
1514    return len;
1515 }
1516 
1517 #define EOP    (-1)
1518 #define INVALID_BITS  (-1)
1519 
get8_packet_raw(vorb * f)1520 static int get8_packet_raw(vorb *f)
1521 {
1522    if (!f->bytes_in_seg) {  // CLANG!
1523       if (f->last_seg) return EOP;
1524       else if (!next_segment(f)) return EOP;
1525    }
1526    assert(f->bytes_in_seg > 0);
1527    --f->bytes_in_seg;
1528    ++f->packet_bytes;
1529    return get8(f);
1530 }
1531 
get8_packet(vorb * f)1532 static int get8_packet(vorb *f)
1533 {
1534    int x = get8_packet_raw(f);
1535    f->valid_bits = 0;
1536    return x;
1537 }
1538 
flush_packet(vorb * f)1539 static void flush_packet(vorb *f)
1540 {
1541    while (get8_packet_raw(f) != EOP);
1542 }
1543 
1544 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1545 // as the huffman decoder?
get_bits(vorb * f,int n)1546 static uint32 get_bits(vorb *f, int n)
1547 {
1548    uint32 z;
1549 
1550    if (f->valid_bits < 0) return 0;
1551    if (f->valid_bits < n) {
1552       if (n > 24) {
1553          // the accumulator technique below would not work correctly in this case
1554          z = get_bits(f, 24);
1555          z += get_bits(f, n-24) << 24;
1556          return z;
1557       }
1558       if (f->valid_bits == 0) f->acc = 0;
1559       while (f->valid_bits < n) {
1560          int z = get8_packet_raw(f);
1561          if (z == EOP) {
1562             f->valid_bits = INVALID_BITS;
1563             return 0;
1564          }
1565          f->acc += z << f->valid_bits;
1566          f->valid_bits += 8;
1567       }
1568    }
1569    if (f->valid_bits < 0) return 0;
1570    z = f->acc & ((1 << n)-1);
1571    f->acc >>= n;
1572    f->valid_bits -= n;
1573    return z;
1574 }
1575 
1576 // @OPTIMIZE: primary accumulator for huffman
1577 // expand the buffer to as many bits as possible without reading off end of packet
1578 // it might be nice to allow f->valid_bits and f->acc to be stored in registers,
1579 // e.g. cache them locally and decode locally
prep_huffman(vorb * f)1580 static __forceinline void prep_huffman(vorb *f)
1581 {
1582    if (f->valid_bits <= 24) {
1583       if (f->valid_bits == 0) f->acc = 0;
1584       do {
1585          int z;
1586          if (f->last_seg && !f->bytes_in_seg) return;
1587          z = get8_packet_raw(f);
1588          if (z == EOP) return;
1589          f->acc += (unsigned) z << f->valid_bits;
1590          f->valid_bits += 8;
1591       } while (f->valid_bits <= 24);
1592    }
1593 }
1594 
1595 enum
1596 {
1597    VORBIS_packet_id = 1,
1598    VORBIS_packet_comment = 3,
1599    VORBIS_packet_setup = 5
1600 };
1601 
codebook_decode_scalar_raw(vorb * f,Codebook * c)1602 static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
1603 {
1604    int i;
1605    prep_huffman(f);
1606 
1607    if (c->codewords == NULL && c->sorted_codewords == NULL)
1608       return -1;
1609 
1610    // cases to use binary search: sorted_codewords && !c->codewords
1611    //                             sorted_codewords && c->entries > 8
1612    if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
1613       // binary search
1614       uint32 code = bit_reverse(f->acc);
1615       int x=0, n=c->sorted_entries, len;
1616 
1617       while (n > 1) {
1618          // invariant: sc[x] <= code < sc[x+n]
1619          int m = x + (n >> 1);
1620          if (c->sorted_codewords[m] <= code) {
1621             x = m;
1622             n -= (n>>1);
1623          } else {
1624             n >>= 1;
1625          }
1626       }
1627       // x is now the sorted index
1628       if (!c->sparse) x = c->sorted_values[x];
1629       // x is now sorted index if sparse, or symbol otherwise
1630       len = c->codeword_lengths[x];
1631       if (f->valid_bits >= len) {
1632          f->acc >>= len;
1633          f->valid_bits -= len;
1634          return x;
1635       }
1636 
1637       f->valid_bits = 0;
1638       return -1;
1639    }
1640 
1641    // if small, linear search
1642    assert(!c->sparse);
1643    for (i=0; i < c->entries; ++i) {
1644       if (c->codeword_lengths[i] == NO_CODE) continue;
1645       if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
1646          if (f->valid_bits >= c->codeword_lengths[i]) {
1647             f->acc >>= c->codeword_lengths[i];
1648             f->valid_bits -= c->codeword_lengths[i];
1649             return i;
1650          }
1651          f->valid_bits = 0;
1652          return -1;
1653       }
1654    }
1655 
1656    error(f, VORBIS_invalid_stream);
1657    f->valid_bits = 0;
1658    return -1;
1659 }
1660 
1661 #ifndef STB_VORBIS_NO_INLINE_DECODE
1662 
1663 #define DECODE_RAW(var, f,c)                                  \
1664    if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)        \
1665       prep_huffman(f);                                        \
1666    var = f->acc & FAST_HUFFMAN_TABLE_MASK;                    \
1667    var = c->fast_huffman[var];                                \
1668    if (var >= 0) {                                            \
1669       int n = c->codeword_lengths[var];                       \
1670       f->acc >>= n;                                           \
1671       f->valid_bits -= n;                                     \
1672       if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
1673    } else {                                                   \
1674       var = codebook_decode_scalar_raw(f,c);                  \
1675    }
1676 
1677 #else
1678 
codebook_decode_scalar(vorb * f,Codebook * c)1679 static int codebook_decode_scalar(vorb *f, Codebook *c)
1680 {
1681    int i;
1682    if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
1683       prep_huffman(f);
1684    // fast huffman table lookup
1685    i = f->acc & FAST_HUFFMAN_TABLE_MASK;
1686    i = c->fast_huffman[i];
1687    if (i >= 0) {
1688       f->acc >>= c->codeword_lengths[i];
1689       f->valid_bits -= c->codeword_lengths[i];
1690       if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
1691       return i;
1692    }
1693    return codebook_decode_scalar_raw(f,c);
1694 }
1695 
1696 #define DECODE_RAW(var,f,c)    var = codebook_decode_scalar(f,c);
1697 
1698 #endif
1699 
1700 #define DECODE(var,f,c)                                       \
1701    DECODE_RAW(var,f,c)                                        \
1702    if (c->sparse) var = c->sorted_values[var];
1703 
1704 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1705   #define DECODE_VQ(var,f,c)   DECODE_RAW(var,f,c)
1706 #else
1707   #define DECODE_VQ(var,f,c)   DECODE(var,f,c)
1708 #endif
1709 
1710 
1711 
1712 
1713 
1714 
1715 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
1716 // where we avoid one addition
1717 #define CODEBOOK_ELEMENT(c,off)          (c->multiplicands[off])
1718 #define CODEBOOK_ELEMENT_FAST(c,off)     (c->multiplicands[off])
1719 #define CODEBOOK_ELEMENT_BASE(c)         (0)
1720 
codebook_decode_start(vorb * f,Codebook * c)1721 static int codebook_decode_start(vorb *f, Codebook *c)
1722 {
1723    int z = -1;
1724 
1725    // type 0 is only legal in a scalar context
1726    if (c->lookup_type == 0)
1727       error(f, VORBIS_invalid_stream);
1728    else {
1729       DECODE_VQ(z,f,c);
1730       if (c->sparse) assert(z < c->sorted_entries);
1731       if (z < 0) {  // check for EOP
1732          if (!f->bytes_in_seg)
1733             if (f->last_seg)
1734                return z;
1735          error(f, VORBIS_invalid_stream);
1736       }
1737    }
1738    return z;
1739 }
1740 
codebook_decode(vorb * f,Codebook * c,float * output,int len)1741 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1742 {
1743    int i,z = codebook_decode_start(f,c);
1744    if (z < 0) return FALSE;
1745    if (len > c->dimensions) len = c->dimensions;
1746 
1747 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1748    if (c->lookup_type == 1) {
1749       float last = CODEBOOK_ELEMENT_BASE(c);
1750       int div = 1;
1751       for (i=0; i < len; ++i) {
1752          int off = (z / div) % c->lookup_values;
1753          float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1754          output[i] += val;
1755          if (c->sequence_p) last = val + c->minimum_value;
1756          div *= c->lookup_values;
1757       }
1758       return TRUE;
1759    }
1760 #endif
1761 
1762    z *= c->dimensions;
1763    if (c->sequence_p) {
1764       float last = CODEBOOK_ELEMENT_BASE(c);
1765       for (i=0; i < len; ++i) {
1766          float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1767          output[i] += val;
1768          last = val + c->minimum_value;
1769       }
1770    } else {
1771       float last = CODEBOOK_ELEMENT_BASE(c);
1772       for (i=0; i < len; ++i) {
1773          output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1774       }
1775    }
1776 
1777    return TRUE;
1778 }
1779 
codebook_decode_step(vorb * f,Codebook * c,float * output,int len,int step)1780 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1781 {
1782    int i,z = codebook_decode_start(f,c);
1783    float last = CODEBOOK_ELEMENT_BASE(c);
1784    if (z < 0) return FALSE;
1785    if (len > c->dimensions) len = c->dimensions;
1786 
1787 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1788    if (c->lookup_type == 1) {
1789       int div = 1;
1790       for (i=0; i < len; ++i) {
1791          int off = (z / div) % c->lookup_values;
1792          float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1793          output[i*step] += val;
1794          if (c->sequence_p) last = val;
1795          div *= c->lookup_values;
1796       }
1797       return TRUE;
1798    }
1799 #endif
1800 
1801    z *= c->dimensions;
1802    for (i=0; i < len; ++i) {
1803       float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1804       output[i*step] += val;
1805       if (c->sequence_p) last = val;
1806    }
1807 
1808    return TRUE;
1809 }
1810 
codebook_decode_deinterleave_repeat(vorb * f,Codebook * c,float ** outputs,int ch,int * c_inter_p,int * p_inter_p,int len,int total_decode)1811 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1812 {
1813    int c_inter = *c_inter_p;
1814    int p_inter = *p_inter_p;
1815    int i,z, effective = c->dimensions;
1816 
1817    // type 0 is only legal in a scalar context
1818    if (c->lookup_type == 0)   return error(f, VORBIS_invalid_stream);
1819 
1820    while (total_decode > 0) {
1821       float last = CODEBOOK_ELEMENT_BASE(c);
1822       DECODE_VQ(z,f,c);
1823       #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1824       assert(!c->sparse || z < c->sorted_entries);
1825       #endif
1826       if (z < 0) {
1827          if (!f->bytes_in_seg)
1828             if (f->last_seg) return FALSE;
1829          return error(f, VORBIS_invalid_stream);
1830       }
1831 
1832       // if this will take us off the end of the buffers, stop short!
1833       // we check by computing the length of the virtual interleaved
1834       // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1835       // and the length we'll be using (effective)
1836       if (c_inter + p_inter*ch + effective > len * ch) {
1837          effective = len*ch - (p_inter*ch - c_inter);
1838       }
1839 
1840    #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1841       if (c->lookup_type == 1) {
1842          int div = 1;
1843          for (i=0; i < effective; ++i) {
1844             int off = (z / div) % c->lookup_values;
1845             float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1846             if (outputs[c_inter])
1847                outputs[c_inter][p_inter] += val;
1848             if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1849             if (c->sequence_p) last = val;
1850             div *= c->lookup_values;
1851          }
1852       } else
1853    #endif
1854       {
1855          z *= c->dimensions;
1856          if (c->sequence_p) {
1857             for (i=0; i < effective; ++i) {
1858                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1859                if (outputs[c_inter])
1860                   outputs[c_inter][p_inter] += val;
1861                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1862                last = val;
1863             }
1864          } else {
1865             for (i=0; i < effective; ++i) {
1866                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1867                if (outputs[c_inter])
1868                   outputs[c_inter][p_inter] += val;
1869                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1870             }
1871          }
1872       }
1873 
1874       total_decode -= effective;
1875    }
1876    *c_inter_p = c_inter;
1877    *p_inter_p = p_inter;
1878    return TRUE;
1879 }
1880 
predict_point(int x,int x0,int x1,int y0,int y1)1881 static int predict_point(int x, int x0, int x1, int y0, int y1)
1882 {
1883    int dy = y1 - y0;
1884    int adx = x1 - x0;
1885    // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
1886    int err = abs(dy) * (x - x0);
1887    int off = err / adx;
1888    return dy < 0 ? y0 - off : y0 + off;
1889 }
1890 
1891 // the following table is block-copied from the specification
1892 static float inverse_db_table[256] =
1893 {
1894   1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
1895   1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
1896   1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
1897   2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
1898   2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
1899   3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
1900   4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
1901   6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
1902   7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
1903   1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
1904   1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
1905   1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
1906   2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
1907   2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
1908   3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
1909   4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
1910   5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
1911   7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
1912   9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
1913   1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
1914   1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
1915   2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
1916   2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
1917   3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
1918   4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
1919   5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
1920   7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
1921   9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
1922   0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
1923   0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
1924   0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
1925   0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
1926   0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
1927   0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
1928   0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
1929   0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
1930   0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f,
1931   0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f,
1932   0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f,
1933   0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f,
1934   0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f,
1935   0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f,
1936   0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f,
1937   0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f,
1938   0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f,
1939   0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f,
1940   0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f,
1941   0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f,
1942   0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f,
1943   0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f,
1944   0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f,
1945   0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f,
1946   0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f,
1947   0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f,
1948   0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f,
1949   0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f,
1950   0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f,
1951   0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f,
1952   0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f,
1953   0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f,
1954   0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f,
1955   0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f,
1956   0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f,
1957   0.82788260f,    0.88168307f,    0.9389798f,     1.0f
1958 };
1959 
1960 
1961 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1962 // note that you must produce bit-identical output to decode correctly;
1963 // this specific sequence of operations is specified in the spec (it's
1964 // drawing integer-quantized frequency-space lines that the encoder
1965 // expects to be exactly the same)
1966 //     ... also, isn't the whole point of Bresenham's algorithm to NOT
1967 // have to divide in the setup? sigh.
1968 #ifndef STB_VORBIS_NO_DEFER_FLOOR
1969 #define LINE_OP(a,b)   a *= b
1970 #else
1971 #define LINE_OP(a,b)   a = b
1972 #endif
1973 
1974 #ifdef STB_VORBIS_DIVIDE_TABLE
1975 #define DIVTAB_NUMER   32
1976 #define DIVTAB_DENOM   64
1977 int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
1978 #endif
1979 
draw_line(float * output,int x0,int y0,int x1,int y1,int n)1980 static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
1981 {
1982    int dy = y1 - y0;
1983    int adx = x1 - x0;
1984    int ady = abs(dy);
1985    int base;
1986    int x=x0,y=y0;
1987    int err = 0;
1988    int sy;
1989 
1990 #ifdef STB_VORBIS_DIVIDE_TABLE
1991    if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
1992       if (dy < 0) {
1993          base = -integer_divide_table[ady][adx];
1994          sy = base-1;
1995       } else {
1996          base =  integer_divide_table[ady][adx];
1997          sy = base+1;
1998       }
1999    } else {
2000       base = dy / adx;
2001       if (dy < 0)
2002          sy = base - 1;
2003       else
2004          sy = base+1;
2005    }
2006 #else
2007    base = dy / adx;
2008    if (dy < 0)
2009       sy = base - 1;
2010    else
2011       sy = base+1;
2012 #endif
2013    ady -= abs(base) * adx;
2014    if (x1 > n) x1 = n;
2015    if (x < x1) {
2016       LINE_OP(output[x], inverse_db_table[y]);
2017       for (++x; x < x1; ++x) {
2018          err += ady;
2019          if (err >= adx) {
2020             err -= adx;
2021             y += sy;
2022          } else
2023             y += base;
2024          LINE_OP(output[x], inverse_db_table[y]);
2025       }
2026    }
2027 }
2028 
residue_decode(vorb * f,Codebook * book,float * target,int offset,int n,int rtype)2029 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
2030 {
2031    int k;
2032    if (rtype == 0) {
2033       int step = n / book->dimensions;
2034       for (k=0; k < step; ++k)
2035          if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
2036             return FALSE;
2037    } else {
2038       for (k=0; k < n; ) {
2039          if (!codebook_decode(f, book, target+offset, n-k))
2040             return FALSE;
2041          k += book->dimensions;
2042          offset += book->dimensions;
2043       }
2044    }
2045    return TRUE;
2046 }
2047 
2048 // n is 1/2 of the blocksize --
2049 // specification: "Correct per-vector decode length is [n]/2"
decode_residue(vorb * f,float * residue_buffers[],int ch,int n,int rn,uint8 * do_not_decode)2050 static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
2051 {
2052    int i,j,pass;
2053    Residue *r = f->residue_config + rn;
2054    int rtype = f->residue_types[rn];
2055    int c = r->classbook;
2056    int classwords = f->codebooks[c].dimensions;
2057    unsigned int actual_size = rtype == 2 ? n*2 : n;
2058    unsigned int limit_r_begin = (r->begin < actual_size ? r->begin : actual_size);
2059    unsigned int limit_r_end   = (r->end   < actual_size ? r->end   : actual_size);
2060    int n_read = limit_r_end - limit_r_begin;
2061    int part_read = n_read / r->part_size;
2062    int temp_alloc_point = temp_alloc_save(f);
2063    #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2064    uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
2065    #else
2066    int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
2067    #endif
2068 
2069    CHECK(f);
2070 
2071    for (i=0; i < ch; ++i)
2072       if (!do_not_decode[i])
2073          memset(residue_buffers[i], 0, sizeof(float) * n);
2074 
2075    if (rtype == 2 && ch != 1) {
2076       for (j=0; j < ch; ++j)
2077          if (!do_not_decode[j])
2078             break;
2079       if (j == ch)
2080          goto done;
2081 
2082       for (pass=0; pass < 8; ++pass) {
2083          int pcount = 0, class_set = 0;
2084          if (ch == 2) {
2085             while (pcount < part_read) {
2086                int z = r->begin + pcount*r->part_size;
2087                int c_inter = (z & 1), p_inter = z>>1;
2088                if (pass == 0) {
2089                   Codebook *c = f->codebooks+r->classbook;
2090                   int q;
2091                   DECODE(q,f,c);
2092                   if (q == EOP) goto done;
2093                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2094                   part_classdata[0][class_set] = r->classdata[q];
2095                   #else
2096                   for (i=classwords-1; i >= 0; --i) {
2097                      classifications[0][i+pcount] = q % r->classifications;
2098                      q /= r->classifications;
2099                   }
2100                   #endif
2101                }
2102                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2103                   int z = r->begin + pcount*r->part_size;
2104                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2105                   int c = part_classdata[0][class_set][i];
2106                   #else
2107                   int c = classifications[0][pcount];
2108                   #endif
2109                   int b = r->residue_books[c][pass];
2110                   if (b >= 0) {
2111                      Codebook *book = f->codebooks + b;
2112                      #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
2113                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2114                         goto done;
2115                      #else
2116                      // saves 1%
2117                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2118                         goto done;
2119                      #endif
2120                   } else {
2121                      z += r->part_size;
2122                      c_inter = z & 1;
2123                      p_inter = z >> 1;
2124                   }
2125                }
2126                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2127                ++class_set;
2128                #endif
2129             }
2130          } else if (ch == 1) {
2131             while (pcount < part_read) {
2132                int z = r->begin + pcount*r->part_size;
2133                int c_inter = 0, p_inter = z;
2134                if (pass == 0) {
2135                   Codebook *c = f->codebooks+r->classbook;
2136                   int q;
2137                   DECODE(q,f,c);
2138                   if (q == EOP) goto done;
2139                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2140                   part_classdata[0][class_set] = r->classdata[q];
2141                   #else
2142                   for (i=classwords-1; i >= 0; --i) {
2143                      classifications[0][i+pcount] = q % r->classifications;
2144                      q /= r->classifications;
2145                   }
2146                   #endif
2147                }
2148                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2149                   int z = r->begin + pcount*r->part_size;
2150                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2151                   int c = part_classdata[0][class_set][i];
2152                   #else
2153                   int c = classifications[0][pcount];
2154                   #endif
2155                   int b = r->residue_books[c][pass];
2156                   if (b >= 0) {
2157                      Codebook *book = f->codebooks + b;
2158                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2159                         goto done;
2160                   } else {
2161                      z += r->part_size;
2162                      c_inter = 0;
2163                      p_inter = z;
2164                   }
2165                }
2166                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2167                ++class_set;
2168                #endif
2169             }
2170          } else {
2171             while (pcount < part_read) {
2172                int z = r->begin + pcount*r->part_size;
2173                int c_inter = z % ch, p_inter = z/ch;
2174                if (pass == 0) {
2175                   Codebook *c = f->codebooks+r->classbook;
2176                   int q;
2177                   DECODE(q,f,c);
2178                   if (q == EOP) goto done;
2179                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2180                   part_classdata[0][class_set] = r->classdata[q];
2181                   #else
2182                   for (i=classwords-1; i >= 0; --i) {
2183                      classifications[0][i+pcount] = q % r->classifications;
2184                      q /= r->classifications;
2185                   }
2186                   #endif
2187                }
2188                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2189                   int z = r->begin + pcount*r->part_size;
2190                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2191                   int c = part_classdata[0][class_set][i];
2192                   #else
2193                   int c = classifications[0][pcount];
2194                   #endif
2195                   int b = r->residue_books[c][pass];
2196                   if (b >= 0) {
2197                      Codebook *book = f->codebooks + b;
2198                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2199                         goto done;
2200                   } else {
2201                      z += r->part_size;
2202                      c_inter = z % ch;
2203                      p_inter = z / ch;
2204                   }
2205                }
2206                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2207                ++class_set;
2208                #endif
2209             }
2210          }
2211       }
2212       goto done;
2213    }
2214    CHECK(f);
2215 
2216    for (pass=0; pass < 8; ++pass) {
2217       int pcount = 0, class_set=0;
2218       while (pcount < part_read) {
2219          if (pass == 0) {
2220             for (j=0; j < ch; ++j) {
2221                if (!do_not_decode[j]) {
2222                   Codebook *c = f->codebooks+r->classbook;
2223                   int temp;
2224                   DECODE(temp,f,c);
2225                   if (temp == EOP) goto done;
2226                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2227                   part_classdata[j][class_set] = r->classdata[temp];
2228                   #else
2229                   for (i=classwords-1; i >= 0; --i) {
2230                      classifications[j][i+pcount] = temp % r->classifications;
2231                      temp /= r->classifications;
2232                   }
2233                   #endif
2234                }
2235             }
2236          }
2237          for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2238             for (j=0; j < ch; ++j) {
2239                if (!do_not_decode[j]) {
2240                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2241                   int c = part_classdata[j][class_set][i];
2242                   #else
2243                   int c = classifications[j][pcount];
2244                   #endif
2245                   int b = r->residue_books[c][pass];
2246                   if (b >= 0) {
2247                      float *target = residue_buffers[j];
2248                      int offset = r->begin + pcount * r->part_size;
2249                      int n = r->part_size;
2250                      Codebook *book = f->codebooks + b;
2251                      if (!residue_decode(f, book, target, offset, n, rtype))
2252                         goto done;
2253                   }
2254                }
2255             }
2256          }
2257          #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2258          ++class_set;
2259          #endif
2260       }
2261    }
2262   done:
2263    CHECK(f);
2264    #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2265    temp_free(f,part_classdata);
2266    #else
2267    temp_free(f,classifications);
2268    #endif
2269    temp_alloc_restore(f,temp_alloc_point);
2270 }
2271 
2272 
2273 #if 0
2274 // slow way for debugging
2275 void inverse_mdct_slow(float *buffer, int n)
2276 {
2277    int i,j;
2278    int n2 = n >> 1;
2279    float *x = (float *) malloc(sizeof(*x) * n2);
2280    memcpy(x, buffer, sizeof(*x) * n2);
2281    for (i=0; i < n; ++i) {
2282       float acc = 0;
2283       for (j=0; j < n2; ++j)
2284          // formula from paper:
2285          //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2286          // formula from wikipedia
2287          //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2288          // these are equivalent, except the formula from the paper inverts the multiplier!
2289          // however, what actually works is NO MULTIPLIER!?!
2290          //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2291          acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2292       buffer[i] = acc;
2293    }
2294    free(x);
2295 }
2296 #elif 0
2297 // same as above, but just barely able to run in real time on modern machines
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2298 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2299 {
2300    float mcos[16384];
2301    int i,j;
2302    int n2 = n >> 1, nmask = (n << 2) -1;
2303    float *x = (float *) malloc(sizeof(*x) * n2);
2304    memcpy(x, buffer, sizeof(*x) * n2);
2305    for (i=0; i < 4*n; ++i)
2306       mcos[i] = (float) cos(M_PI / 2 * i / n);
2307 
2308    for (i=0; i < n; ++i) {
2309       float acc = 0;
2310       for (j=0; j < n2; ++j)
2311          acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
2312       buffer[i] = acc;
2313    }
2314    free(x);
2315 }
2316 #elif 0
2317 // transform to use a slow dct-iv; this is STILL basically trivial,
2318 // but only requires half as many ops
dct_iv_slow(float * buffer,int n)2319 void dct_iv_slow(float *buffer, int n)
2320 {
2321    float mcos[16384];
2322    float x[2048];
2323    int i,j;
2324    int n2 = n >> 1, nmask = (n << 3) - 1;
2325    memcpy(x, buffer, sizeof(*x) * n);
2326    for (i=0; i < 8*n; ++i)
2327       mcos[i] = (float) cos(M_PI / 4 * i / n);
2328    for (i=0; i < n; ++i) {
2329       float acc = 0;
2330       for (j=0; j < n; ++j)
2331          acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
2332       buffer[i] = acc;
2333    }
2334 }
2335 
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2336 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2337 {
2338    int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
2339    float temp[4096];
2340 
2341    memcpy(temp, buffer, n2 * sizeof(float));
2342    dct_iv_slow(temp, n2);  // returns -c'-d, a-b'
2343 
2344    for (i=0; i < n4  ; ++i) buffer[i] = temp[i+n4];            // a-b'
2345    for (   ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1];   // b-a', c+d'
2346    for (   ; i < n   ; ++i) buffer[i] = -temp[i - n3_4];       // c'+d
2347 }
2348 #endif
2349 
2350 #ifndef LIBVORBIS_MDCT
2351 #define LIBVORBIS_MDCT 0
2352 #endif
2353 
2354 #if LIBVORBIS_MDCT
2355 // directly call the vorbis MDCT using an interface documented
2356 // by Jeff Roberts... useful for performance comparison
2357 typedef struct
2358 {
2359   int n;
2360   int log2n;
2361 
2362   float *trig;
2363   int   *bitrev;
2364 
2365   float scale;
2366 } mdct_lookup;
2367 
2368 extern void mdct_init(mdct_lookup *lookup, int n);
2369 extern void mdct_clear(mdct_lookup *l);
2370 extern void mdct_backward(mdct_lookup *init, float *in, float *out);
2371 
2372 mdct_lookup M1,M2;
2373 
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2374 void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2375 {
2376    mdct_lookup *M;
2377    if (M1.n == n) M = &M1;
2378    else if (M2.n == n) M = &M2;
2379    else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
2380    else {
2381       if (M2.n) __asm int 3;
2382       mdct_init(&M2, n);
2383       M = &M2;
2384    }
2385 
2386    mdct_backward(M, buffer, buffer);
2387 }
2388 #endif
2389 
2390 
2391 // the following were split out into separate functions while optimizing;
2392 // they could be pushed back up but eh. __forceinline showed no change;
2393 // they're probably already being inlined.
imdct_step3_iter0_loop(int n,float * e,int i_off,int k_off,float * A)2394 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
2395 {
2396    float *ee0 = e + i_off;
2397    float *ee2 = ee0 + k_off;
2398    int i;
2399 
2400    assert((n & 3) == 0);
2401    for (i=(n>>2); i > 0; --i) {
2402       float k00_20, k01_21;
2403       k00_20  = ee0[ 0] - ee2[ 0];
2404       k01_21  = ee0[-1] - ee2[-1];
2405       ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
2406       ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
2407       ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
2408       ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
2409       A += 8;
2410 
2411       k00_20  = ee0[-2] - ee2[-2];
2412       k01_21  = ee0[-3] - ee2[-3];
2413       ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
2414       ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
2415       ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
2416       ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
2417       A += 8;
2418 
2419       k00_20  = ee0[-4] - ee2[-4];
2420       k01_21  = ee0[-5] - ee2[-5];
2421       ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
2422       ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
2423       ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
2424       ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
2425       A += 8;
2426 
2427       k00_20  = ee0[-6] - ee2[-6];
2428       k01_21  = ee0[-7] - ee2[-7];
2429       ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
2430       ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
2431       ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
2432       ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
2433       A += 8;
2434       ee0 -= 8;
2435       ee2 -= 8;
2436    }
2437 }
2438 
imdct_step3_inner_r_loop(int lim,float * e,int d0,int k_off,float * A,int k1)2439 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
2440 {
2441    int i;
2442    float k00_20, k01_21;
2443 
2444    float *e0 = e + d0;
2445    float *e2 = e0 + k_off;
2446 
2447    for (i=lim >> 2; i > 0; --i) {
2448       k00_20 = e0[-0] - e2[-0];
2449       k01_21 = e0[-1] - e2[-1];
2450       e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
2451       e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
2452       e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
2453       e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
2454 
2455       A += k1;
2456 
2457       k00_20 = e0[-2] - e2[-2];
2458       k01_21 = e0[-3] - e2[-3];
2459       e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
2460       e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
2461       e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
2462       e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
2463 
2464       A += k1;
2465 
2466       k00_20 = e0[-4] - e2[-4];
2467       k01_21 = e0[-5] - e2[-5];
2468       e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
2469       e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
2470       e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
2471       e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
2472 
2473       A += k1;
2474 
2475       k00_20 = e0[-6] - e2[-6];
2476       k01_21 = e0[-7] - e2[-7];
2477       e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
2478       e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
2479       e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
2480       e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
2481 
2482       e0 -= 8;
2483       e2 -= 8;
2484 
2485       A += k1;
2486    }
2487 }
2488 
imdct_step3_inner_s_loop(int n,float * e,int i_off,int k_off,float * A,int a_off,int k0)2489 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
2490 {
2491    int i;
2492    float A0 = A[0];
2493    float A1 = A[0+1];
2494    float A2 = A[0+a_off];
2495    float A3 = A[0+a_off+1];
2496    float A4 = A[0+a_off*2+0];
2497    float A5 = A[0+a_off*2+1];
2498    float A6 = A[0+a_off*3+0];
2499    float A7 = A[0+a_off*3+1];
2500 
2501    float k00,k11;
2502 
2503    float *ee0 = e  +i_off;
2504    float *ee2 = ee0+k_off;
2505 
2506    for (i=n; i > 0; --i) {
2507       k00     = ee0[ 0] - ee2[ 0];
2508       k11     = ee0[-1] - ee2[-1];
2509       ee0[ 0] =  ee0[ 0] + ee2[ 0];
2510       ee0[-1] =  ee0[-1] + ee2[-1];
2511       ee2[ 0] = (k00) * A0 - (k11) * A1;
2512       ee2[-1] = (k11) * A0 + (k00) * A1;
2513 
2514       k00     = ee0[-2] - ee2[-2];
2515       k11     = ee0[-3] - ee2[-3];
2516       ee0[-2] =  ee0[-2] + ee2[-2];
2517       ee0[-3] =  ee0[-3] + ee2[-3];
2518       ee2[-2] = (k00) * A2 - (k11) * A3;
2519       ee2[-3] = (k11) * A2 + (k00) * A3;
2520 
2521       k00     = ee0[-4] - ee2[-4];
2522       k11     = ee0[-5] - ee2[-5];
2523       ee0[-4] =  ee0[-4] + ee2[-4];
2524       ee0[-5] =  ee0[-5] + ee2[-5];
2525       ee2[-4] = (k00) * A4 - (k11) * A5;
2526       ee2[-5] = (k11) * A4 + (k00) * A5;
2527 
2528       k00     = ee0[-6] - ee2[-6];
2529       k11     = ee0[-7] - ee2[-7];
2530       ee0[-6] =  ee0[-6] + ee2[-6];
2531       ee0[-7] =  ee0[-7] + ee2[-7];
2532       ee2[-6] = (k00) * A6 - (k11) * A7;
2533       ee2[-7] = (k11) * A6 + (k00) * A7;
2534 
2535       ee0 -= k0;
2536       ee2 -= k0;
2537    }
2538 }
2539 
iter_54(float * z)2540 static __forceinline void iter_54(float *z)
2541 {
2542    float k00,k11,k22,k33;
2543    float y0,y1,y2,y3;
2544 
2545    k00  = z[ 0] - z[-4];
2546    y0   = z[ 0] + z[-4];
2547    y2   = z[-2] + z[-6];
2548    k22  = z[-2] - z[-6];
2549 
2550    z[-0] = y0 + y2;      // z0 + z4 + z2 + z6
2551    z[-2] = y0 - y2;      // z0 + z4 - z2 - z6
2552 
2553    // done with y0,y2
2554 
2555    k33  = z[-3] - z[-7];
2556 
2557    z[-4] = k00 + k33;    // z0 - z4 + z3 - z7
2558    z[-6] = k00 - k33;    // z0 - z4 - z3 + z7
2559 
2560    // done with k33
2561 
2562    k11  = z[-1] - z[-5];
2563    y1   = z[-1] + z[-5];
2564    y3   = z[-3] + z[-7];
2565 
2566    z[-1] = y1 + y3;      // z1 + z5 + z3 + z7
2567    z[-3] = y1 - y3;      // z1 + z5 - z3 - z7
2568    z[-5] = k11 - k22;    // z1 - z5 + z2 - z6
2569    z[-7] = k11 + k22;    // z1 - z5 - z2 + z6
2570 }
2571 
imdct_step3_inner_s_loop_ld654(int n,float * e,int i_off,float * A,int base_n)2572 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
2573 {
2574    int a_off = base_n >> 3;
2575    float A2 = A[0+a_off];
2576    float *z = e + i_off;
2577    float *base = z - 16 * n;
2578 
2579    while (z > base) {
2580       float k00,k11;
2581 
2582       k00   = z[-0] - z[-8];
2583       k11   = z[-1] - z[-9];
2584       z[-0] = z[-0] + z[-8];
2585       z[-1] = z[-1] + z[-9];
2586       z[-8] =  k00;
2587       z[-9] =  k11 ;
2588 
2589       k00    = z[ -2] - z[-10];
2590       k11    = z[ -3] - z[-11];
2591       z[ -2] = z[ -2] + z[-10];
2592       z[ -3] = z[ -3] + z[-11];
2593       z[-10] = (k00+k11) * A2;
2594       z[-11] = (k11-k00) * A2;
2595 
2596       k00    = z[-12] - z[ -4];  // reverse to avoid a unary negation
2597       k11    = z[ -5] - z[-13];
2598       z[ -4] = z[ -4] + z[-12];
2599       z[ -5] = z[ -5] + z[-13];
2600       z[-12] = k11;
2601       z[-13] = k00;
2602 
2603       k00    = z[-14] - z[ -6];  // reverse to avoid a unary negation
2604       k11    = z[ -7] - z[-15];
2605       z[ -6] = z[ -6] + z[-14];
2606       z[ -7] = z[ -7] + z[-15];
2607       z[-14] = (k00+k11) * A2;
2608       z[-15] = (k00-k11) * A2;
2609 
2610       iter_54(z);
2611       iter_54(z-8);
2612       z -= 16;
2613    }
2614 }
2615 
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2616 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2617 {
2618    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2619    int ld;
2620    // @OPTIMIZE: reduce register pressure by using fewer variables?
2621    int save_point = temp_alloc_save(f);
2622    float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
2623    float *u=NULL,*v=NULL;
2624    // twiddle factors
2625    float *A = f->A[blocktype];
2626 
2627    // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2628    // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
2629 
2630    // kernel from paper
2631 
2632 
2633    // merged:
2634    //   copy and reflect spectral data
2635    //   step 0
2636 
2637    // note that it turns out that the items added together during
2638    // this step are, in fact, being added to themselves (as reflected
2639    // by step 0). inexplicable inefficiency! this became obvious
2640    // once I combined the passes.
2641 
2642    // so there's a missing 'times 2' here (for adding X to itself).
2643    // this propagates through linearly to the end, where the numbers
2644    // are 1/2 too small, and need to be compensated for.
2645 
2646    {
2647       float *d,*e, *AA, *e_stop;
2648       d = &buf2[n2-2];
2649       AA = A;
2650       e = &buffer[0];
2651       e_stop = &buffer[n2];
2652       while (e != e_stop) {
2653          d[1] = (e[0] * AA[0] - e[2]*AA[1]);
2654          d[0] = (e[0] * AA[1] + e[2]*AA[0]);
2655          d -= 2;
2656          AA += 2;
2657          e += 4;
2658       }
2659 
2660       e = &buffer[n2-3];
2661       while (d >= buf2) {
2662          d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
2663          d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
2664          d -= 2;
2665          AA += 2;
2666          e -= 4;
2667       }
2668    }
2669 
2670    // now we use symbolic names for these, so that we can
2671    // possibly swap their meaning as we change which operations
2672    // are in place
2673 
2674    u = buffer;
2675    v = buf2;
2676 
2677    // step 2    (paper output is w, now u)
2678    // this could be in place, but the data ends up in the wrong
2679    // place... _somebody_'s got to swap it, so this is nominated
2680    {
2681       float *AA = &A[n2-8];
2682       float *d0,*d1, *e0, *e1;
2683 
2684       e0 = &v[n4];
2685       e1 = &v[0];
2686 
2687       d0 = &u[n4];
2688       d1 = &u[0];
2689 
2690       while (AA >= A) {
2691          float v40_20, v41_21;
2692 
2693          v41_21 = e0[1] - e1[1];
2694          v40_20 = e0[0] - e1[0];
2695          d0[1]  = e0[1] + e1[1];
2696          d0[0]  = e0[0] + e1[0];
2697          d1[1]  = v41_21*AA[4] - v40_20*AA[5];
2698          d1[0]  = v40_20*AA[4] + v41_21*AA[5];
2699 
2700          v41_21 = e0[3] - e1[3];
2701          v40_20 = e0[2] - e1[2];
2702          d0[3]  = e0[3] + e1[3];
2703          d0[2]  = e0[2] + e1[2];
2704          d1[3]  = v41_21*AA[0] - v40_20*AA[1];
2705          d1[2]  = v40_20*AA[0] + v41_21*AA[1];
2706 
2707          AA -= 8;
2708 
2709          d0 += 4;
2710          d1 += 4;
2711          e0 += 4;
2712          e1 += 4;
2713       }
2714    }
2715 
2716    // step 3
2717    ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2718 
2719    // optimized step 3:
2720 
2721    // the original step3 loop can be nested r inside s or s inside r;
2722    // it's written originally as s inside r, but this is dumb when r
2723    // iterates many times, and s few. So I have two copies of it and
2724    // switch between them halfway.
2725 
2726    // this is iteration 0 of step 3
2727    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2728    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2729 
2730    // this is iteration 1 of step 3
2731    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2732    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2733    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2734    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2735 
2736    l=2;
2737    for (; l < (ld-3)>>1; ++l) {
2738       int k0 = n >> (l+2), k0_2 = k0>>1;
2739       int lim = 1 << (l+1);
2740       int i;
2741       for (i=0; i < lim; ++i)
2742          imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2743    }
2744 
2745    for (; l < ld-6; ++l) {
2746       int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2747       int rlim = n >> (l+6), r;
2748       int lim = 1 << (l+1);
2749       int i_off;
2750       float *A0 = A;
2751       i_off = n2-1;
2752       for (r=rlim; r > 0; --r) {
2753          imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2754          A0 += k1*4;
2755          i_off -= 8;
2756       }
2757    }
2758 
2759    // iterations with count:
2760    //   ld-6,-5,-4 all interleaved together
2761    //       the big win comes from getting rid of needless flops
2762    //         due to the constants on pass 5 & 4 being all 1 and 0;
2763    //       combining them to be simultaneous to improve cache made little difference
2764    imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2765 
2766    // output is u
2767 
2768    // step 4, 5, and 6
2769    // cannot be in-place because of step 5
2770    {
2771       uint16 *bitrev = f->bit_reverse[blocktype];
2772       // weirdly, I'd have thought reading sequentially and writing
2773       // erratically would have been better than vice-versa, but in
2774       // fact that's not what my testing showed. (That is, with
2775       // j = bitreverse(i), do you read i and write j, or read j and write i.)
2776 
2777       float *d0 = &v[n4-4];
2778       float *d1 = &v[n2-4];
2779       while (d0 >= v) {
2780          int k4;
2781 
2782          k4 = bitrev[0];
2783          d1[3] = u[k4+0];
2784          d1[2] = u[k4+1];
2785          d0[3] = u[k4+2];
2786          d0[2] = u[k4+3];
2787 
2788          k4 = bitrev[1];
2789          d1[1] = u[k4+0];
2790          d1[0] = u[k4+1];
2791          d0[1] = u[k4+2];
2792          d0[0] = u[k4+3];
2793 
2794          d0 -= 4;
2795          d1 -= 4;
2796          bitrev += 2;
2797       }
2798    }
2799    // (paper output is u, now v)
2800 
2801 
2802    // data must be in buf2
2803    assert(v == buf2);
2804 
2805    // step 7   (paper output is v, now v)
2806    // this is now in place
2807    {
2808       float *C = f->C[blocktype];
2809       float *d, *e;
2810 
2811       d = v;
2812       e = v + n2 - 4;
2813 
2814       while (d < e) {
2815          float a02,a11,b0,b1,b2,b3;
2816 
2817          a02 = d[0] - e[2];
2818          a11 = d[1] + e[3];
2819 
2820          b0 = C[1]*a02 + C[0]*a11;
2821          b1 = C[1]*a11 - C[0]*a02;
2822 
2823          b2 = d[0] + e[ 2];
2824          b3 = d[1] - e[ 3];
2825 
2826          d[0] = b2 + b0;
2827          d[1] = b3 + b1;
2828          e[2] = b2 - b0;
2829          e[3] = b1 - b3;
2830 
2831          a02 = d[2] - e[0];
2832          a11 = d[3] + e[1];
2833 
2834          b0 = C[3]*a02 + C[2]*a11;
2835          b1 = C[3]*a11 - C[2]*a02;
2836 
2837          b2 = d[2] + e[ 0];
2838          b3 = d[3] - e[ 1];
2839 
2840          d[2] = b2 + b0;
2841          d[3] = b3 + b1;
2842          e[0] = b2 - b0;
2843          e[1] = b1 - b3;
2844 
2845          C += 4;
2846          d += 4;
2847          e -= 4;
2848       }
2849    }
2850 
2851    // data must be in buf2
2852 
2853 
2854    // step 8+decode   (paper output is X, now buffer)
2855    // this generates pairs of data a la 8 and pushes them directly through
2856    // the decode kernel (pushing rather than pulling) to avoid having
2857    // to make another pass later
2858 
2859    // this cannot POSSIBLY be in place, so we refer to the buffers directly
2860 
2861    {
2862       float *d0,*d1,*d2,*d3;
2863 
2864       float *B = f->B[blocktype] + n2 - 8;
2865       float *e = buf2 + n2 - 8;
2866       d0 = &buffer[0];
2867       d1 = &buffer[n2-4];
2868       d2 = &buffer[n2];
2869       d3 = &buffer[n-4];
2870       while (e >= v) {
2871          float p0,p1,p2,p3;
2872 
2873          p3 =  e[6]*B[7] - e[7]*B[6];
2874          p2 = -e[6]*B[6] - e[7]*B[7];
2875 
2876          d0[0] =   p3;
2877          d1[3] = - p3;
2878          d2[0] =   p2;
2879          d3[3] =   p2;
2880 
2881          p1 =  e[4]*B[5] - e[5]*B[4];
2882          p0 = -e[4]*B[4] - e[5]*B[5];
2883 
2884          d0[1] =   p1;
2885          d1[2] = - p1;
2886          d2[1] =   p0;
2887          d3[2] =   p0;
2888 
2889          p3 =  e[2]*B[3] - e[3]*B[2];
2890          p2 = -e[2]*B[2] - e[3]*B[3];
2891 
2892          d0[2] =   p3;
2893          d1[1] = - p3;
2894          d2[2] =   p2;
2895          d3[1] =   p2;
2896 
2897          p1 =  e[0]*B[1] - e[1]*B[0];
2898          p0 = -e[0]*B[0] - e[1]*B[1];
2899 
2900          d0[3] =   p1;
2901          d1[0] = - p1;
2902          d2[3] =   p0;
2903          d3[0] =   p0;
2904 
2905          B -= 8;
2906          e -= 8;
2907          d0 += 4;
2908          d2 += 4;
2909          d1 -= 4;
2910          d3 -= 4;
2911       }
2912    }
2913 
2914    temp_free(f,buf2);
2915    temp_alloc_restore(f,save_point);
2916 }
2917 
2918 #if 0
2919 // this is the original version of the above code, if you want to optimize it from scratch
2920 void inverse_mdct_naive(float *buffer, int n)
2921 {
2922    float s;
2923    float A[1 << 12], B[1 << 12], C[1 << 11];
2924    int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2925    int n3_4 = n - n4, ld;
2926    // how can they claim this only uses N words?!
2927    // oh, because they're only used sparsely, whoops
2928    float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];
2929    // set up twiddle factors
2930 
2931    for (k=k2=0; k < n4; ++k,k2+=2) {
2932       A[k2  ] = (float)  cos(4*k*M_PI/n);
2933       A[k2+1] = (float) -sin(4*k*M_PI/n);
2934       B[k2  ] = (float)  cos((k2+1)*M_PI/n/2);
2935       B[k2+1] = (float)  sin((k2+1)*M_PI/n/2);
2936    }
2937    for (k=k2=0; k < n8; ++k,k2+=2) {
2938       C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
2939       C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
2940    }
2941 
2942    // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2943    // Note there are bugs in that pseudocode, presumably due to them attempting
2944    // to rename the arrays nicely rather than representing the way their actual
2945    // implementation bounces buffers back and forth. As a result, even in the
2946    // "some formulars corrected" version, a direct implementation fails. These
2947    // are noted below as "paper bug".
2948 
2949    // copy and reflect spectral data
2950    for (k=0; k < n2; ++k) u[k] = buffer[k];
2951    for (   ; k < n ; ++k) u[k] = -buffer[n - k - 1];
2952    // kernel from paper
2953    // step 1
2954    for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
2955       v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2]   - (u[k4+2] - u[n-k4-3])*A[k2+1];
2956       v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
2957    }
2958    // step 2
2959    for (k=k4=0; k < n8; k+=1, k4+=4) {
2960       w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
2961       w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
2962       w[k4+3]    = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
2963       w[k4+1]    = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
2964    }
2965    // step 3
2966    ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2967    for (l=0; l < ld-3; ++l) {
2968       int k0 = n >> (l+2), k1 = 1 << (l+3);
2969       int rlim = n >> (l+4), r4, r;
2970       int s2lim = 1 << (l+2), s2;
2971       for (r=r4=0; r < rlim; r4+=4,++r) {
2972          for (s2=0; s2 < s2lim; s2+=2) {
2973             u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
2974             u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
2975             u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
2976                                 - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
2977             u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
2978                                 + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
2979          }
2980       }
2981       if (l+1 < ld-3) {
2982          // paper bug: ping-ponging of u&w here is omitted
2983          memcpy(w, u, sizeof(u));
2984       }
2985    }
2986 
2987    // step 4
2988    for (i=0; i < n8; ++i) {
2989       int j = bit_reverse(i) >> (32-ld+3);
2990       assert(j < n8);
2991       if (i == j) {
2992          // paper bug: original code probably swapped in place; if copying,
2993          //            need to directly copy in this case
2994          int i8 = i << 3;
2995          v[i8+1] = u[i8+1];
2996          v[i8+3] = u[i8+3];
2997          v[i8+5] = u[i8+5];
2998          v[i8+7] = u[i8+7];
2999       } else if (i < j) {
3000          int i8 = i << 3, j8 = j << 3;
3001          v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
3002          v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
3003          v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
3004          v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
3005       }
3006    }
3007    // step 5
3008    for (k=0; k < n2; ++k) {
3009       w[k] = v[k*2+1];
3010    }
3011    // step 6
3012    for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
3013       u[n-1-k2] = w[k4];
3014       u[n-2-k2] = w[k4+1];
3015       u[n3_4 - 1 - k2] = w[k4+2];
3016       u[n3_4 - 2 - k2] = w[k4+3];
3017    }
3018    // step 7
3019    for (k=k2=0; k < n8; ++k, k2 += 2) {
3020       v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3021       v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3022       v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3023       v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3024    }
3025    // step 8
3026    for (k=k2=0; k < n4; ++k,k2 += 2) {
3027       X[k]      = v[k2+n2]*B[k2  ] + v[k2+1+n2]*B[k2+1];
3028       X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2  ];
3029    }
3030 
3031    // decode kernel to output
3032    // determined the following value experimentally
3033    // (by first figuring out what made inverse_mdct_slow work); then matching that here
3034    // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
3035    s = 0.5; // theoretically would be n4
3036 
3037    // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
3038    //     so it needs to use the "old" B values to behave correctly, or else
3039    //     set s to 1.0 ]]]
3040    for (i=0; i < n4  ; ++i) buffer[i] = s * X[i+n4];
3041    for (   ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
3042    for (   ; i < n   ; ++i) buffer[i] = -s * X[i - n3_4];
3043 }
3044 #endif
3045 
get_window(vorb * f,int len)3046 static float *get_window(vorb *f, int len)
3047 {
3048    len <<= 1;
3049    if (len == f->blocksize_0) return f->window[0];
3050    if (len == f->blocksize_1) return f->window[1];
3051    assert(0);
3052    return NULL;
3053 }
3054 
3055 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3056 typedef int16 YTYPE;
3057 #else
3058 typedef int YTYPE;
3059 #endif
do_floor(vorb * f,Mapping * map,int i,int n,float * target,YTYPE * finalY,uint8 * step2_flag)3060 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
3061 {
3062    int n2 = n >> 1;
3063    int s = map->chan[i].mux, floor;
3064    floor = map->submap_floor[s];
3065    if (f->floor_types[floor] == 0) {
3066       return error(f, VORBIS_invalid_stream);
3067    } else {
3068       Floor1 *g = &f->floor_config[floor].floor1;
3069       int j,q;
3070       int lx = 0, ly = finalY[0] * g->floor1_multiplier;
3071       for (q=1; q < g->values; ++q) {
3072          j = g->sorted_order[q];
3073          #ifndef STB_VORBIS_NO_DEFER_FLOOR
3074          if (finalY[j] >= 0)
3075          #else
3076          if (step2_flag[j])
3077          #endif
3078          {
3079             int hy = finalY[j] * g->floor1_multiplier;
3080             int hx = g->Xlist[j];
3081             if (lx != hx)
3082                draw_line(target, lx,ly, hx,hy, n2);
3083             CHECK(f);
3084             lx = hx, ly = hy;
3085          }
3086       }
3087       if (lx < n2) {
3088          // optimization of: draw_line(target, lx,ly, n,ly, n2);
3089          for (j=lx; j < n2; ++j)
3090             LINE_OP(target[j], inverse_db_table[ly]);
3091          CHECK(f);
3092       }
3093    }
3094    return TRUE;
3095 }
3096 
3097 // The meaning of "left" and "right"
3098 //
3099 // For a given frame:
3100 //     we compute samples from 0..n
3101 //     window_center is n/2
3102 //     we'll window and mix the samples from left_start to left_end with data from the previous frame
3103 //     all of the samples from left_end to right_start can be output without mixing; however,
3104 //        this interval is 0-length except when transitioning between short and long frames
3105 //     all of the samples from right_start to right_end need to be mixed with the next frame,
3106 //        which we don't have, so those get saved in a buffer
3107 //     frame N's right_end-right_start, the number of samples to mix with the next frame,
3108 //        has to be the same as frame N+1's left_end-left_start (which they are by
3109 //        construction)
3110 
vorbis_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)3111 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
3112 {
3113    Mode *m;
3114    int i, n, prev, next, window_center;
3115    f->channel_buffer_start = f->channel_buffer_end = 0;
3116 
3117   retry:
3118    if (f->eof) return FALSE;
3119    if (!maybe_start_packet(f))
3120       return FALSE;
3121    // check packet type
3122    if (get_bits(f,1) != 0) {
3123       if (IS_PUSH_MODE(f))
3124          return error(f,VORBIS_bad_packet_type);
3125       while (EOP != get8_packet(f));
3126       goto retry;
3127    }
3128 
3129    if (f->alloc.alloc_buffer)
3130       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3131 
3132    i = get_bits(f, ilog(f->mode_count-1));
3133    if (i == EOP) return FALSE;
3134    if (i >= f->mode_count) return FALSE;
3135    *mode = i;
3136    m = f->mode_config + i;
3137    if (m->blockflag) {
3138       n = f->blocksize_1;
3139       prev = get_bits(f,1);
3140       next = get_bits(f,1);
3141    } else {
3142       prev = next = 0;
3143       n = f->blocksize_0;
3144    }
3145 
3146 // WINDOWING
3147 
3148    window_center = n >> 1;
3149    if (m->blockflag && !prev) {
3150       *p_left_start = (n - f->blocksize_0) >> 2;
3151       *p_left_end   = (n + f->blocksize_0) >> 2;
3152    } else {
3153       *p_left_start = 0;
3154       *p_left_end   = window_center;
3155    }
3156    if (m->blockflag && !next) {
3157       *p_right_start = (n*3 - f->blocksize_0) >> 2;
3158       *p_right_end   = (n*3 + f->blocksize_0) >> 2;
3159    } else {
3160       *p_right_start = window_center;
3161       *p_right_end   = n;
3162    }
3163 
3164    return TRUE;
3165 }
3166 
vorbis_decode_packet_rest(vorb * f,int * len,Mode * m,int left_start,int left_end,int right_start,int right_end,int * p_left)3167 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
3168 {
3169    Mapping *map;
3170    int i,j,k,n,n2;
3171    int zero_channel[256];
3172    int really_zero_channel[256];
3173 
3174 // WINDOWING
3175 
3176    n = f->blocksize[m->blockflag];
3177    map = &f->mapping[m->mapping];
3178 
3179 // FLOORS
3180    n2 = n >> 1;
3181 
3182    CHECK(f);
3183 
3184    for (i=0; i < f->channels; ++i) {
3185       int s = map->chan[i].mux, floor;
3186       zero_channel[i] = FALSE;
3187       floor = map->submap_floor[s];
3188       if (f->floor_types[floor] == 0) {
3189          return error(f, VORBIS_invalid_stream);
3190       } else {
3191          Floor1 *g = &f->floor_config[floor].floor1;
3192          if (get_bits(f, 1)) {
3193             short *finalY;
3194             uint8 step2_flag[256];
3195             static int range_list[4] = { 256, 128, 86, 64 };
3196             int range = range_list[g->floor1_multiplier-1];
3197             int offset = 2;
3198             finalY = f->finalY[i];
3199             finalY[0] = get_bits(f, ilog(range)-1);
3200             finalY[1] = get_bits(f, ilog(range)-1);
3201             for (j=0; j < g->partitions; ++j) {
3202                int pclass = g->partition_class_list[j];
3203                int cdim = g->class_dimensions[pclass];
3204                int cbits = g->class_subclasses[pclass];
3205                int csub = (1 << cbits)-1;
3206                int cval = 0;
3207                if (cbits) {
3208                   Codebook *c = f->codebooks + g->class_masterbooks[pclass];
3209                   DECODE(cval,f,c);
3210                }
3211                for (k=0; k < cdim; ++k) {
3212                   int book = g->subclass_books[pclass][cval & csub];
3213                   cval = cval >> cbits;
3214                   if (book >= 0) {
3215                      int temp;
3216                      Codebook *c = f->codebooks + book;
3217                      DECODE(temp,f,c);
3218                      finalY[offset++] = temp;
3219                   } else
3220                      finalY[offset++] = 0;
3221                }
3222             }
3223             if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
3224             step2_flag[0] = step2_flag[1] = 1;
3225             for (j=2; j < g->values; ++j) {
3226                int low, high, pred, highroom, lowroom, room, val;
3227                low = g->neighbors[j][0];
3228                high = g->neighbors[j][1];
3229                //neighbors(g->Xlist, j, &low, &high);
3230                pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
3231                val = finalY[j];
3232                highroom = range - pred;
3233                lowroom = pred;
3234                if (highroom < lowroom)
3235                   room = highroom * 2;
3236                else
3237                   room = lowroom * 2;
3238                if (val) {
3239                   step2_flag[low] = step2_flag[high] = 1;
3240                   step2_flag[j] = 1;
3241                   if (val >= room)
3242                      if (highroom > lowroom)
3243                         finalY[j] = val - lowroom + pred;
3244                      else
3245                         finalY[j] = pred - val + highroom - 1;
3246                   else
3247                      if (val & 1)
3248                         finalY[j] = pred - ((val+1)>>1);
3249                      else
3250                         finalY[j] = pred + (val>>1);
3251                } else {
3252                   step2_flag[j] = 0;
3253                   finalY[j] = pred;
3254                }
3255             }
3256 
3257 #ifdef STB_VORBIS_NO_DEFER_FLOOR
3258             do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
3259 #else
3260             // defer final floor computation until _after_ residue
3261             for (j=0; j < g->values; ++j) {
3262                if (!step2_flag[j])
3263                   finalY[j] = -1;
3264             }
3265 #endif
3266          } else {
3267            error:
3268             zero_channel[i] = TRUE;
3269          }
3270          // So we just defer everything else to later
3271 
3272          // at this point we've decoded the floor into buffer
3273       }
3274    }
3275    CHECK(f);
3276    // at this point we've decoded all floors
3277 
3278    if (f->alloc.alloc_buffer)
3279       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3280 
3281    // re-enable coupled channels if necessary
3282    memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
3283    for (i=0; i < map->coupling_steps; ++i)
3284       if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
3285          zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
3286       }
3287 
3288    CHECK(f);
3289 // RESIDUE DECODE
3290    for (i=0; i < map->submaps; ++i) {
3291       float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
3292       int r;
3293       uint8 do_not_decode[256];
3294       int ch = 0;
3295       for (j=0; j < f->channels; ++j) {
3296          if (map->chan[j].mux == i) {
3297             if (zero_channel[j]) {
3298                do_not_decode[ch] = TRUE;
3299                residue_buffers[ch] = NULL;
3300             } else {
3301                do_not_decode[ch] = FALSE;
3302                residue_buffers[ch] = f->channel_buffers[j];
3303             }
3304             ++ch;
3305          }
3306       }
3307       r = map->submap_residue[i];
3308       decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
3309    }
3310 
3311    if (f->alloc.alloc_buffer)
3312       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3313    CHECK(f);
3314 
3315 // INVERSE COUPLING
3316    for (i = map->coupling_steps-1; i >= 0; --i) {
3317       int n2 = n >> 1;
3318       float *m = f->channel_buffers[map->chan[i].magnitude];
3319       float *a = f->channel_buffers[map->chan[i].angle    ];
3320       for (j=0; j < n2; ++j) {
3321          float a2,m2;
3322          if (m[j] > 0)
3323             if (a[j] > 0)
3324                m2 = m[j], a2 = m[j] - a[j];
3325             else
3326                a2 = m[j], m2 = m[j] + a[j];
3327          else
3328             if (a[j] > 0)
3329                m2 = m[j], a2 = m[j] + a[j];
3330             else
3331                a2 = m[j], m2 = m[j] - a[j];
3332          m[j] = m2;
3333          a[j] = a2;
3334       }
3335    }
3336    CHECK(f);
3337 
3338    // finish decoding the floors
3339 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3340    for (i=0; i < f->channels; ++i) {
3341       if (really_zero_channel[i]) {
3342          memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3343       } else {
3344          do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
3345       }
3346    }
3347 #else
3348    for (i=0; i < f->channels; ++i) {
3349       if (really_zero_channel[i]) {
3350          memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3351       } else {
3352          for (j=0; j < n2; ++j)
3353             f->channel_buffers[i][j] *= f->floor_buffers[i][j];
3354       }
3355    }
3356 #endif
3357 
3358 // INVERSE MDCT
3359    CHECK(f);
3360    for (i=0; i < f->channels; ++i)
3361       inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
3362    CHECK(f);
3363 
3364    // this shouldn't be necessary, unless we exited on an error
3365    // and want to flush to get to the next packet
3366    flush_packet(f);
3367 
3368    if (f->first_decode) {
3369       // assume we start so first non-discarded sample is sample 0
3370       // this isn't to spec, but spec would require us to read ahead
3371       // and decode the size of all current frames--could be done,
3372       // but presumably it's not a commonly used feature
3373       f->current_loc = -n2; // start of first frame is positioned for discard
3374       // we might have to discard samples "from" the next frame too,
3375       // if we're lapping a large block then a small at the start?
3376       f->discard_samples_deferred = n - right_end;
3377       f->current_loc_valid = TRUE;
3378       f->first_decode = FALSE;
3379    } else if (f->discard_samples_deferred) {
3380       if (f->discard_samples_deferred >= right_start - left_start) {
3381          f->discard_samples_deferred -= (right_start - left_start);
3382          left_start = right_start;
3383          *p_left = left_start;
3384       } else {
3385          left_start += f->discard_samples_deferred;
3386          *p_left = left_start;
3387          f->discard_samples_deferred = 0;
3388       }
3389    } else if (f->previous_length == 0 && f->current_loc_valid) {
3390       // we're recovering from a seek... that means we're going to discard
3391       // the samples from this packet even though we know our position from
3392       // the last page header, so we need to update the position based on
3393       // the discarded samples here
3394       // but wait, the code below is going to add this in itself even
3395       // on a discard, so we don't need to do it here...
3396    }
3397 
3398    // check if we have ogg information about the sample # for this packet
3399    if (f->last_seg_which == f->end_seg_with_known_loc) {
3400       // if we have a valid current loc, and this is final:
3401       if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
3402          uint32 current_end = f->known_loc_for_packet;
3403          // then let's infer the size of the (probably) short final frame
3404          if (current_end < f->current_loc + (right_end-left_start)) {
3405             if (current_end < f->current_loc) {
3406                // negative truncation, that's impossible!
3407                *len = 0;
3408             } else {
3409                *len = current_end - f->current_loc;
3410             }
3411             *len += left_start; // this doesn't seem right, but has no ill effect on my test files
3412             if (*len > right_end) *len = right_end; // this should never happen
3413             f->current_loc += *len;
3414             return TRUE;
3415          }
3416       }
3417       // otherwise, just set our sample loc
3418       // guess that the ogg granule pos refers to the _middle_ of the
3419       // last frame?
3420       // set f->current_loc to the position of left_start
3421       f->current_loc = f->known_loc_for_packet - (n2-left_start);
3422       f->current_loc_valid = TRUE;
3423    }
3424    if (f->current_loc_valid)
3425       f->current_loc += (right_start - left_start);
3426 
3427    if (f->alloc.alloc_buffer)
3428       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3429    *len = right_end;  // ignore samples after the window goes to 0
3430    CHECK(f);
3431 
3432    return TRUE;
3433 }
3434 
vorbis_decode_packet(vorb * f,int * len,int * p_left,int * p_right)3435 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
3436 {
3437    int mode, left_end, right_end;
3438    if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
3439    return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
3440 }
3441 
vorbis_finish_frame(stb_vorbis * f,int len,int left,int right)3442 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
3443 {
3444    int prev,i,j;
3445    // we use right&left (the start of the right- and left-window sin()-regions)
3446    // to determine how much to return, rather than inferring from the rules
3447    // (same result, clearer code); 'left' indicates where our sin() window
3448    // starts, therefore where the previous window's right edge starts, and
3449    // therefore where to start mixing from the previous buffer. 'right'
3450    // indicates where our sin() ending-window starts, therefore that's where
3451    // we start saving, and where our returned-data ends.
3452 
3453    // mixin from previous window
3454    if (f->previous_length) {
3455       int i,j, n = f->previous_length;
3456       float *w = get_window(f, n);
3457       for (i=0; i < f->channels; ++i) {
3458          for (j=0; j < n; ++j)
3459             f->channel_buffers[i][left+j] =
3460                f->channel_buffers[i][left+j]*w[    j] +
3461                f->previous_window[i][     j]*w[n-1-j];
3462       }
3463    }
3464 
3465    prev = f->previous_length;
3466 
3467    // last half of this data becomes previous window
3468    f->previous_length = len - right;
3469 
3470    // @OPTIMIZE: could avoid this copy by double-buffering the
3471    // output (flipping previous_window with channel_buffers), but
3472    // then previous_window would have to be 2x as large, and
3473    // channel_buffers couldn't be temp mem (although they're NOT
3474    // currently temp mem, they could be (unless we want to level
3475    // performance by spreading out the computation))
3476    for (i=0; i < f->channels; ++i)
3477       for (j=0; right+j < len; ++j)
3478          f->previous_window[i][j] = f->channel_buffers[i][right+j];
3479 
3480    if (!prev)
3481       // there was no previous packet, so this data isn't valid...
3482       // this isn't entirely true, only the would-have-overlapped data
3483       // isn't valid, but this seems to be what the spec requires
3484       return 0;
3485 
3486    // truncate a short frame
3487    if (len < right) right = len;
3488 
3489    f->samples_output += right-left;
3490 
3491    return right - left;
3492 }
3493 
vorbis_pump_first_frame(stb_vorbis * f)3494 static int vorbis_pump_first_frame(stb_vorbis *f)
3495 {
3496    int len, right, left, res;
3497    res = vorbis_decode_packet(f, &len, &left, &right);
3498    if (res)
3499       vorbis_finish_frame(f, len, left, right);
3500    return res;
3501 }
3502 
3503 #ifndef STB_VORBIS_NO_PUSHDATA_API
is_whole_packet_present(stb_vorbis * f,int end_page)3504 static int is_whole_packet_present(stb_vorbis *f, int end_page)
3505 {
3506    // make sure that we have the packet available before continuing...
3507    // this requires a full ogg parse, but we know we can fetch from f->stream
3508 
3509    // instead of coding this out explicitly, we could save the current read state,
3510    // read the next packet with get8() until end-of-packet, check f->eof, then
3511    // reset the state? but that would be slower, esp. since we'd have over 256 bytes
3512    // of state to restore (primarily the page segment table)
3513 
3514    int s = f->next_seg, first = TRUE;
3515    uint8 *p = f->stream;
3516 
3517    if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
3518       for (; s < f->segment_count; ++s) {
3519          p += f->segments[s];
3520          if (f->segments[s] < 255)               // stop at first short segment
3521             break;
3522       }
3523       // either this continues, or it ends it...
3524       if (end_page)
3525          if (s < f->segment_count-1)             return error(f, VORBIS_invalid_stream);
3526       if (s == f->segment_count)
3527          s = -1; // set 'crosses page' flag
3528       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
3529       first = FALSE;
3530    }
3531    for (; s == -1;) {
3532       uint8 *q;
3533       int n;
3534 
3535       // check that we have the page header ready
3536       if (p + 26 >= f->stream_end)               return error(f, VORBIS_need_more_data);
3537       // validate the page
3538       if (memcmp(p, ogg_page_header, 4))         return error(f, VORBIS_invalid_stream);
3539       if (p[4] != 0)                             return error(f, VORBIS_invalid_stream);
3540       if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
3541          if (f->previous_length)
3542             if ((p[5] & PAGEFLAG_continued_packet))  return error(f, VORBIS_invalid_stream);
3543          // if no previous length, we're resynching, so we can come in on a continued-packet,
3544          // which we'll just drop
3545       } else {
3546          if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3547       }
3548       n = p[26]; // segment counts
3549       q = p+27;  // q points to segment table
3550       p = q + n; // advance past header
3551       // make sure we've read the segment table
3552       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
3553       for (s=0; s < n; ++s) {
3554          p += q[s];
3555          if (q[s] < 255)
3556             break;
3557       }
3558       if (end_page)
3559          if (s < n-1)                            return error(f, VORBIS_invalid_stream);
3560       if (s == n)
3561          s = -1; // set 'crosses page' flag
3562       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
3563       first = FALSE;
3564    }
3565    return TRUE;
3566 }
3567 #endif // !STB_VORBIS_NO_PUSHDATA_API
3568 
start_decoder(vorb * f)3569 static int start_decoder(vorb *f)
3570 {
3571    uint8 header[6], x,y;
3572    int len,i,j,k, max_submaps = 0;
3573    int longest_floorlist=0;
3574 
3575    // first page, first packet
3576 
3577    if (!start_page(f))                              return FALSE;
3578    // validate page flag
3579    if (!(f->page_flag & PAGEFLAG_first_page))       return error(f, VORBIS_invalid_first_page);
3580    if (f->page_flag & PAGEFLAG_last_page)           return error(f, VORBIS_invalid_first_page);
3581    if (f->page_flag & PAGEFLAG_continued_packet)    return error(f, VORBIS_invalid_first_page);
3582    // check for expected packet length
3583    if (f->segment_count != 1)                       return error(f, VORBIS_invalid_first_page);
3584    if (f->segments[0] != 30) {
3585       // check for the Ogg skeleton fishead identifying header to refine our error
3586       if (f->segments[0] == 64 &&
3587           getn(f, header, 6) &&
3588           header[0] == 'f' &&
3589           header[1] == 'i' &&
3590           header[2] == 's' &&
3591           header[3] == 'h' &&
3592           header[4] == 'e' &&
3593           header[5] == 'a' &&
3594           get8(f)   == 'd' &&
3595           get8(f)   == '\0')                        return error(f, VORBIS_ogg_skeleton_not_supported);
3596       else
3597                                                     return error(f, VORBIS_invalid_first_page);
3598    }
3599 
3600    // read packet
3601    // check packet header
3602    if (get8(f) != VORBIS_packet_id)                 return error(f, VORBIS_invalid_first_page);
3603    if (!getn(f, header, 6))                         return error(f, VORBIS_unexpected_eof);
3604    if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_first_page);
3605    // vorbis_version
3606    if (get32(f) != 0)                               return error(f, VORBIS_invalid_first_page);
3607    f->channels = get8(f); if (!f->channels)         return error(f, VORBIS_invalid_first_page);
3608    if (f->channels > STB_VORBIS_MAX_CHANNELS)       return error(f, VORBIS_too_many_channels);
3609    f->sample_rate = get32(f); if (!f->sample_rate)  return error(f, VORBIS_invalid_first_page);
3610    get32(f); // bitrate_maximum
3611    get32(f); // bitrate_nominal
3612    get32(f); // bitrate_minimum
3613    x = get8(f);
3614    {
3615       int log0,log1;
3616       log0 = x & 15;
3617       log1 = x >> 4;
3618       f->blocksize_0 = 1 << log0;
3619       f->blocksize_1 = 1 << log1;
3620       if (log0 < 6 || log0 > 13)                       return error(f, VORBIS_invalid_setup);
3621       if (log1 < 6 || log1 > 13)                       return error(f, VORBIS_invalid_setup);
3622       if (log0 > log1)                                 return error(f, VORBIS_invalid_setup);
3623    }
3624 
3625    // framing_flag
3626    x = get8(f);
3627    if (!(x & 1))                                    return error(f, VORBIS_invalid_first_page);
3628 
3629    // second packet!
3630    if (!start_page(f))                              return FALSE;
3631 
3632    if (!start_packet(f))                            return FALSE;
3633    do {
3634       len = next_segment(f);
3635       skip(f, len);
3636       f->bytes_in_seg = 0;
3637    } while (len);
3638 
3639    // third packet!
3640    if (!start_packet(f))                            return FALSE;
3641 
3642    #ifndef STB_VORBIS_NO_PUSHDATA_API
3643    if (IS_PUSH_MODE(f)) {
3644       if (!is_whole_packet_present(f, TRUE)) {
3645          // convert error in ogg header to write type
3646          if (f->error == VORBIS_invalid_stream)
3647             f->error = VORBIS_invalid_setup;
3648          return FALSE;
3649       }
3650    }
3651    #endif
3652 
3653    crc32_init(); // always init it, to avoid multithread race conditions
3654 
3655    if (get8_packet(f) != VORBIS_packet_setup)       return error(f, VORBIS_invalid_setup);
3656    for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3657    if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_setup);
3658 
3659    // codebooks
3660 
3661    f->codebook_count = get_bits(f,8) + 1;
3662    f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
3663    if (f->codebooks == NULL)                        return error(f, VORBIS_outofmem);
3664    memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
3665    for (i=0; i < f->codebook_count; ++i) {
3666       uint32 *values;
3667       int ordered, sorted_count;
3668       int total=0;
3669       uint8 *lengths;
3670       Codebook *c = f->codebooks+i;
3671       CHECK(f);
3672       x = get_bits(f, 8); if (x != 0x42)            return error(f, VORBIS_invalid_setup);
3673       x = get_bits(f, 8); if (x != 0x43)            return error(f, VORBIS_invalid_setup);
3674       x = get_bits(f, 8); if (x != 0x56)            return error(f, VORBIS_invalid_setup);
3675       x = get_bits(f, 8);
3676       c->dimensions = (get_bits(f, 8)<<8) + x;
3677       x = get_bits(f, 8);
3678       y = get_bits(f, 8);
3679       c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
3680       ordered = get_bits(f,1);
3681       c->sparse = ordered ? 0 : get_bits(f,1);
3682 
3683       if (c->dimensions == 0 && c->entries != 0)    return error(f, VORBIS_invalid_setup);
3684 
3685       if (c->sparse)
3686          lengths = (uint8 *) setup_temp_malloc(f, c->entries);
3687       else
3688          lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3689 
3690       if (!lengths) return error(f, VORBIS_outofmem);
3691 
3692       if (ordered) {
3693          int current_entry = 0;
3694          int current_length = get_bits(f,5) + 1;
3695          while (current_entry < c->entries) {
3696             int limit = c->entries - current_entry;
3697             int n = get_bits(f, ilog(limit));
3698             if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
3699             memset(lengths + current_entry, current_length, n);
3700             current_entry += n;
3701             ++current_length;
3702          }
3703       } else {
3704          for (j=0; j < c->entries; ++j) {
3705             int present = c->sparse ? get_bits(f,1) : 1;
3706             if (present) {
3707                lengths[j] = get_bits(f, 5) + 1;
3708                ++total;
3709                if (lengths[j] == 32)
3710                   return error(f, VORBIS_invalid_setup);
3711             } else {
3712                lengths[j] = NO_CODE;
3713             }
3714          }
3715       }
3716 
3717       if (c->sparse && total >= c->entries >> 2) {
3718          // convert sparse items to non-sparse!
3719          if (c->entries > (int) f->setup_temp_memory_required)
3720             f->setup_temp_memory_required = c->entries;
3721 
3722          c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3723          if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem);
3724          memcpy(c->codeword_lengths, lengths, c->entries);
3725          setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
3726          lengths = c->codeword_lengths;
3727          c->sparse = 0;
3728       }
3729 
3730       // compute the size of the sorted tables
3731       if (c->sparse) {
3732          sorted_count = total;
3733       } else {
3734          sorted_count = 0;
3735          #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
3736          for (j=0; j < c->entries; ++j)
3737             if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
3738                ++sorted_count;
3739          #endif
3740       }
3741 
3742       c->sorted_entries = sorted_count;
3743       values = NULL;
3744 
3745       CHECK(f);
3746       if (!c->sparse) {
3747          c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
3748          if (!c->codewords)                  return error(f, VORBIS_outofmem);
3749       } else {
3750          unsigned int size;
3751          if (c->sorted_entries) {
3752             c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries);
3753             if (!c->codeword_lengths)           return error(f, VORBIS_outofmem);
3754             c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
3755             if (!c->codewords)                  return error(f, VORBIS_outofmem);
3756             values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
3757             if (!values)                        return error(f, VORBIS_outofmem);
3758          }
3759          size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
3760          if (size > f->setup_temp_memory_required)
3761             f->setup_temp_memory_required = size;
3762       }
3763 
3764       if (!compute_codewords(c, lengths, c->entries, values)) {
3765          if (c->sparse) setup_temp_free(f, values, 0);
3766          return error(f, VORBIS_invalid_setup);
3767       }
3768 
3769       if (c->sorted_entries) {
3770          // allocate an extra slot for sentinels
3771          c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
3772          if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem);
3773          // allocate an extra slot at the front so that c->sorted_values[-1] is defined
3774          // so that we can catch that case without an extra if
3775          c->sorted_values    = ( int   *) setup_malloc(f, sizeof(*c->sorted_values   ) * (c->sorted_entries+1));
3776          if (c->sorted_values == NULL) return error(f, VORBIS_outofmem);
3777          ++c->sorted_values;
3778          c->sorted_values[-1] = -1;
3779          compute_sorted_huffman(c, lengths, values);
3780       }
3781 
3782       if (c->sparse) {
3783          setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
3784          setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
3785          setup_temp_free(f, lengths, c->entries);
3786          c->codewords = NULL;
3787       }
3788 
3789       compute_accelerated_huffman(c);
3790 
3791       CHECK(f);
3792       c->lookup_type = get_bits(f, 4);
3793       if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
3794       if (c->lookup_type > 0) {
3795          uint16 *mults;
3796          c->minimum_value = float32_unpack(get_bits(f, 32));
3797          c->delta_value = float32_unpack(get_bits(f, 32));
3798          c->value_bits = get_bits(f, 4)+1;
3799          c->sequence_p = get_bits(f,1);
3800          if (c->lookup_type == 1) {
3801             c->lookup_values = lookup1_values(c->entries, c->dimensions);
3802          } else {
3803             c->lookup_values = c->entries * c->dimensions;
3804          }
3805          if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup);
3806          mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
3807          if (mults == NULL) return error(f, VORBIS_outofmem);
3808          for (j=0; j < (int) c->lookup_values; ++j) {
3809             int q = get_bits(f, c->value_bits);
3810             if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
3811             mults[j] = q;
3812          }
3813 
3814 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3815          if (c->lookup_type == 1) {
3816             int len, sparse = c->sparse;
3817             float last=0;
3818             // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3819             if (sparse) {
3820                if (c->sorted_entries == 0) goto skip;
3821                c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
3822             } else
3823                c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries        * c->dimensions);
3824             if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3825             len = sparse ? c->sorted_entries : c->entries;
3826             for (j=0; j < len; ++j) {
3827                unsigned int z = sparse ? c->sorted_values[j] : j;
3828                unsigned int div=1;
3829                for (k=0; k < c->dimensions; ++k) {
3830                   int off = (z / div) % c->lookup_values;
3831                   float val = mults[off];
3832                   val = mults[off]*c->delta_value + c->minimum_value + last;
3833                   c->multiplicands[j*c->dimensions + k] = val;
3834                   if (c->sequence_p)
3835                      last = val;
3836                   if (k+1 < c->dimensions) {
3837                      if (div > UINT_MAX / (unsigned int) c->lookup_values) {
3838                         setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
3839                         return error(f, VORBIS_invalid_setup);
3840                      }
3841                      div *= c->lookup_values;
3842                   }
3843                }
3844             }
3845             c->lookup_type = 2;
3846          }
3847          else
3848 #endif
3849          {
3850             float last=0;
3851             CHECK(f);
3852             c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
3853             if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3854             for (j=0; j < (int) c->lookup_values; ++j) {
3855                float val = mults[j] * c->delta_value + c->minimum_value + last;
3856                c->multiplicands[j] = val;
3857                if (c->sequence_p)
3858                   last = val;
3859             }
3860          }
3861 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3862         skip:;
3863 #endif
3864          setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values);
3865 
3866          CHECK(f);
3867       }
3868       CHECK(f);
3869    }
3870 
3871    // time domain transfers (notused)
3872 
3873    x = get_bits(f, 6) + 1;
3874    for (i=0; i < x; ++i) {
3875       uint32 z = get_bits(f, 16);
3876       if (z != 0) return error(f, VORBIS_invalid_setup);
3877    }
3878 
3879    // Floors
3880    f->floor_count = get_bits(f, 6)+1;
3881    f->floor_config = (Floor *)  setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
3882    if (f->floor_config == NULL) return error(f, VORBIS_outofmem);
3883    for (i=0; i < f->floor_count; ++i) {
3884       f->floor_types[i] = get_bits(f, 16);
3885       if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
3886       if (f->floor_types[i] == 0) {
3887          Floor0 *g = &f->floor_config[i].floor0;
3888          g->order = get_bits(f,8);
3889          g->rate = get_bits(f,16);
3890          g->bark_map_size = get_bits(f,16);
3891          g->amplitude_bits = get_bits(f,6);
3892          g->amplitude_offset = get_bits(f,8);
3893          g->number_of_books = get_bits(f,4) + 1;
3894          for (j=0; j < g->number_of_books; ++j)
3895             g->book_list[j] = get_bits(f,8);
3896          return error(f, VORBIS_feature_not_supported);
3897       } else {
3898          stbv__floor_ordering p[31*8+2];
3899          Floor1 *g = &f->floor_config[i].floor1;
3900          int max_class = -1;
3901          g->partitions = get_bits(f, 5);
3902          for (j=0; j < g->partitions; ++j) {
3903             g->partition_class_list[j] = get_bits(f, 4);
3904             if (g->partition_class_list[j] > max_class)
3905                max_class = g->partition_class_list[j];
3906          }
3907          for (j=0; j <= max_class; ++j) {
3908             g->class_dimensions[j] = get_bits(f, 3)+1;
3909             g->class_subclasses[j] = get_bits(f, 2);
3910             if (g->class_subclasses[j]) {
3911                g->class_masterbooks[j] = get_bits(f, 8);
3912                if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3913             }
3914             for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
3915                g->subclass_books[j][k] = get_bits(f,8)-1;
3916                if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3917             }
3918          }
3919          g->floor1_multiplier = get_bits(f,2)+1;
3920          g->rangebits = get_bits(f,4);
3921          g->Xlist[0] = 0;
3922          g->Xlist[1] = 1 << g->rangebits;
3923          g->values = 2;
3924          for (j=0; j < g->partitions; ++j) {
3925             int c = g->partition_class_list[j];
3926             for (k=0; k < g->class_dimensions[c]; ++k) {
3927                g->Xlist[g->values] = get_bits(f, g->rangebits);
3928                ++g->values;
3929             }
3930          }
3931          // precompute the sorting
3932          for (j=0; j < g->values; ++j) {
3933             p[j].x = g->Xlist[j];
3934             p[j].id = j;
3935          }
3936          qsort(p, g->values, sizeof(p[0]), point_compare);
3937          for (j=0; j < g->values; ++j)
3938             g->sorted_order[j] = (uint8) p[j].id;
3939          // precompute the neighbors
3940          for (j=2; j < g->values; ++j) {
3941             int low,hi;
3942             neighbors(g->Xlist, j, &low,&hi);
3943             g->neighbors[j][0] = low;
3944             g->neighbors[j][1] = hi;
3945          }
3946 
3947          if (g->values > longest_floorlist)
3948             longest_floorlist = g->values;
3949       }
3950    }
3951 
3952    // Residue
3953    f->residue_count = get_bits(f, 6)+1;
3954    f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0]));
3955    if (f->residue_config == NULL) return error(f, VORBIS_outofmem);
3956    memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0]));
3957    for (i=0; i < f->residue_count; ++i) {
3958       uint8 residue_cascade[64];
3959       Residue *r = f->residue_config+i;
3960       f->residue_types[i] = get_bits(f, 16);
3961       if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
3962       r->begin = get_bits(f, 24);
3963       r->end = get_bits(f, 24);
3964       if (r->end < r->begin) return error(f, VORBIS_invalid_setup);
3965       r->part_size = get_bits(f,24)+1;
3966       r->classifications = get_bits(f,6)+1;
3967       r->classbook = get_bits(f,8);
3968       if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3969       for (j=0; j < r->classifications; ++j) {
3970          uint8 high_bits=0;
3971          uint8 low_bits=get_bits(f,3);
3972          if (get_bits(f,1))
3973             high_bits = get_bits(f,5);
3974          residue_cascade[j] = high_bits*8 + low_bits;
3975       }
3976       r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
3977       if (r->residue_books == NULL) return error(f, VORBIS_outofmem);
3978       for (j=0; j < r->classifications; ++j) {
3979          for (k=0; k < 8; ++k) {
3980             if (residue_cascade[j] & (1 << k)) {
3981                r->residue_books[j][k] = get_bits(f, 8);
3982                if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3983             } else {
3984                r->residue_books[j][k] = -1;
3985             }
3986          }
3987       }
3988       // precompute the classifications[] array to avoid inner-loop mod/divide
3989       // call it 'classdata' since we already have r->classifications
3990       r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3991       if (!r->classdata) return error(f, VORBIS_outofmem);
3992       memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3993       for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
3994          int classwords = f->codebooks[r->classbook].dimensions;
3995          int temp = j;
3996          r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
3997          if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem);
3998          for (k=classwords-1; k >= 0; --k) {
3999             r->classdata[j][k] = temp % r->classifications;
4000             temp /= r->classifications;
4001          }
4002       }
4003    }
4004 
4005    f->mapping_count = get_bits(f,6)+1;
4006    f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
4007    if (f->mapping == NULL) return error(f, VORBIS_outofmem);
4008    memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping));
4009    for (i=0; i < f->mapping_count; ++i) {
4010       Mapping *m = f->mapping + i;
4011       int mapping_type = get_bits(f,16);
4012       if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
4013       m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
4014       if (m->chan == NULL) return error(f, VORBIS_outofmem);
4015       if (get_bits(f,1))
4016          m->submaps = get_bits(f,4)+1;
4017       else
4018          m->submaps = 1;
4019       if (m->submaps > max_submaps)
4020          max_submaps = m->submaps;
4021       if (get_bits(f,1)) {
4022          m->coupling_steps = get_bits(f,8)+1;
4023          for (k=0; k < m->coupling_steps; ++k) {
4024             m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
4025             m->chan[k].angle = get_bits(f, ilog(f->channels-1));
4026             if (m->chan[k].magnitude >= f->channels)        return error(f, VORBIS_invalid_setup);
4027             if (m->chan[k].angle     >= f->channels)        return error(f, VORBIS_invalid_setup);
4028             if (m->chan[k].magnitude == m->chan[k].angle)   return error(f, VORBIS_invalid_setup);
4029          }
4030       } else
4031          m->coupling_steps = 0;
4032 
4033       // reserved field
4034       if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
4035       if (m->submaps > 1) {
4036          for (j=0; j < f->channels; ++j) {
4037             m->chan[j].mux = get_bits(f, 4);
4038             if (m->chan[j].mux >= m->submaps)                return error(f, VORBIS_invalid_setup);
4039          }
4040       } else
4041          // @SPECIFICATION: this case is missing from the spec
4042          for (j=0; j < f->channels; ++j)
4043             m->chan[j].mux = 0;
4044 
4045       for (j=0; j < m->submaps; ++j) {
4046          get_bits(f,8); // discard
4047          m->submap_floor[j] = get_bits(f,8);
4048          m->submap_residue[j] = get_bits(f,8);
4049          if (m->submap_floor[j] >= f->floor_count)      return error(f, VORBIS_invalid_setup);
4050          if (m->submap_residue[j] >= f->residue_count)  return error(f, VORBIS_invalid_setup);
4051       }
4052    }
4053 
4054    // Modes
4055    f->mode_count = get_bits(f, 6)+1;
4056    for (i=0; i < f->mode_count; ++i) {
4057       Mode *m = f->mode_config+i;
4058       m->blockflag = get_bits(f,1);
4059       m->windowtype = get_bits(f,16);
4060       m->transformtype = get_bits(f,16);
4061       m->mapping = get_bits(f,8);
4062       if (m->windowtype != 0)                 return error(f, VORBIS_invalid_setup);
4063       if (m->transformtype != 0)              return error(f, VORBIS_invalid_setup);
4064       if (m->mapping >= f->mapping_count)     return error(f, VORBIS_invalid_setup);
4065    }
4066 
4067    flush_packet(f);
4068 
4069    f->previous_length = 0;
4070 
4071    for (i=0; i < f->channels; ++i) {
4072       f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
4073       f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4074       f->finalY[i]          = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
4075       if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem);
4076       memset(f->channel_buffers[i], 0, sizeof(float) * f->blocksize_1);
4077       #ifdef STB_VORBIS_NO_DEFER_FLOOR
4078       f->floor_buffers[i]   = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4079       if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem);
4080       #endif
4081    }
4082 
4083    if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
4084    if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
4085    f->blocksize[0] = f->blocksize_0;
4086    f->blocksize[1] = f->blocksize_1;
4087 
4088 #ifdef STB_VORBIS_DIVIDE_TABLE
4089    if (integer_divide_table[1][1]==0)
4090       for (i=0; i < DIVTAB_NUMER; ++i)
4091          for (j=1; j < DIVTAB_DENOM; ++j)
4092             integer_divide_table[i][j] = i / j;
4093 #endif
4094 
4095    // compute how much temporary memory is needed
4096 
4097    // 1.
4098    {
4099       uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
4100       uint32 classify_mem;
4101       int i,max_part_read=0;
4102       for (i=0; i < f->residue_count; ++i) {
4103          Residue *r = f->residue_config + i;
4104          unsigned int actual_size = f->blocksize_1 / 2;
4105          unsigned int limit_r_begin = r->begin < actual_size ? r->begin : actual_size;
4106          unsigned int limit_r_end   = r->end   < actual_size ? r->end   : actual_size;
4107          int n_read = limit_r_end - limit_r_begin;
4108          int part_read = n_read / r->part_size;
4109          if (part_read > max_part_read)
4110             max_part_read = part_read;
4111       }
4112       #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
4113       classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
4114       #else
4115       classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
4116       #endif
4117 
4118       // maximum reasonable partition size is f->blocksize_1
4119 
4120       f->temp_memory_required = classify_mem;
4121       if (imdct_mem > f->temp_memory_required)
4122          f->temp_memory_required = imdct_mem;
4123    }
4124 
4125    f->first_decode = TRUE;
4126 
4127    if (f->alloc.alloc_buffer) {
4128       assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
4129       // check if there's enough temp memory so we don't error later
4130       if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
4131          return error(f, VORBIS_outofmem);
4132    }
4133 
4134    f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
4135 
4136    return TRUE;
4137 }
4138 
vorbis_deinit(stb_vorbis * p)4139 static void vorbis_deinit(stb_vorbis *p)
4140 {
4141    int i,j;
4142    if (p->residue_config) {
4143       for (i=0; i < p->residue_count; ++i) {
4144          Residue *r = p->residue_config+i;
4145          if (r->classdata) {
4146             for (j=0; j < p->codebooks[r->classbook].entries; ++j)
4147                setup_free(p, r->classdata[j]);
4148             setup_free(p, r->classdata);
4149          }
4150          setup_free(p, r->residue_books);
4151       }
4152    }
4153 
4154    if (p->codebooks) {
4155       CHECK(p);
4156       for (i=0; i < p->codebook_count; ++i) {
4157          Codebook *c = p->codebooks + i;
4158          setup_free(p, c->codeword_lengths);
4159          setup_free(p, c->multiplicands);
4160          setup_free(p, c->codewords);
4161          setup_free(p, c->sorted_codewords);
4162          // c->sorted_values[-1] is the first entry in the array
4163          setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
4164       }
4165       setup_free(p, p->codebooks);
4166    }
4167    setup_free(p, p->floor_config);
4168    setup_free(p, p->residue_config);
4169    if (p->mapping) {
4170       for (i=0; i < p->mapping_count; ++i)
4171          setup_free(p, p->mapping[i].chan);
4172       setup_free(p, p->mapping);
4173    }
4174    CHECK(p);
4175    for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
4176       setup_free(p, p->channel_buffers[i]);
4177       setup_free(p, p->previous_window[i]);
4178       #ifdef STB_VORBIS_NO_DEFER_FLOOR
4179       setup_free(p, p->floor_buffers[i]);
4180       #endif
4181       setup_free(p, p->finalY[i]);
4182    }
4183    for (i=0; i < 2; ++i) {
4184       setup_free(p, p->A[i]);
4185       setup_free(p, p->B[i]);
4186       setup_free(p, p->C[i]);
4187       setup_free(p, p->window[i]);
4188       setup_free(p, p->bit_reverse[i]);
4189    }
4190    #ifndef STB_VORBIS_NO_STDIO
4191    if (p->close_on_free) fclose(p->f);
4192    #endif
4193 }
4194 
stb_vorbis_close(stb_vorbis * p)4195 void stb_vorbis_close(stb_vorbis *p)
4196 {
4197    if (p == NULL) return;
4198    vorbis_deinit(p);
4199    setup_free(p,p);
4200 }
4201 
vorbis_init(stb_vorbis * p,const stb_vorbis_alloc * z)4202 static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
4203 {
4204    memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start
4205    if (z) {
4206       p->alloc = *z;
4207       p->alloc.alloc_buffer_length_in_bytes = (p->alloc.alloc_buffer_length_in_bytes+3) & ~3;
4208       p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
4209    }
4210    p->eof = 0;
4211    p->error = VORBIS__no_error;
4212    p->stream = NULL;
4213    p->codebooks = NULL;
4214    p->page_crc_tests = -1;
4215    #ifndef STB_VORBIS_NO_STDIO
4216    p->close_on_free = FALSE;
4217    p->f = NULL;
4218    #endif
4219 }
4220 
stb_vorbis_get_sample_offset(stb_vorbis * f)4221 int stb_vorbis_get_sample_offset(stb_vorbis *f)
4222 {
4223    if (f->current_loc_valid)
4224       return f->current_loc;
4225    else
4226       return -1;
4227 }
4228 
stb_vorbis_get_info(stb_vorbis * f)4229 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
4230 {
4231    stb_vorbis_info d;
4232    d.channels = f->channels;
4233    d.sample_rate = f->sample_rate;
4234    d.setup_memory_required = f->setup_memory_required;
4235    d.setup_temp_memory_required = f->setup_temp_memory_required;
4236    d.temp_memory_required = f->temp_memory_required;
4237    d.max_frame_size = f->blocksize_1 >> 1;
4238    return d;
4239 }
4240 
stb_vorbis_get_error(stb_vorbis * f)4241 int stb_vorbis_get_error(stb_vorbis *f)
4242 {
4243    int e = f->error;
4244    f->error = VORBIS__no_error;
4245    return e;
4246 }
4247 
vorbis_alloc(stb_vorbis * f)4248 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
4249 {
4250    stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
4251    return p;
4252 }
4253 
4254 #ifndef STB_VORBIS_NO_PUSHDATA_API
4255 
stb_vorbis_flush_pushdata(stb_vorbis * f)4256 void stb_vorbis_flush_pushdata(stb_vorbis *f)
4257 {
4258    f->previous_length = 0;
4259    f->page_crc_tests  = 0;
4260    f->discard_samples_deferred = 0;
4261    f->current_loc_valid = FALSE;
4262    f->first_decode = FALSE;
4263    f->samples_output = 0;
4264    f->channel_buffer_start = 0;
4265    f->channel_buffer_end = 0;
4266 }
4267 
vorbis_search_for_page_pushdata(vorb * f,uint8 * data,int data_len)4268 static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
4269 {
4270    int i,n;
4271    for (i=0; i < f->page_crc_tests; ++i)
4272       f->scan[i].bytes_done = 0;
4273 
4274    // if we have room for more scans, search for them first, because
4275    // they may cause us to stop early if their header is incomplete
4276    if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
4277       if (data_len < 4) return 0;
4278       data_len -= 3; // need to look for 4-byte sequence, so don't miss
4279                      // one that straddles a boundary
4280       for (i=0; i < data_len; ++i) {
4281          if (data[i] == 0x4f) {
4282             if (0==memcmp(data+i, ogg_page_header, 4)) {
4283                int j,len;
4284                uint32 crc;
4285                // make sure we have the whole page header
4286                if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
4287                   // only read up to this page start, so hopefully we'll
4288                   // have the whole page header start next time
4289                   data_len = i;
4290                   break;
4291                }
4292                // ok, we have it all; compute the length of the page
4293                len = 27 + data[i+26];
4294                for (j=0; j < data[i+26]; ++j)
4295                   len += data[i+27+j];
4296                // scan everything up to the embedded crc (which we must 0)
4297                crc = 0;
4298                for (j=0; j < 22; ++j)
4299                   crc = crc32_update(crc, data[i+j]);
4300                // now process 4 0-bytes
4301                for (   ; j < 26; ++j)
4302                   crc = crc32_update(crc, 0);
4303                // len is the total number of bytes we need to scan
4304                n = f->page_crc_tests++;
4305                f->scan[n].bytes_left = len-j;
4306                f->scan[n].crc_so_far = crc;
4307                f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24);
4308                // if the last frame on a page is continued to the next, then
4309                // we can't recover the sample_loc immediately
4310                if (data[i+27+data[i+26]-1] == 255)
4311                   f->scan[n].sample_loc = ~0;
4312                else
4313                   f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24);
4314                f->scan[n].bytes_done = i+j;
4315                if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT)
4316                   break;
4317                // keep going if we still have room for more
4318             }
4319          }
4320       }
4321    }
4322 
4323    for (i=0; i < f->page_crc_tests;) {
4324       uint32 crc;
4325       int j;
4326       int n = f->scan[i].bytes_done;
4327       int m = f->scan[i].bytes_left;
4328       if (m > data_len - n) m = data_len - n;
4329       // m is the bytes to scan in the current chunk
4330       crc = f->scan[i].crc_so_far;
4331       for (j=0; j < m; ++j)
4332          crc = crc32_update(crc, data[n+j]);
4333       f->scan[i].bytes_left -= m;
4334       f->scan[i].crc_so_far = crc;
4335       if (f->scan[i].bytes_left == 0) {
4336          // does it match?
4337          if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
4338             // Houston, we have page
4339             data_len = n+m; // consumption amount is wherever that scan ended
4340             f->page_crc_tests = -1; // drop out of page scan mode
4341             f->previous_length = 0; // decode-but-don't-output one frame
4342             f->next_seg = -1;       // start a new page
4343             f->current_loc = f->scan[i].sample_loc; // set the current sample location
4344                                     // to the amount we'd have decoded had we decoded this page
4345             f->current_loc_valid = f->current_loc != ~0U;
4346             return data_len;
4347          }
4348          // delete entry
4349          f->scan[i] = f->scan[--f->page_crc_tests];
4350       } else {
4351          ++i;
4352       }
4353    }
4354 
4355    return data_len;
4356 }
4357 
4358 // return value: number of bytes we used
stb_vorbis_decode_frame_pushdata(stb_vorbis * f,const uint8 * data,int data_len,int * channels,float *** output,int * samples)4359 int stb_vorbis_decode_frame_pushdata(
4360          stb_vorbis *f,                   // the file we're decoding
4361          const uint8 *data, int data_len, // the memory available for decoding
4362          int *channels,                   // place to write number of float * buffers
4363          float ***output,                 // place to write float ** array of float * buffers
4364          int *samples                     // place to write number of output samples
4365      )
4366 {
4367    int i;
4368    int len,right,left;
4369 
4370    if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4371 
4372    if (f->page_crc_tests >= 0) {
4373       *samples = 0;
4374       return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len);
4375    }
4376 
4377    f->stream     = (uint8 *) data;
4378    f->stream_end = (uint8 *) data + data_len;
4379    f->error      = VORBIS__no_error;
4380 
4381    // check that we have the entire packet in memory
4382    if (!is_whole_packet_present(f, FALSE)) {
4383       *samples = 0;
4384       return 0;
4385    }
4386 
4387    if (!vorbis_decode_packet(f, &len, &left, &right)) {
4388       // save the actual error we encountered
4389       enum STBVorbisError error = f->error;
4390       if (error == VORBIS_bad_packet_type) {
4391          // flush and resynch
4392          f->error = VORBIS__no_error;
4393          while (get8_packet(f) != EOP)
4394             if (f->eof) break;
4395          *samples = 0;
4396          return (int) (f->stream - data);
4397       }
4398       if (error == VORBIS_continued_packet_flag_invalid) {
4399          if (f->previous_length == 0) {
4400             // we may be resynching, in which case it's ok to hit one
4401             // of these; just discard the packet
4402             f->error = VORBIS__no_error;
4403             while (get8_packet(f) != EOP)
4404                if (f->eof) break;
4405             *samples = 0;
4406             return (int) (f->stream - data);
4407          }
4408       }
4409       // if we get an error while parsing, what to do?
4410       // well, it DEFINITELY won't work to continue from where we are!
4411       stb_vorbis_flush_pushdata(f);
4412       // restore the error that actually made us bail
4413       f->error = error;
4414       *samples = 0;
4415       return 1;
4416    }
4417 
4418    // success!
4419    len = vorbis_finish_frame(f, len, left, right);
4420    for (i=0; i < f->channels; ++i)
4421       f->outputs[i] = f->channel_buffers[i] + left;
4422 
4423    if (channels) *channels = f->channels;
4424    *samples = len;
4425    *output = f->outputs;
4426    return (int) (f->stream - data);
4427 }
4428 
stb_vorbis_open_pushdata(const unsigned char * data,int data_len,int * data_used,int * error,const stb_vorbis_alloc * alloc)4429 stb_vorbis *stb_vorbis_open_pushdata(
4430          const unsigned char *data, int data_len, // the memory available for decoding
4431          int *data_used,              // only defined if result is not NULL
4432          int *error, const stb_vorbis_alloc *alloc)
4433 {
4434    stb_vorbis *f, p;
4435    vorbis_init(&p, alloc);
4436    p.stream     = (uint8 *) data;
4437    p.stream_end = (uint8 *) data + data_len;
4438    p.push_mode  = TRUE;
4439    if (!start_decoder(&p)) {
4440       if (p.eof)
4441          *error = VORBIS_need_more_data;
4442       else
4443          *error = p.error;
4444       return NULL;
4445    }
4446    f = vorbis_alloc(&p);
4447    if (f) {
4448       *f = p;
4449       *data_used = (int) (f->stream - data);
4450       *error = 0;
4451       return f;
4452    } else {
4453       vorbis_deinit(&p);
4454       return NULL;
4455    }
4456 }
4457 #endif // STB_VORBIS_NO_PUSHDATA_API
4458 
stb_vorbis_get_file_offset(stb_vorbis * f)4459 unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
4460 {
4461    #ifndef STB_VORBIS_NO_PUSHDATA_API
4462    if (f->push_mode) return 0;
4463    #endif
4464    if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start);
4465    #ifndef STB_VORBIS_NO_STDIO
4466    return (unsigned int) (ftell(f->f) - f->f_start);
4467    #endif
4468 }
4469 
4470 #ifndef STB_VORBIS_NO_PULLDATA_API
4471 //
4472 // DATA-PULLING API
4473 //
4474 
vorbis_find_page(stb_vorbis * f,uint32 * end,uint32 * last)4475 static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
4476 {
4477    for(;;) {
4478       int n;
4479       if (f->eof) return 0;
4480       n = get8(f);
4481       if (n == 0x4f) { // page header candidate
4482          unsigned int retry_loc = stb_vorbis_get_file_offset(f);
4483          int i;
4484          // check if we're off the end of a file_section stream
4485          if (retry_loc - 25 > f->stream_len)
4486             return 0;
4487          // check the rest of the header
4488          for (i=1; i < 4; ++i)
4489             if (get8(f) != ogg_page_header[i])
4490                break;
4491          if (f->eof) return 0;
4492          if (i == 4) {
4493             uint8 header[27];
4494             uint32 i, crc, goal, len;
4495             for (i=0; i < 4; ++i)
4496                header[i] = ogg_page_header[i];
4497             for (; i < 27; ++i)
4498                header[i] = get8(f);
4499             if (f->eof) return 0;
4500             if (header[4] != 0) goto invalid;
4501             goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24);
4502             for (i=22; i < 26; ++i)
4503                header[i] = 0;
4504             crc = 0;
4505             for (i=0; i < 27; ++i)
4506                crc = crc32_update(crc, header[i]);
4507             len = 0;
4508             for (i=0; i < header[26]; ++i) {
4509                int s = get8(f);
4510                crc = crc32_update(crc, s);
4511                len += s;
4512             }
4513             if (len && f->eof) return 0;
4514             for (i=0; i < len; ++i)
4515                crc = crc32_update(crc, get8(f));
4516             // finished parsing probable page
4517             if (crc == goal) {
4518                // we could now check that it's either got the last
4519                // page flag set, OR it's followed by the capture
4520                // pattern, but I guess TECHNICALLY you could have
4521                // a file with garbage between each ogg page and recover
4522                // from it automatically? So even though that paranoia
4523                // might decrease the chance of an invalid decode by
4524                // another 2^32, not worth it since it would hose those
4525                // invalid-but-useful files?
4526                if (end)
4527                   *end = stb_vorbis_get_file_offset(f);
4528                if (last) {
4529                   if (header[5] & 0x04)
4530                      *last = 1;
4531                   else
4532                      *last = 0;
4533                }
4534                set_file_offset(f, retry_loc-1);
4535                return 1;
4536             }
4537          }
4538         invalid:
4539          // not a valid page, so rewind and look for next one
4540          set_file_offset(f, retry_loc);
4541       }
4542    }
4543 }
4544 
4545 
4546 #define SAMPLE_unknown  0xffffffff
4547 
4548 // seeking is implemented with a binary search, which narrows down the range to
4549 // 64K, before using a linear search (because finding the synchronization
4550 // pattern can be expensive, and the chance we'd find the end page again is
4551 // relatively high for small ranges)
4552 //
4553 // two initial interpolation-style probes are used at the start of the search
4554 // to try to bound either side of the binary search sensibly, while still
4555 // working in O(log n) time if they fail.
4556 
get_seek_page_info(stb_vorbis * f,ProbedPage * z)4557 static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
4558 {
4559    uint8 header[27], lacing[255];
4560    int i,len;
4561 
4562    // record where the page starts
4563    z->page_start = stb_vorbis_get_file_offset(f);
4564 
4565    // parse the header
4566    getn(f, header, 27);
4567    if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
4568       return 0;
4569    getn(f, lacing, header[26]);
4570 
4571    // determine the length of the payload
4572    len = 0;
4573    for (i=0; i < header[26]; ++i)
4574       len += lacing[i];
4575 
4576    // this implies where the page ends
4577    z->page_end = z->page_start + 27 + header[26] + len;
4578 
4579    // read the last-decoded sample out of the data
4580    z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
4581 
4582    // restore file state to where we were
4583    set_file_offset(f, z->page_start);
4584    return 1;
4585 }
4586 
4587 // rarely used function to seek back to the preceding page while finding the
4588 // start of a packet
go_to_page_before(stb_vorbis * f,unsigned int limit_offset)4589 static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
4590 {
4591    unsigned int previous_safe, end;
4592 
4593    // now we want to seek back 64K from the limit
4594    if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset)
4595       previous_safe = limit_offset - 65536;
4596    else
4597       previous_safe = f->first_audio_page_offset;
4598 
4599    set_file_offset(f, previous_safe);
4600 
4601    while (vorbis_find_page(f, &end, NULL)) {
4602       if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
4603          return 1;
4604       set_file_offset(f, end);
4605    }
4606 
4607    return 0;
4608 }
4609 
4610 // implements the search logic for finding a page and starting decoding. if
4611 // the function succeeds, current_loc_valid will be true and current_loc will
4612 // be less than or equal to the provided sample number (the closer the
4613 // better).
seek_to_sample_coarse(stb_vorbis * f,uint32 sample_number)4614 static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
4615 {
4616    ProbedPage left, right, mid;
4617    int i, start_seg_with_known_loc, end_pos, page_start;
4618    uint32 delta, stream_length, padding;
4619    double offset, bytes_per_sample;
4620    int probe = 0;
4621 
4622    // find the last page and validate the target sample
4623    stream_length = stb_vorbis_stream_length_in_samples(f);
4624    if (stream_length == 0)            return error(f, VORBIS_seek_without_length);
4625    if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
4626 
4627    // this is the maximum difference between the window-center (which is the
4628    // actual granule position value), and the right-start (which the spec
4629    // indicates should be the granule position (give or take one)).
4630    padding = ((f->blocksize_1 - f->blocksize_0) >> 2);
4631    if (sample_number < padding)
4632       sample_number = 0;
4633    else
4634       sample_number -= padding;
4635 
4636    left = f->p_first;
4637    while (left.last_decoded_sample == ~0U) {
4638       // (untested) the first page does not have a 'last_decoded_sample'
4639       set_file_offset(f, left.page_end);
4640       if (!get_seek_page_info(f, &left)) goto error;
4641    }
4642 
4643    right = f->p_last;
4644    assert(right.last_decoded_sample != ~0U);
4645 
4646    // starting from the start is handled differently
4647    if (sample_number <= left.last_decoded_sample) {
4648       if (stb_vorbis_seek_start(f))
4649          return 1;
4650       return 0;
4651    }
4652 
4653    while (left.page_end != right.page_start) {
4654       assert(left.page_end < right.page_start);
4655       // search range in bytes
4656       delta = right.page_start - left.page_end;
4657       if (delta <= 65536) {
4658          // there's only 64K left to search - handle it linearly
4659          set_file_offset(f, left.page_end);
4660       } else {
4661          if (probe < 2) {
4662             if (probe == 0) {
4663                // first probe (interpolate)
4664                double data_bytes = right.page_end - left.page_start;
4665                bytes_per_sample = data_bytes / right.last_decoded_sample;
4666                offset = left.page_start + bytes_per_sample * (sample_number - left.last_decoded_sample);
4667             } else {
4668                // second probe (try to bound the other side)
4669                double error = ((double) sample_number - mid.last_decoded_sample) * bytes_per_sample;
4670                if (error >= 0 && error <  8000) error =  8000;
4671                if (error <  0 && error > -8000) error = -8000;
4672                offset += error * 2;
4673             }
4674 
4675             // ensure the offset is valid
4676             if (offset < left.page_end)
4677                offset = left.page_end;
4678             if (offset > right.page_start - 65536)
4679                offset = right.page_start - 65536;
4680 
4681             set_file_offset(f, (unsigned int) offset);
4682          } else {
4683             // binary search for large ranges (offset by 32K to ensure
4684             // we don't hit the right page)
4685             set_file_offset(f, left.page_end + (delta / 2) - 32768);
4686          }
4687 
4688          if (!vorbis_find_page(f, NULL, NULL)) goto error;
4689       }
4690 
4691       for (;;) {
4692          if (!get_seek_page_info(f, &mid)) goto error;
4693          if (mid.last_decoded_sample != ~0U) break;
4694          // (untested) no frames end on this page
4695          set_file_offset(f, mid.page_end);
4696          assert(mid.page_start < right.page_start);
4697       }
4698 
4699       // if we've just found the last page again then we're in a tricky file,
4700       // and we're close enough.
4701       if (mid.page_start == right.page_start)
4702          break;
4703 
4704       if (sample_number < mid.last_decoded_sample)
4705          right = mid;
4706       else
4707          left = mid;
4708 
4709       ++probe;
4710    }
4711 
4712    // seek back to start of the last packet
4713    page_start = left.page_start;
4714    set_file_offset(f, page_start);
4715    if (!start_page(f)) return error(f, VORBIS_seek_failed);
4716    end_pos = f->end_seg_with_known_loc;
4717    assert(end_pos >= 0);
4718 
4719    for (;;) {
4720       for (i = end_pos; i > 0; --i)
4721          if (f->segments[i-1] != 255)
4722             break;
4723 
4724       start_seg_with_known_loc = i;
4725 
4726       if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet))
4727          break;
4728 
4729       // (untested) the final packet begins on an earlier page
4730       if (!go_to_page_before(f, page_start))
4731          goto error;
4732 
4733       page_start = stb_vorbis_get_file_offset(f);
4734       if (!start_page(f)) goto error;
4735       end_pos = f->segment_count - 1;
4736    }
4737 
4738    // prepare to start decoding
4739    f->current_loc_valid = FALSE;
4740    f->last_seg = FALSE;
4741    f->valid_bits = 0;
4742    f->packet_bytes = 0;
4743    f->bytes_in_seg = 0;
4744    f->previous_length = 0;
4745    f->next_seg = start_seg_with_known_loc;
4746 
4747    for (i = 0; i < start_seg_with_known_loc; i++)
4748       skip(f, f->segments[i]);
4749 
4750    // start decoding (optimizable - this frame is generally discarded)
4751    if (!vorbis_pump_first_frame(f))
4752       return 0;
4753    if (f->current_loc > sample_number)
4754       return error(f, VORBIS_seek_failed);
4755    return 1;
4756 
4757 error:
4758    // try to restore the file to a valid state
4759    stb_vorbis_seek_start(f);
4760    return error(f, VORBIS_seek_failed);
4761 }
4762 
4763 // the same as vorbis_decode_initial, but without advancing
peek_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)4764 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
4765 {
4766    int bits_read, bytes_read;
4767 
4768    if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
4769       return 0;
4770 
4771    // either 1 or 2 bytes were read, figure out which so we can rewind
4772    bits_read = 1 + ilog(f->mode_count-1);
4773    if (f->mode_config[*mode].blockflag)
4774       bits_read += 2;
4775    bytes_read = (bits_read + 7) / 8;
4776 
4777    f->bytes_in_seg += bytes_read;
4778    f->packet_bytes -= bytes_read;
4779    skip(f, -bytes_read);
4780    if (f->next_seg == -1)
4781       f->next_seg = f->segment_count - 1;
4782    else
4783       f->next_seg--;
4784    f->valid_bits = 0;
4785 
4786    return 1;
4787 }
4788 
stb_vorbis_seek_frame(stb_vorbis * f,unsigned int sample_number)4789 int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
4790 {
4791    uint32 max_frame_samples;
4792 
4793    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4794 
4795    // fast page-level search
4796    if (!seek_to_sample_coarse(f, sample_number))
4797       return 0;
4798 
4799    assert(f->current_loc_valid);
4800    assert(f->current_loc <= sample_number);
4801 
4802    // linear search for the relevant packet
4803    max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2;
4804    while (f->current_loc < sample_number) {
4805       int left_start, left_end, right_start, right_end, mode, frame_samples;
4806       if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
4807          return error(f, VORBIS_seek_failed);
4808       // calculate the number of samples returned by the next frame
4809       frame_samples = right_start - left_start;
4810       if (f->current_loc + frame_samples > sample_number) {
4811          return 1; // the next frame will contain the sample
4812       } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) {
4813          // there's a chance the frame after this could contain the sample
4814          vorbis_pump_first_frame(f);
4815       } else {
4816          // this frame is too early to be relevant
4817          f->current_loc += frame_samples;
4818          f->previous_length = 0;
4819          maybe_start_packet(f);
4820          flush_packet(f);
4821       }
4822    }
4823    // the next frame will start with the sample
4824    assert(f->current_loc == sample_number);
4825    return 1;
4826 }
4827 
stb_vorbis_seek(stb_vorbis * f,unsigned int sample_number)4828 int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
4829 {
4830    if (!stb_vorbis_seek_frame(f, sample_number))
4831       return 0;
4832 
4833    if (sample_number != f->current_loc) {
4834       int n;
4835       uint32 frame_start = f->current_loc;
4836       stb_vorbis_get_frame_float(f, &n, NULL);
4837       assert(sample_number > frame_start);
4838       assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end);
4839       f->channel_buffer_start += (sample_number - frame_start);
4840    }
4841 
4842    return 1;
4843 }
4844 
stb_vorbis_seek_start(stb_vorbis * f)4845 int stb_vorbis_seek_start(stb_vorbis *f)
4846 {
4847    if (IS_PUSH_MODE(f)) { return error(f, VORBIS_invalid_api_mixing); }
4848    set_file_offset(f, f->first_audio_page_offset);
4849    f->previous_length = 0;
4850    f->first_decode = TRUE;
4851    f->next_seg = -1;
4852    return vorbis_pump_first_frame(f);
4853 }
4854 
stb_vorbis_stream_length_in_samples(stb_vorbis * f)4855 unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
4856 {
4857    unsigned int restore_offset, previous_safe;
4858    unsigned int end, last_page_loc;
4859 
4860    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4861    if (!f->total_samples) {
4862       unsigned int last;
4863       uint32 lo,hi;
4864       char header[6];
4865 
4866       // first, store the current decode position so we can restore it
4867       restore_offset = stb_vorbis_get_file_offset(f);
4868 
4869       // now we want to seek back 64K from the end (the last page must
4870       // be at most a little less than 64K, but let's allow a little slop)
4871       if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
4872          previous_safe = f->stream_len - 65536;
4873       else
4874          previous_safe = f->first_audio_page_offset;
4875 
4876       set_file_offset(f, previous_safe);
4877       // previous_safe is now our candidate 'earliest known place that seeking
4878       // to will lead to the final page'
4879 
4880       if (!vorbis_find_page(f, &end, &last)) {
4881          // if we can't find a page, we're hosed!
4882          f->error = VORBIS_cant_find_last_page;
4883          f->total_samples = 0xffffffff;
4884          goto done;
4885       }
4886 
4887       // check if there are more pages
4888       last_page_loc = stb_vorbis_get_file_offset(f);
4889 
4890       // stop when the last_page flag is set, not when we reach eof;
4891       // this allows us to stop short of a 'file_section' end without
4892       // explicitly checking the length of the section
4893       while (!last) {
4894          set_file_offset(f, end);
4895          if (!vorbis_find_page(f, &end, &last)) {
4896             // the last page we found didn't have the 'last page' flag
4897             // set. whoops!
4898             break;
4899          }
4900          previous_safe = last_page_loc+1;
4901          last_page_loc = stb_vorbis_get_file_offset(f);
4902       }
4903 
4904       set_file_offset(f, last_page_loc);
4905 
4906       // parse the header
4907       getn(f, (unsigned char *)header, 6);
4908       // extract the absolute granule position
4909       lo = get32(f);
4910       hi = get32(f);
4911       if (lo == 0xffffffff && hi == 0xffffffff) {
4912          f->error = VORBIS_cant_find_last_page;
4913          f->total_samples = SAMPLE_unknown;
4914          goto done;
4915       }
4916       if (hi)
4917          lo = 0xfffffffe; // saturate
4918       f->total_samples = lo;
4919 
4920       f->p_last.page_start = last_page_loc;
4921       f->p_last.page_end   = end;
4922       f->p_last.last_decoded_sample = lo;
4923 
4924      done:
4925       set_file_offset(f, restore_offset);
4926    }
4927    return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
4928 }
4929 
stb_vorbis_stream_length_in_seconds(stb_vorbis * f)4930 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
4931 {
4932    return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
4933 }
4934 
4935 
4936 
stb_vorbis_get_frame_float(stb_vorbis * f,int * channels,float *** output)4937 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
4938 {
4939    int len, right,left,i;
4940    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4941 
4942    if (!vorbis_decode_packet(f, &len, &left, &right)) {
4943       f->channel_buffer_start = f->channel_buffer_end = 0;
4944       return 0;
4945    }
4946 
4947    len = vorbis_finish_frame(f, len, left, right);
4948    for (i=0; i < f->channels; ++i)
4949       f->outputs[i] = f->channel_buffers[i] + left;
4950 
4951    f->channel_buffer_start = left;
4952    f->channel_buffer_end   = left+len;
4953 
4954    if (channels) *channels = f->channels;
4955    if (output)   *output = f->outputs;
4956    return len;
4957 }
4958 
4959 #ifndef STB_VORBIS_NO_STDIO
4960 
stb_vorbis_open_file_section(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc,unsigned int length)4961 stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length)
4962 {
4963    stb_vorbis *f, p;
4964    vorbis_init(&p, alloc);
4965    p.f = file;
4966    p.f_start = (uint32) ftell(file);
4967    p.stream_len   = length;
4968    p.close_on_free = close_on_free;
4969    if (start_decoder(&p)) {
4970       f = vorbis_alloc(&p);
4971       if (f) {
4972          *f = p;
4973          vorbis_pump_first_frame(f);
4974          return f;
4975       }
4976    }
4977    if (error) *error = p.error;
4978    vorbis_deinit(&p);
4979    return NULL;
4980 }
4981 
stb_vorbis_open_file(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc)4982 stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc)
4983 {
4984    unsigned int len, start;
4985    start = (unsigned int) ftell(file);
4986    fseek(file, 0, SEEK_END);
4987    len = (unsigned int) (ftell(file) - start);
4988    fseek(file, start, SEEK_SET);
4989    return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
4990 }
4991 
stb_vorbis_open_filename(const char * filename,int * error,const stb_vorbis_alloc * alloc)4992 stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
4993 {
4994    FILE *f;
4995 #if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__)
4996    if (0 != fopen_s(&f, filename, "rb"))
4997       f = NULL;
4998 #else
4999    f = fopen(filename, "rb");
5000 #endif
5001    if (f)
5002       return stb_vorbis_open_file(f, TRUE, error, alloc);
5003    if (error) *error = VORBIS_file_open_failure;
5004    return NULL;
5005 }
5006 #endif // STB_VORBIS_NO_STDIO
5007 
stb_vorbis_open_memory(const unsigned char * data,int len,int * error,const stb_vorbis_alloc * alloc)5008 stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc)
5009 {
5010    stb_vorbis *f, p;
5011    if (data == NULL) return NULL;
5012    vorbis_init(&p, alloc);
5013    p.stream = (uint8 *) data;
5014    p.stream_end = (uint8 *) data + len;
5015    p.stream_start = (uint8 *) p.stream;
5016    p.stream_len = len;
5017    p.push_mode = FALSE;
5018    if (start_decoder(&p)) {
5019       f = vorbis_alloc(&p);
5020       if (f) {
5021          *f = p;
5022          vorbis_pump_first_frame(f);
5023          if (error) *error = VORBIS__no_error;
5024          return f;
5025       }
5026    }
5027    if (error) *error = p.error;
5028    vorbis_deinit(&p);
5029    return NULL;
5030 }
5031 
5032 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
5033 #define PLAYBACK_MONO     1
5034 #define PLAYBACK_LEFT     2
5035 #define PLAYBACK_RIGHT    4
5036 
5037 #define L  (PLAYBACK_LEFT  | PLAYBACK_MONO)
5038 #define C  (PLAYBACK_LEFT  | PLAYBACK_RIGHT | PLAYBACK_MONO)
5039 #define R  (PLAYBACK_RIGHT | PLAYBACK_MONO)
5040 
5041 static int8 channel_position[7][6] =
5042 {
5043    { 0 },
5044    { C },
5045    { L, R },
5046    { L, C, R },
5047    { L, R, L, R },
5048    { L, C, R, L, R },
5049    { L, C, R, L, R, C },
5050 };
5051 
5052 
5053 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
5054    typedef union {
5055       float f;
5056       int i;
5057    } float_conv;
5058    typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4];
5059    #define FASTDEF(x) float_conv x
5060    // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
5061    #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT))
5062    #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22))
5063    #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s))
5064    #define check_endianness()
5065 #else
5066    #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s))))
5067    #define check_endianness()
5068    #define FASTDEF(x)
5069 #endif
5070 
copy_samples(short * dest,float * src,int len)5071 static void copy_samples(short *dest, float *src, int len)
5072 {
5073    int i;
5074    check_endianness();
5075    for (i=0; i < len; ++i) {
5076       FASTDEF(temp);
5077       int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i],15);
5078       if ((unsigned int) (v + 32768) > 65535)
5079          v = v < 0 ? -32768 : 32767;
5080       dest[i] = v;
5081    }
5082 }
5083 
compute_samples(int mask,short * output,int num_c,float ** data,int d_offset,int len)5084 static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
5085 {
5086    #define BUFFER_SIZE  32
5087    float buffer[BUFFER_SIZE];
5088    int i,j,o,n = BUFFER_SIZE;
5089    check_endianness();
5090    for (o = 0; o < len; o += BUFFER_SIZE) {
5091       memset(buffer, 0, sizeof(buffer));
5092       if (o + n > len) n = len - o;
5093       for (j=0; j < num_c; ++j) {
5094          if (channel_position[num_c][j] & mask) {
5095             for (i=0; i < n; ++i)
5096                buffer[i] += data[j][d_offset+o+i];
5097          }
5098       }
5099       for (i=0; i < n; ++i) {
5100          FASTDEF(temp);
5101          int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5102          if ((unsigned int) (v + 32768) > 65535)
5103             v = v < 0 ? -32768 : 32767;
5104          output[o+i] = v;
5105       }
5106    }
5107 }
5108 
compute_stereo_samples(short * output,int num_c,float ** data,int d_offset,int len)5109 static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
5110 {
5111    #define BUFFER_SIZE  32
5112    float buffer[BUFFER_SIZE];
5113    int i,j,o,n = BUFFER_SIZE >> 1;
5114    // o is the offset in the source data
5115    check_endianness();
5116    for (o = 0; o < len; o += BUFFER_SIZE >> 1) {
5117       // o2 is the offset in the output data
5118       int o2 = o << 1;
5119       memset(buffer, 0, sizeof(buffer));
5120       if (o + n > len) n = len - o;
5121       for (j=0; j < num_c; ++j) {
5122          int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);
5123          if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) {
5124             for (i=0; i < n; ++i) {
5125                buffer[i*2+0] += data[j][d_offset+o+i];
5126                buffer[i*2+1] += data[j][d_offset+o+i];
5127             }
5128          } else if (m == PLAYBACK_LEFT) {
5129             for (i=0; i < n; ++i) {
5130                buffer[i*2+0] += data[j][d_offset+o+i];
5131             }
5132          } else if (m == PLAYBACK_RIGHT) {
5133             for (i=0; i < n; ++i) {
5134                buffer[i*2+1] += data[j][d_offset+o+i];
5135             }
5136          }
5137       }
5138       for (i=0; i < (n<<1); ++i) {
5139          FASTDEF(temp);
5140          int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5141          if ((unsigned int) (v + 32768) > 65535)
5142             v = v < 0 ? -32768 : 32767;
5143          output[o2+i] = v;
5144       }
5145    }
5146 }
5147 
convert_samples_short(int buf_c,short ** buffer,int b_offset,int data_c,float ** data,int d_offset,int samples)5148 static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
5149 {
5150    int i;
5151    if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5152       static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
5153       for (i=0; i < buf_c; ++i)
5154          compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples);
5155    } else {
5156       int limit = buf_c < data_c ? buf_c : data_c;
5157       for (i=0; i < limit; ++i)
5158          copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
5159       for (   ; i < buf_c; ++i)
5160          memset(buffer[i]+b_offset, 0, sizeof(short) * samples);
5161    }
5162 }
5163 
stb_vorbis_get_frame_short(stb_vorbis * f,int num_c,short ** buffer,int num_samples)5164 int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
5165 {
5166    float **output;
5167    int len = stb_vorbis_get_frame_float(f, NULL, &output);
5168    if (len > num_samples) len = num_samples;
5169    if (len)
5170       convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len);
5171    return len;
5172 }
5173 
convert_channels_short_interleaved(int buf_c,short * buffer,int data_c,float ** data,int d_offset,int len)5174 static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
5175 {
5176    int i;
5177    check_endianness();
5178    if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5179       assert(buf_c == 2);
5180       for (i=0; i < buf_c; ++i)
5181          compute_stereo_samples(buffer, data_c, data, d_offset, len);
5182    } else {
5183       int limit = buf_c < data_c ? buf_c : data_c;
5184       int j;
5185       for (j=0; j < len; ++j) {
5186          for (i=0; i < limit; ++i) {
5187             FASTDEF(temp);
5188             float f = data[i][d_offset+j];
5189             int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15);
5190             if ((unsigned int) (v + 32768) > 65535)
5191                v = v < 0 ? -32768 : 32767;
5192             *buffer++ = v;
5193          }
5194          for (   ; i < buf_c; ++i)
5195             *buffer++ = 0;
5196       }
5197    }
5198 }
5199 
stb_vorbis_get_frame_short_interleaved(stb_vorbis * f,int num_c,short * buffer,int num_shorts)5200 int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
5201 {
5202    float **output;
5203    int len;
5204    if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
5205    len = stb_vorbis_get_frame_float(f, NULL, &output);
5206    if (len) {
5207       if (len*num_c > num_shorts) len = num_shorts / num_c;
5208       convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len);
5209    }
5210    return len;
5211 }
5212 
stb_vorbis_get_samples_short_interleaved(stb_vorbis * f,int channels,short * buffer,int num_shorts)5213 int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
5214 {
5215    float **outputs;
5216    int len = num_shorts / channels;
5217    int n=0;
5218    int z = f->channels;
5219    if (z > channels) z = channels;
5220    while (n < len) {
5221       int k = f->channel_buffer_end - f->channel_buffer_start;
5222       if (n+k >= len) k = len - n;
5223       if (k)
5224          convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5225       buffer += k*channels;
5226       n += k;
5227       f->channel_buffer_start += k;
5228       if (n == len) break;
5229       if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5230    }
5231    return n;
5232 }
5233 
stb_vorbis_get_samples_short(stb_vorbis * f,int channels,short ** buffer,int len)5234 int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
5235 {
5236    float **outputs;
5237    int n=0;
5238    int z = f->channels;
5239    if (z > channels) z = channels;
5240    while (n < len) {
5241       int k = f->channel_buffer_end - f->channel_buffer_start;
5242       if (n+k >= len) k = len - n;
5243       if (k)
5244          convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5245       n += k;
5246       f->channel_buffer_start += k;
5247       if (n == len) break;
5248       if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5249    }
5250    return n;
5251 }
5252 
5253 #ifndef STB_VORBIS_NO_STDIO
stb_vorbis_decode_filename(const char * filename,int * channels,int * sample_rate,short ** output)5254 int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output)
5255 {
5256    int data_len, offset, total, limit, error;
5257    short *data;
5258    stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
5259    if (v == NULL) return -1;
5260    limit = v->channels * 4096;
5261    *channels = v->channels;
5262    if (sample_rate)
5263       *sample_rate = v->sample_rate;
5264    offset = data_len = 0;
5265    total = limit;
5266    data = (short *) malloc(total * sizeof(*data));
5267    if (data == NULL) {
5268       stb_vorbis_close(v);
5269       return -2;
5270    }
5271    for (;;) {
5272       int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5273       if (n == 0) break;
5274       data_len += n;
5275       offset += n * v->channels;
5276       if (offset + limit > total) {
5277          short *data2;
5278          total *= 2;
5279          data2 = (short *) realloc(data, total * sizeof(*data));
5280          if (data2 == NULL) {
5281             free(data);
5282             stb_vorbis_close(v);
5283             return -2;
5284          }
5285          data = data2;
5286       }
5287    }
5288    *output = data;
5289    stb_vorbis_close(v);
5290    return data_len;
5291 }
5292 #endif // NO_STDIO
5293 
stb_vorbis_decode_memory(const uint8 * mem,int len,int * channels,int * sample_rate,short ** output)5294 int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output)
5295 {
5296    int data_len, offset, total, limit, error;
5297    short *data;
5298    stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
5299    if (v == NULL) return -1;
5300    limit = v->channels * 4096;
5301    *channels = v->channels;
5302    if (sample_rate)
5303       *sample_rate = v->sample_rate;
5304    offset = data_len = 0;
5305    total = limit;
5306    data = (short *) malloc(total * sizeof(*data));
5307    if (data == NULL) {
5308       stb_vorbis_close(v);
5309       return -2;
5310    }
5311    for (;;) {
5312       int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5313       if (n == 0) break;
5314       data_len += n;
5315       offset += n * v->channels;
5316       if (offset + limit > total) {
5317          short *data2;
5318          total *= 2;
5319          data2 = (short *) realloc(data, total * sizeof(*data));
5320          if (data2 == NULL) {
5321             free(data);
5322             stb_vorbis_close(v);
5323             return -2;
5324          }
5325          data = data2;
5326       }
5327    }
5328    *output = data;
5329    stb_vorbis_close(v);
5330    return data_len;
5331 }
5332 #endif // STB_VORBIS_NO_INTEGER_CONVERSION
5333 
stb_vorbis_get_samples_float_interleaved(stb_vorbis * f,int channels,float * buffer,int num_floats)5334 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
5335 {
5336    float **outputs;
5337    int len = num_floats / channels;
5338    int n=0;
5339    int z = f->channels;
5340    if (z > channels) z = channels;
5341    while (n < len) {
5342       int i,j;
5343       int k = f->channel_buffer_end - f->channel_buffer_start;
5344       if (n+k >= len) k = len - n;
5345       for (j=0; j < k; ++j) {
5346          for (i=0; i < z; ++i)
5347             *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
5348          for (   ; i < channels; ++i)
5349             *buffer++ = 0;
5350       }
5351       n += k;
5352       f->channel_buffer_start += k;
5353       if (n == len)
5354          break;
5355       if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5356          break;
5357    }
5358    return n;
5359 }
5360 
stb_vorbis_get_samples_float(stb_vorbis * f,int channels,float ** buffer,int num_samples)5361 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
5362 {
5363    float **outputs;
5364    int n=0;
5365    int z = f->channels;
5366    if (z > channels) z = channels;
5367    while (n < num_samples) {
5368       int i;
5369       int k = f->channel_buffer_end - f->channel_buffer_start;
5370       if (n+k >= num_samples) k = num_samples - n;
5371       if (k) {
5372          for (i=0; i < z; ++i)
5373             memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k);
5374          for (   ; i < channels; ++i)
5375             memset(buffer[i]+n, 0, sizeof(float) * k);
5376       }
5377       n += k;
5378       f->channel_buffer_start += k;
5379       if (n == num_samples)
5380          break;
5381       if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5382          break;
5383    }
5384    return n;
5385 }
5386 #endif // STB_VORBIS_NO_PULLDATA_API
5387 
5388 /* Version history
5389     1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
5390     1.11    - 2017-07-23 - fix MinGW compilation
5391     1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
5392     1.09    - 2016-04-04 - back out 'avoid discarding last frame' fix from previous version
5393     1.08    - 2016-04-02 - fixed multiple warnings; fix setup memory leaks;
5394                            avoid discarding last frame of audio data
5395     1.07    - 2015-01-16 - fixed some warnings, fix mingw, const-correct API
5396                            some more crash fixes when out of memory or with corrupt files
5397     1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
5398                            some crash fixes when out of memory or with corrupt files
5399     1.05    - 2015-04-19 - don't define __forceinline if it's redundant
5400     1.04    - 2014-08-27 - fix missing const-correct case in API
5401     1.03    - 2014-08-07 - Warning fixes
5402     1.02    - 2014-07-09 - Declare qsort compare function _cdecl on windows
5403     1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float
5404     1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in multichannel
5405                            (API change) report sample rate for decode-full-file funcs
5406     0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5407     0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5408     0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5409     0.99993 - remove assert that fired on legal files with empty tables
5410     0.99992 - rewind-to-start
5411     0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5412     0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5413     0.9998 - add a full-decode function with a memory source
5414     0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5415     0.9996 - query length of vorbis stream in samples/seconds
5416     0.9995 - bugfix to another optimization that only happened in certain files
5417     0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5418     0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5419     0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5420     0.9991 - performance improvement of IMDCT
5421     0.999 - (should have been 0.9990) performance improvement of IMDCT
5422     0.998 - no-CRT support from Casey Muratori
5423     0.997 - bugfixes for bugs found by Terje Mathisen
5424     0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5425     0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5426     0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5427     0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5428     0.992 - fixes for MinGW warning
5429     0.991 - turn fast-float-conversion on by default
5430     0.990 - fix push-mode seek recovery if you seek into the headers
5431     0.98b - fix to bad release of 0.98
5432     0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5433     0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5434     0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5435     0.95 - clamping code for 16-bit functions
5436     0.94 - not publically released
5437     0.93 - fixed all-zero-floor case (was decoding garbage)
5438     0.92 - fixed a memory leak
5439     0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5440     0.90 - first public release
5441 */
5442 
5443 #endif // STB_VORBIS_HEADER_ONLY
5444 
5445 
5446 /*
5447 ------------------------------------------------------------------------------
5448 This software is available under 2 licenses -- choose whichever you prefer.
5449 ------------------------------------------------------------------------------
5450 ALTERNATIVE A - MIT License
5451 Copyright (c) 2017 Sean Barrett
5452 Permission is hereby granted, free of charge, to any person obtaining a copy of
5453 this software and associated documentation files (the "Software"), to deal in
5454 the Software without restriction, including without limitation the rights to
5455 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
5456 of the Software, and to permit persons to whom the Software is furnished to do
5457 so, subject to the following conditions:
5458 The above copyright notice and this permission notice shall be included in all
5459 copies or substantial portions of the Software.
5460 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5461 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5462 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5463 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
5464 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
5465 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
5466 SOFTWARE.
5467 ------------------------------------------------------------------------------
5468 ALTERNATIVE B - Public Domain (www.unlicense.org)
5469 This is free and unencumbered software released into the public domain.
5470 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
5471 software, either in source code form or as a compiled binary, for any purpose,
5472 commercial or non-commercial, and by any means.
5473 In jurisdictions that recognize copyright laws, the author or authors of this
5474 software dedicate any and all copyright interest in the software to the public
5475 domain. We make this dedication for the benefit of the public at large and to
5476 the detriment of our heirs and successors. We intend this dedication to be an
5477 overt act of relinquishment in perpetuity of all present and future rights to
5478 this software under copyright law.
5479 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5480 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5481 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5482 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
5483 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
5484 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
5485 ------------------------------------------------------------------------------
5486 */
5487