1 // Ogg Vorbis audio decoder - v1.11 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking sponsored
7 // by Phillip Bennefall, Marc Andersen, Aaron Baker, Elias Software,
8 // Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 //   See end of file for license information.
13 //
14 // Limitations:
15 //
16 //   - floor 0 not supported (used in old ogg vorbis files pre-2004)
17 //   - lossless sample-truncation at beginning ignored
18 //   - cannot concatenate multiple vorbis streams
19 //   - sample positions are 32-bit, limiting seekable 192Khz
20 //       files to around 6 hours (Ogg supports 64-bit)
21 //
22 // Feature contributors:
23 //    Dougall Johnson (sample-exact seeking)
24 //
25 // Bugfix/warning contributors:
26 //    Terje Mathisen     Niklas Frykholm     Andy Hill
27 //    Casey Muratori     John Bolton         Gargaj
28 //    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
29 //    Bernhard Wodo      Evan Balster        alxprd@github
30 //    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
31 //    Phillip Bennefall  Rohit               Thiago Goulart
32 //    manxorist@github   saga musix          github:infatum
33 //
34 // Partial history:
35 //    1.11    - 2017/07/23 - fix MinGW compilation
36 //    1.10    - 2017/03/03 - more robust seeking; fix negative ilog(); clear error in open_memory
37 //    1.09    - 2016/04/04 - back out 'truncation of last frame' fix from previous version
38 //    1.08    - 2016/04/02 - warnings; setup memory leaks; truncation of last frame
39 //    1.07    - 2015/01/16 - fixes for crashes on invalid files; warning fixes; const
40 //    1.06    - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
41 //                           some crash fixes when out of memory or with corrupt files
42 //                           fix some inappropriately signed shifts
43 //    1.05    - 2015/04/19 - don't define __forceinline if it's redundant
44 //    1.04    - 2014/08/27 - fix missing const-correct case in API
45 //    1.03    - 2014/08/07 - warning fixes
46 //    1.02    - 2014/07/09 - declare qsort comparison as explicitly _cdecl in Windows
47 //    1.01    - 2014/06/18 - fix stb_vorbis_get_samples_float (interleaved was correct)
48 //    1.0     - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
49 //                           (API change) report sample rate for decode-full-file funcs
50 //
51 // See end of file for full version history.
52 
53 
54 //////////////////////////////////////////////////////////////////////////////
55 //
56 //  HEADER BEGINS HERE
57 //
58 
59 #ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
60 #define STB_VORBIS_INCLUDE_STB_VORBIS_H
61 
62 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
63 #define STB_VORBIS_NO_STDIO 1
64 #endif
65 
66 #ifndef STB_VORBIS_NO_STDIO
67 #include <stdio.h>
68 #endif
69 
70 #ifdef __cplusplus
71 extern "C" {
72 #endif
73 
74 ///////////   THREAD SAFETY
75 
76 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
77 // them from multiple threads at the same time. However, you can have multiple
78 // stb_vorbis* handles and decode from them independently in multiple thrads.
79 
80 
81 ///////////   MEMORY ALLOCATION
82 
83 // normally stb_vorbis uses malloc() to allocate memory at startup,
84 // and alloca() to allocate temporary memory during a frame on the
85 // stack. (Memory consumption will depend on the amount of setup
86 // data in the file and how you set the compile flags for speed
87 // vs. size. In my test files the maximal-size usage is ~150KB.)
88 //
89 // You can modify the wrapper functions in the source (setup_malloc,
90 // setup_temp_malloc, temp_malloc) to change this behavior, or you
91 // can use a simpler allocation model: you pass in a buffer from
92 // which stb_vorbis will allocate _all_ its memory (including the
93 // temp memory). "open" may fail with a VORBIS_outofmem if you
94 // do not pass in enough data; there is no way to determine how
95 // much you do need except to succeed (at which point you can
96 // query get_info to find the exact amount required. yes I know
97 // this is lame).
98 //
99 // If you pass in a non-NULL buffer of the type below, allocation
100 // will occur from it as described above. Otherwise just pass NULL
101 // to use malloc()/alloca()
102 
103 typedef struct
104 {
105    char *alloc_buffer;
106    int   alloc_buffer_length_in_bytes;
107 } stb_vorbis_alloc;
108 
109 
110 ///////////   FUNCTIONS USEABLE WITH ALL INPUT MODES
111 
112 typedef struct stb_vorbis stb_vorbis;
113 
114 typedef struct
115 {
116    unsigned int sample_rate;
117    int channels;
118 
119    unsigned int setup_memory_required;
120    unsigned int setup_temp_memory_required;
121    unsigned int temp_memory_required;
122 
123    int max_frame_size;
124 } stb_vorbis_info;
125 
126 // get general information about the file
127 extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
128 
129 // get the last error detected (clears it, too)
130 extern int stb_vorbis_get_error(stb_vorbis *f);
131 
132 // close an ogg vorbis file and free all memory in use
133 extern void stb_vorbis_close(stb_vorbis *f);
134 
135 // this function returns the offset (in samples) from the beginning of the
136 // file that will be returned by the next decode, if it is known, or -1
137 // otherwise. after a flush_pushdata() call, this may take a while before
138 // it becomes valid again.
139 // NOT WORKING YET after a seek with PULLDATA API
140 extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
141 
142 // returns the current seek point within the file, or offset from the beginning
143 // of the memory buffer. In pushdata mode it returns 0.
144 extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
145 
146 ///////////   PUSHDATA API
147 
148 #ifndef STB_VORBIS_NO_PUSHDATA_API
149 
150 // this API allows you to get blocks of data from any source and hand
151 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
152 // you how much it used, and you have to give it the rest next time;
153 // and stb_vorbis may not have enough data to work with and you will
154 // need to give it the same data again PLUS more. Note that the Vorbis
155 // specification does not bound the size of an individual frame.
156 
157 extern stb_vorbis *stb_vorbis_open_pushdata(
158          const unsigned char * datablock, int datablock_length_in_bytes,
159          int *datablock_memory_consumed_in_bytes,
160          int *error,
161          const stb_vorbis_alloc *alloc_buffer);
162 // create a vorbis decoder by passing in the initial data block containing
163 //    the ogg&vorbis headers (you don't need to do parse them, just provide
164 //    the first N bytes of the file--you're told if it's not enough, see below)
165 // on success, returns an stb_vorbis *, does not set error, returns the amount of
166 //    data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
167 // on failure, returns NULL on error and sets *error, does not change *datablock_memory_consumed
168 // if returns NULL and *error is VORBIS_need_more_data, then the input block was
169 //       incomplete and you need to pass in a larger block from the start of the file
170 
171 extern int stb_vorbis_decode_frame_pushdata(
172          stb_vorbis *f,
173          const unsigned char *datablock, int datablock_length_in_bytes,
174          int *channels,             // place to write number of float * buffers
175          float ***output,           // place to write float ** array of float * buffers
176          int *samples               // place to write number of output samples
177      );
178 // decode a frame of audio sample data if possible from the passed-in data block
179 //
180 // return value: number of bytes we used from datablock
181 //
182 // possible cases:
183 //     0 bytes used, 0 samples output (need more data)
184 //     N bytes used, 0 samples output (resynching the stream, keep going)
185 //     N bytes used, M samples output (one frame of data)
186 // note that after opening a file, you will ALWAYS get one N-bytes,0-sample
187 // frame, because Vorbis always "discards" the first frame.
188 //
189 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
190 // instead only datablock_length_in_bytes-3 or less. This is because it wants
191 // to avoid missing parts of a page header if they cross a datablock boundary,
192 // without writing state-machiney code to record a partial detection.
193 //
194 // The number of channels returned are stored in *channels (which can be
195 // NULL--it is always the same as the number of channels reported by
196 // get_info). *output will contain an array of float* buffers, one per
197 // channel. In other words, (*output)[0][0] contains the first sample from
198 // the first channel, and (*output)[1][0] contains the first sample from
199 // the second channel.
200 
201 extern void stb_vorbis_flush_pushdata(stb_vorbis *f);
202 // inform stb_vorbis that your next datablock will not be contiguous with
203 // previous ones (e.g. you've seeked in the data); future attempts to decode
204 // frames will cause stb_vorbis to resynchronize (as noted above), and
205 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
206 // will begin decoding the _next_ frame.
207 //
208 // if you want to seek using pushdata, you need to seek in your file, then
209 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
210 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
211 // if you don't like the result, seek your file again and repeat.
212 #endif
213 
214 
215 //////////   PULLING INPUT API
216 
217 #ifndef STB_VORBIS_NO_PULLDATA_API
218 // This API assumes stb_vorbis is allowed to pull data from a source--
219 // either a block of memory containing the _entire_ vorbis stream, or a
220 // FILE * that you or it create, or possibly some other reading mechanism
221 // if you go modify the source to replace the FILE * case with some kind
222 // of callback to your code. (But if you don't support seeking, you may
223 // just want to go ahead and use pushdata.)
224 
225 #if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
226 extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
227 #endif
228 #if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
229 extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
230 #endif
231 // decode an entire file and output the data interleaved into a malloc()ed
232 // buffer stored in *output. The return value is the number of samples
233 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
234 // When you're done with it, just free() the pointer returned in *output.
235 
236 extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
237                                   int *error, const stb_vorbis_alloc *alloc_buffer);
238 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
239 // this must be the entire stream!). on failure, returns NULL and sets *error
240 
241 #ifndef STB_VORBIS_NO_STDIO
242 extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
243                                   int *error, const stb_vorbis_alloc *alloc_buffer);
244 // create an ogg vorbis decoder from a filename via fopen(). on failure,
245 // returns NULL and sets *error (possibly to VORBIS_file_open_failure).
246 
247 extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
248                                   int *error, const stb_vorbis_alloc *alloc_buffer);
249 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
250 // the _current_ seek point (ftell). on failure, returns NULL and sets *error.
251 // note that stb_vorbis must "own" this stream; if you seek it in between
252 // calls to stb_vorbis, it will become confused. Morever, if you attempt to
253 // perform stb_vorbis_seek_*() operations on this file, it will assume it
254 // owns the _entire_ rest of the file after the start point. Use the next
255 // function, stb_vorbis_open_file_section(), to limit it.
256 
257 extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
258                 int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
259 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
260 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
261 // on failure, returns NULL and sets *error. note that stb_vorbis must "own"
262 // this stream; if you seek it in between calls to stb_vorbis, it will become
263 // confused.
264 #endif
265 
266 extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
267 extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
268 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
269 // after calling seek_frame(), the next call to get_frame_*() will include
270 // the specified sample. after calling stb_vorbis_seek(), the next call to
271 // stb_vorbis_get_samples_* will start with the specified sample. If you
272 // do not need to seek to EXACTLY the target sample when using get_samples_*,
273 // you can also use seek_frame().
274 
275 extern int stb_vorbis_seek_start(stb_vorbis *f);
276 // this function is equivalent to stb_vorbis_seek(f,0)
277 
278 extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
279 extern float        stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
280 // these functions return the total length of the vorbis stream
281 
282 extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
283 // decode the next frame and return the number of samples. the number of
284 // channels returned are stored in *channels (which can be NULL--it is always
285 // the same as the number of channels reported by get_info). *output will
286 // contain an array of float* buffers, one per channel. These outputs will
287 // be overwritten on the next call to stb_vorbis_get_frame_*.
288 //
289 // You generally should not intermix calls to stb_vorbis_get_frame_*()
290 // and stb_vorbis_get_samples_*(), since the latter calls the former.
291 
292 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
293 extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
294 extern int stb_vorbis_get_frame_short            (stb_vorbis *f, int num_c, short **buffer, int num_samples);
295 #endif
296 // decode the next frame and return the number of *samples* per channel.
297 // Note that for interleaved data, you pass in the number of shorts (the
298 // size of your array), but the return value is the number of samples per
299 // channel, not the total number of samples.
300 //
301 // The data is coerced to the number of channels you request according to the
302 // channel coercion rules (see below). You must pass in the size of your
303 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
304 // The maximum buffer size needed can be gotten from get_info(); however,
305 // the Vorbis I specification implies an absolute maximum of 4096 samples
306 // per channel.
307 
308 // Channel coercion rules:
309 //    Let M be the number of channels requested, and N the number of channels present,
310 //    and Cn be the nth channel; let stereo L be the sum of all L and center channels,
311 //    and stereo R be the sum of all R and center channels (channel assignment from the
312 //    vorbis spec).
313 //        M    N       output
314 //        1    k      sum(Ck) for all k
315 //        2    *      stereo L, stereo R
316 //        k    l      k > l, the first l channels, then 0s
317 //        k    l      k <= l, the first k channels
318 //    Note that this is not _good_ surround etc. mixing at all! It's just so
319 //    you get something useful.
320 
321 extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
322 extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
323 // gets num_samples samples, not necessarily on a frame boundary--this requires
324 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
325 // Returns the number of samples stored per channel; it may be less than requested
326 // at the end of the file. If there are no more samples in the file, returns 0.
327 
328 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
329 extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
330 extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
331 #endif
332 // gets num_samples samples, not necessarily on a frame boundary--this requires
333 // buffering so you have to supply the buffers. Applies the coercion rules above
334 // to produce 'channels' channels. Returns the number of samples stored per channel;
335 // it may be less than requested at the end of the file. If there are no more
336 // samples in the file, returns 0.
337 
338 #endif
339 
340 ////////   ERROR CODES
341 
342 enum STBVorbisError
343 {
344    VORBIS__no_error,
345 
346    VORBIS_need_more_data=1,             // not a real error
347 
348    VORBIS_invalid_api_mixing,           // can't mix API modes
349    VORBIS_outofmem,                     // not enough memory
350    VORBIS_feature_not_supported,        // uses floor 0
351    VORBIS_too_many_channels,            // STB_VORBIS_MAX_CHANNELS is too small
352    VORBIS_file_open_failure,            // fopen() failed
353    VORBIS_seek_without_length,          // can't seek in unknown-length file
354 
355    VORBIS_unexpected_eof=10,            // file is truncated?
356    VORBIS_seek_invalid,                 // seek past EOF
357 
358    // decoding errors (corrupt/invalid stream) -- you probably
359    // don't care about the exact details of these
360 
361    // vorbis errors:
362    VORBIS_invalid_setup=20,
363    VORBIS_invalid_stream,
364 
365    // ogg errors:
366    VORBIS_missing_capture_pattern=30,
367    VORBIS_invalid_stream_structure_version,
368    VORBIS_continued_packet_flag_invalid,
369    VORBIS_incorrect_stream_serial_number,
370    VORBIS_invalid_first_page,
371    VORBIS_bad_packet_type,
372    VORBIS_cant_find_last_page,
373    VORBIS_seek_failed
374 };
375 
376 
377 #ifdef __cplusplus
378 }
379 #endif
380 
381 #endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
382 //
383 //  HEADER ENDS HERE
384 //
385 //////////////////////////////////////////////////////////////////////////////
386 
387 #ifndef STB_VORBIS_HEADER_ONLY
388 
389 // global configuration settings (e.g. set these in the project/makefile),
390 // or just set them in this file at the top (although ideally the first few
391 // should be visible when the header file is compiled too, although it's not
392 // crucial)
393 
394 // STB_VORBIS_NO_PUSHDATA_API
395 //     does not compile the code for the various stb_vorbis_*_pushdata()
396 //     functions
397 // #define STB_VORBIS_NO_PUSHDATA_API
398 
399 // STB_VORBIS_NO_PULLDATA_API
400 //     does not compile the code for the non-pushdata APIs
401 // #define STB_VORBIS_NO_PULLDATA_API
402 
403 // STB_VORBIS_NO_STDIO
404 //     does not compile the code for the APIs that use FILE *s internally
405 //     or externally (implied by STB_VORBIS_NO_PULLDATA_API)
406 // #define STB_VORBIS_NO_STDIO
407 
408 // STB_VORBIS_NO_INTEGER_CONVERSION
409 //     does not compile the code for converting audio sample data from
410 //     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
411 // #define STB_VORBIS_NO_INTEGER_CONVERSION
412 
413 // STB_VORBIS_NO_FAST_SCALED_FLOAT
414 //      does not use a fast float-to-int trick to accelerate float-to-int on
415 //      most platforms which requires endianness be defined correctly.
416 //#define STB_VORBIS_NO_FAST_SCALED_FLOAT
417 
418 
419 // STB_VORBIS_MAX_CHANNELS [number]
420 //     globally define this to the maximum number of channels you need.
421 //     The spec does not put a restriction on channels except that
422 //     the count is stored in a byte, so 255 is the hard limit.
423 //     Reducing this saves about 16 bytes per value, so using 16 saves
424 //     (255-16)*16 or around 4KB. Plus anything other memory usage
425 //     I forgot to account for. Can probably go as low as 8 (7.1 audio),
426 //     6 (5.1 audio), or 2 (stereo only).
427 #ifndef STB_VORBIS_MAX_CHANNELS
428 #define STB_VORBIS_MAX_CHANNELS    16  // enough for anyone?
429 #endif
430 
431 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
432 //     after a flush_pushdata(), stb_vorbis begins scanning for the
433 //     next valid page, without backtracking. when it finds something
434 //     that looks like a page, it streams through it and verifies its
435 //     CRC32. Should that validation fail, it keeps scanning. But it's
436 //     possible that _while_ streaming through to check the CRC32 of
437 //     one candidate page, it sees another candidate page. This #define
438 //     determines how many "overlapping" candidate pages it can search
439 //     at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
440 //     garbage pages could be as big as 64KB, but probably average ~16KB.
441 //     So don't hose ourselves by scanning an apparent 64KB page and
442 //     missing a ton of real ones in the interim; so minimum of 2
443 #ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
444 #define STB_VORBIS_PUSHDATA_CRC_COUNT  4
445 #endif
446 
447 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
448 //     sets the log size of the huffman-acceleration table.  Maximum
449 //     supported value is 24. with larger numbers, more decodings are O(1),
450 //     but the table size is larger so worse cache missing, so you'll have
451 //     to probe (and try multiple ogg vorbis files) to find the sweet spot.
452 #ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
453 #define STB_VORBIS_FAST_HUFFMAN_LENGTH   10
454 #endif
455 
456 // STB_VORBIS_FAST_BINARY_LENGTH [number]
457 //     sets the log size of the binary-search acceleration table. this
458 //     is used in similar fashion to the fast-huffman size to set initial
459 //     parameters for the binary search
460 
461 // STB_VORBIS_FAST_HUFFMAN_INT
462 //     The fast huffman tables are much more efficient if they can be
463 //     stored as 16-bit results instead of 32-bit results. This restricts
464 //     the codebooks to having only 65535 possible outcomes, though.
465 //     (At least, accelerated by the huffman table.)
466 #ifndef STB_VORBIS_FAST_HUFFMAN_INT
467 #define STB_VORBIS_FAST_HUFFMAN_SHORT
468 #endif
469 
470 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
471 //     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
472 //     back on binary searching for the correct one. This requires storing
473 //     extra tables with the huffman codes in sorted order. Defining this
474 //     symbol trades off space for speed by forcing a linear search in the
475 //     non-fast case, except for "sparse" codebooks.
476 // #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
477 
478 // STB_VORBIS_DIVIDES_IN_RESIDUE
479 //     stb_vorbis precomputes the result of the scalar residue decoding
480 //     that would otherwise require a divide per chunk. you can trade off
481 //     space for time by defining this symbol.
482 // #define STB_VORBIS_DIVIDES_IN_RESIDUE
483 
484 // STB_VORBIS_DIVIDES_IN_CODEBOOK
485 //     vorbis VQ codebooks can be encoded two ways: with every case explicitly
486 //     stored, or with all elements being chosen from a small range of values,
487 //     and all values possible in all elements. By default, stb_vorbis expands
488 //     this latter kind out to look like the former kind for ease of decoding,
489 //     because otherwise an integer divide-per-vector-element is required to
490 //     unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
491 //     trade off storage for speed.
492 //#define STB_VORBIS_DIVIDES_IN_CODEBOOK
493 
494 #ifdef STB_VORBIS_CODEBOOK_SHORTS
495 #error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
496 #endif
497 
498 // STB_VORBIS_DIVIDE_TABLE
499 //     this replaces small integer divides in the floor decode loop with
500 //     table lookups. made less than 1% difference, so disabled by default.
501 
502 // STB_VORBIS_NO_INLINE_DECODE
503 //     disables the inlining of the scalar codebook fast-huffman decode.
504 //     might save a little codespace; useful for debugging
505 // #define STB_VORBIS_NO_INLINE_DECODE
506 
507 // STB_VORBIS_NO_DEFER_FLOOR
508 //     Normally we only decode the floor without synthesizing the actual
509 //     full curve. We can instead synthesize the curve immediately. This
510 //     requires more memory and is very likely slower, so I don't think
511 //     you'd ever want to do it except for debugging.
512 // #define STB_VORBIS_NO_DEFER_FLOOR
513 
514 
515 
516 
517 //////////////////////////////////////////////////////////////////////////////
518 
519 #ifdef STB_VORBIS_NO_PULLDATA_API
520    #define STB_VORBIS_NO_INTEGER_CONVERSION
521    #define STB_VORBIS_NO_STDIO
522 #endif
523 
524 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
525    #define STB_VORBIS_NO_STDIO 1
526 #endif
527 
528 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
529 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
530 
531    // only need endianness for fast-float-to-int, which we don't
532    // use for pushdata
533 
534    #ifndef STB_VORBIS_BIG_ENDIAN
535      #define STB_VORBIS_ENDIAN  0
536    #else
537      #define STB_VORBIS_ENDIAN  1
538    #endif
539 
540 #endif
541 #endif
542 
543 
544 #ifndef STB_VORBIS_NO_STDIO
545 #include <stdio.h>
546 #endif
547 
548 #ifndef STB_VORBIS_NO_CRT
549    #include <stdlib.h>
550    #include <string.h>
551    #include <assert.h>
552    #include <math.h>
553 
554    // find definition of alloca if it's not in stdlib.h:
555    #if defined(_MSC_VER) || defined(__MINGW32__)
556       #include <malloc.h>
557    #endif
558    #if defined(__linux__) || defined(__linux) || defined(__EMSCRIPTEN__)
559       #include <alloca.h>
560    #endif
561 #else // STB_VORBIS_NO_CRT
562    #define NULL 0
563    #define malloc(s)   0
564    #define free(s)     ((void) 0)
565    #define realloc(s)  0
566 #endif // STB_VORBIS_NO_CRT
567 
568 #include <limits.h>
569 
570 #ifdef __MINGW32__
571    // eff you mingw:
572    //     "fixed":
573    //         http://sourceforge.net/p/mingw-w64/mailman/message/32882927/
574    //     "no that broke the build, reverted, who cares about C":
575    //         http://sourceforge.net/p/mingw-w64/mailman/message/32890381/
576    #ifdef __forceinline
577    #undef __forceinline
578    #endif
579    #define __forceinline
580    #define alloca __builtin_alloca
581 #elif !defined(_MSC_VER)
582    #if __GNUC__
583       #define __forceinline inline
584    #else
585       #define __forceinline
586    #endif
587 #endif
588 
589 #if STB_VORBIS_MAX_CHANNELS > 256
590 #error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
591 #endif
592 
593 #if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
594 #error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
595 #endif
596 
597 
598 #if 0
599 #include <crtdbg.h>
600 #define CHECK(f)   _CrtIsValidHeapPointer(f->channel_buffers[1])
601 #else
602 #define CHECK(f)   ((void) 0)
603 #endif
604 
605 #define MAX_BLOCKSIZE_LOG  13   // from specification
606 #define MAX_BLOCKSIZE      (1 << MAX_BLOCKSIZE_LOG)
607 
608 
609 typedef unsigned char  uint8;
610 typedef   signed char   int8;
611 typedef unsigned short uint16;
612 typedef   signed short  int16;
613 typedef unsigned int   uint32;
614 typedef   signed int    int32;
615 
616 #ifndef TRUE
617 #define TRUE 1
618 #define FALSE 0
619 #endif
620 
621 typedef float codetype;
622 
623 // @NOTE
624 //
625 // Some arrays below are tagged "//varies", which means it's actually
626 // a variable-sized piece of data, but rather than malloc I assume it's
627 // small enough it's better to just allocate it all together with the
628 // main thing
629 //
630 // Most of the variables are specified with the smallest size I could pack
631 // them into. It might give better performance to make them all full-sized
632 // integers. It should be safe to freely rearrange the structures or change
633 // the sizes larger--nothing relies on silently truncating etc., nor the
634 // order of variables.
635 
636 #define FAST_HUFFMAN_TABLE_SIZE   (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
637 #define FAST_HUFFMAN_TABLE_MASK   (FAST_HUFFMAN_TABLE_SIZE - 1)
638 
639 typedef struct
640 {
641    int dimensions, entries;
642    uint8 *codeword_lengths;
643    float  minimum_value;
644    float  delta_value;
645    uint8  value_bits;
646    uint8  lookup_type;
647    uint8  sequence_p;
648    uint8  sparse;
649    uint32 lookup_values;
650    codetype *multiplicands;
651    uint32 *codewords;
652    #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
653     int16  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
654    #else
655     int32  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
656    #endif
657    uint32 *sorted_codewords;
658    int    *sorted_values;
659    int     sorted_entries;
660 } Codebook;
661 
662 typedef struct
663 {
664    uint8 order;
665    uint16 rate;
666    uint16 bark_map_size;
667    uint8 amplitude_bits;
668    uint8 amplitude_offset;
669    uint8 number_of_books;
670    uint8 book_list[16]; // varies
671 } Floor0;
672 
673 typedef struct
674 {
675    uint8 partitions;
676    uint8 partition_class_list[32]; // varies
677    uint8 class_dimensions[16]; // varies
678    uint8 class_subclasses[16]; // varies
679    uint8 class_masterbooks[16]; // varies
680    int16 subclass_books[16][8]; // varies
681    uint16 Xlist[31*8+2]; // varies
682    uint8 sorted_order[31*8+2];
683    uint8 neighbors[31*8+2][2];
684    uint8 floor1_multiplier;
685    uint8 rangebits;
686    int values;
687 } Floor1;
688 
689 typedef union
690 {
691    Floor0 floor0;
692    Floor1 floor1;
693 } Floor;
694 
695 typedef struct
696 {
697    uint32 begin, end;
698    uint32 part_size;
699    uint8 classifications;
700    uint8 classbook;
701    uint8 **classdata;
702    int16 (*residue_books)[8];
703 } Residue;
704 
705 typedef struct
706 {
707    uint8 magnitude;
708    uint8 angle;
709    uint8 mux;
710 } MappingChannel;
711 
712 typedef struct
713 {
714    uint16 coupling_steps;
715    MappingChannel *chan;
716    uint8  submaps;
717    uint8  submap_floor[15]; // varies
718    uint8  submap_residue[15]; // varies
719 } Mapping;
720 
721 typedef struct
722 {
723    uint8 blockflag;
724    uint8 mapping;
725    uint16 windowtype;
726    uint16 transformtype;
727 } Mode;
728 
729 typedef struct
730 {
731    uint32  goal_crc;    // expected crc if match
732    int     bytes_left;  // bytes left in packet
733    uint32  crc_so_far;  // running crc
734    int     bytes_done;  // bytes processed in _current_ chunk
735    uint32  sample_loc;  // granule pos encoded in page
736 } CRCscan;
737 
738 typedef struct
739 {
740    uint32 page_start, page_end;
741    uint32 last_decoded_sample;
742 } ProbedPage;
743 
744 struct stb_vorbis
745 {
746   // user-accessible info
747    unsigned int sample_rate;
748    int channels;
749 
750    unsigned int setup_memory_required;
751    unsigned int temp_memory_required;
752    unsigned int setup_temp_memory_required;
753 
754   // input config
755 #ifndef STB_VORBIS_NO_STDIO
756    FILE *f;
757    uint32 f_start;
758    int close_on_free;
759 #endif
760 
761    uint8 *stream;
762    uint8 *stream_start;
763    uint8 *stream_end;
764 
765    uint32 stream_len;
766 
767    uint8  push_mode;
768 
769    uint32 first_audio_page_offset;
770 
771    ProbedPage p_first, p_last;
772 
773   // memory management
774    stb_vorbis_alloc alloc;
775    int setup_offset;
776    int temp_offset;
777 
778   // run-time results
779    int eof;
780    enum STBVorbisError error;
781 
782   // user-useful data
783 
784   // header info
785    int blocksize[2];
786    int blocksize_0, blocksize_1;
787    int codebook_count;
788    Codebook *codebooks;
789    int floor_count;
790    uint16 floor_types[64]; // varies
791    Floor *floor_config;
792    int residue_count;
793    uint16 residue_types[64]; // varies
794    Residue *residue_config;
795    int mapping_count;
796    Mapping *mapping;
797    int mode_count;
798    Mode mode_config[64];  // varies
799 
800    uint32 total_samples;
801 
802   // decode buffer
803    float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
804    float *outputs        [STB_VORBIS_MAX_CHANNELS];
805 
806    float *previous_window[STB_VORBIS_MAX_CHANNELS];
807    int previous_length;
808 
809    #ifndef STB_VORBIS_NO_DEFER_FLOOR
810    int16 *finalY[STB_VORBIS_MAX_CHANNELS];
811    #else
812    float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
813    #endif
814 
815    uint32 current_loc; // sample location of next frame to decode
816    int    current_loc_valid;
817 
818   // per-blocksize precomputed data
819 
820    // twiddle factors
821    float *A[2],*B[2],*C[2];
822    float *window[2];
823    uint16 *bit_reverse[2];
824 
825   // current page/packet/segment streaming info
826    uint32 serial; // stream serial number for verification
827    int last_page;
828    int segment_count;
829    uint8 segments[255];
830    uint8 page_flag;
831    uint8 bytes_in_seg;
832    uint8 first_decode;
833    int next_seg;
834    int last_seg;  // flag that we're on the last segment
835    int last_seg_which; // what was the segment number of the last seg?
836    uint32 acc;
837    int valid_bits;
838    int packet_bytes;
839    int end_seg_with_known_loc;
840    uint32 known_loc_for_packet;
841    int discard_samples_deferred;
842    uint32 samples_output;
843 
844   // push mode scanning
845    int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
846 #ifndef STB_VORBIS_NO_PUSHDATA_API
847    CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
848 #endif
849 
850   // sample-access
851    int channel_buffer_start;
852    int channel_buffer_end;
853 };
854 
855 #if defined(STB_VORBIS_NO_PUSHDATA_API)
856    #define IS_PUSH_MODE(f)   FALSE
857 #elif defined(STB_VORBIS_NO_PULLDATA_API)
858    #define IS_PUSH_MODE(f)   TRUE
859 #else
860    #define IS_PUSH_MODE(f)   ((f)->push_mode)
861 #endif
862 
863 typedef struct stb_vorbis vorb;
864 
error(vorb * f,enum STBVorbisError e)865 static int error(vorb *f, enum STBVorbisError e)
866 {
867    f->error = e;
868    if (!f->eof && e != VORBIS_need_more_data) {
869       f->error=e; // breakpoint for debugging
870    }
871    return 0;
872 }
873 
874 
875 // these functions are used for allocating temporary memory
876 // while decoding. if you can afford the stack space, use
877 // alloca(); otherwise, provide a temp buffer and it will
878 // allocate out of those.
879 
880 #define array_size_required(count,size)  (count*(sizeof(void *)+(size)))
881 
882 #define temp_alloc(f,size)              (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
883 #ifdef dealloca
884 #define temp_free(f,p)                  (f->alloc.alloc_buffer ? 0 : dealloca(size))
885 #else
886 #define temp_free(f,p)                  0
887 #endif
888 #define temp_alloc_save(f)              ((f)->temp_offset)
889 #define temp_alloc_restore(f,p)         ((f)->temp_offset = (p))
890 
891 #define temp_block_array(f,count,size)  make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
892 
893 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
make_block_array(void * mem,int count,int size)894 static void *make_block_array(void *mem, int count, int size)
895 {
896    int i;
897    void ** p = (void **) mem;
898    char *q = (char *) (p + count);
899    for (i=0; i < count; ++i) {
900       p[i] = q;
901       q += size;
902    }
903    return p;
904 }
905 
setup_malloc(vorb * f,int sz)906 static void *setup_malloc(vorb *f, int sz)
907 {
908    sz = (sz+3) & ~3;
909    f->setup_memory_required += sz;
910    if (f->alloc.alloc_buffer) {
911       void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
912       if (f->setup_offset + sz > f->temp_offset) return NULL;
913       f->setup_offset += sz;
914       return p;
915    }
916    return sz ? malloc(sz) : NULL;
917 }
918 
setup_free(vorb * f,void * p)919 static void setup_free(vorb *f, void *p)
920 {
921    if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack
922    free(p);
923 }
924 
setup_temp_malloc(vorb * f,int sz)925 static void *setup_temp_malloc(vorb *f, int sz)
926 {
927    sz = (sz+3) & ~3;
928    if (f->alloc.alloc_buffer) {
929       if (f->temp_offset - sz < f->setup_offset) return NULL;
930       f->temp_offset -= sz;
931       return (char *) f->alloc.alloc_buffer + f->temp_offset;
932    }
933    return malloc(sz);
934 }
935 
setup_temp_free(vorb * f,void * p,int sz)936 static void setup_temp_free(vorb *f, void *p, int sz)
937 {
938    if (f->alloc.alloc_buffer) {
939       f->temp_offset += (sz+3)&~3;
940       return;
941    }
942    free(p);
943 }
944 
945 #define CRC32_POLY    0x04c11db7   // from spec
946 
947 static uint32 crc_table[256];
crc32_init(void)948 static void crc32_init(void)
949 {
950    int i,j;
951    uint32 s;
952    for(i=0; i < 256; i++) {
953       for (s=(uint32) i << 24, j=0; j < 8; ++j)
954          s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
955       crc_table[i] = s;
956    }
957 }
958 
crc32_update(uint32 crc,uint8 byte)959 static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
960 {
961    return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
962 }
963 
964 
965 // used in setup, and for huffman that doesn't go fast path
bit_reverse(unsigned int n)966 static unsigned int bit_reverse(unsigned int n)
967 {
968   n = ((n & 0xAAAAAAAA) >>  1) | ((n & 0x55555555) << 1);
969   n = ((n & 0xCCCCCCCC) >>  2) | ((n & 0x33333333) << 2);
970   n = ((n & 0xF0F0F0F0) >>  4) | ((n & 0x0F0F0F0F) << 4);
971   n = ((n & 0xFF00FF00) >>  8) | ((n & 0x00FF00FF) << 8);
972   return (n >> 16) | (n << 16);
973 }
974 
square(float x)975 static float square(float x)
976 {
977    return x*x;
978 }
979 
980 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
981 // as required by the specification. fast(?) implementation from stb.h
982 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
ilog(int32 n)983 static int ilog(int32 n)
984 {
985    static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
986 
987    if (n < 0) return 0; // signed n returns 0
988 
989    // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
990    if (n < (1 << 14))
991         if (n < (1 <<  4))            return  0 + log2_4[n      ];
992         else if (n < (1 <<  9))       return  5 + log2_4[n >>  5];
993              else                     return 10 + log2_4[n >> 10];
994    else if (n < (1 << 24))
995              if (n < (1 << 19))       return 15 + log2_4[n >> 15];
996              else                     return 20 + log2_4[n >> 20];
997         else if (n < (1 << 29))       return 25 + log2_4[n >> 25];
998              else                     return 30 + log2_4[n >> 30];
999 }
1000 
1001 #ifndef M_PI
1002   #define M_PI  3.14159265358979323846264f  // from CRC
1003 #endif
1004 
1005 // code length assigned to a value with no huffman encoding
1006 #define NO_CODE   255
1007 
1008 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
1009 //
1010 // these functions are only called at setup, and only a few times
1011 // per file
1012 
float32_unpack(uint32 x)1013 static float float32_unpack(uint32 x)
1014 {
1015    // from the specification
1016    uint32 mantissa = x & 0x1fffff;
1017    uint32 sign = x & 0x80000000;
1018    uint32 exp = (x & 0x7fe00000) >> 21;
1019    double res = sign ? -(double)mantissa : (double)mantissa;
1020    return (float) ldexp((float)res, exp-788);
1021 }
1022 
1023 
1024 // zlib & jpeg huffman tables assume that the output symbols
1025 // can either be arbitrarily arranged, or have monotonically
1026 // increasing frequencies--they rely on the lengths being sorted;
1027 // this makes for a very simple generation algorithm.
1028 // vorbis allows a huffman table with non-sorted lengths. This
1029 // requires a more sophisticated construction, since symbols in
1030 // order do not map to huffman codes "in order".
add_entry(Codebook * c,uint32 huff_code,int symbol,int count,int len,uint32 * values)1031 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
1032 {
1033    if (!c->sparse) {
1034       c->codewords      [symbol] = huff_code;
1035    } else {
1036       c->codewords       [count] = huff_code;
1037       c->codeword_lengths[count] = len;
1038       values             [count] = symbol;
1039    }
1040 }
1041 
compute_codewords(Codebook * c,uint8 * len,int n,uint32 * values)1042 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
1043 {
1044    int i,k,m=0;
1045    uint32 available[32];
1046 
1047    memset(available, 0, sizeof(available));
1048    // find the first entry
1049    for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
1050    if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
1051    // add to the list
1052    add_entry(c, 0, k, m++, len[k], values);
1053    // add all available leaves
1054    for (i=1; i <= len[k]; ++i)
1055       available[i] = 1U << (32-i);
1056    // note that the above code treats the first case specially,
1057    // but it's really the same as the following code, so they
1058    // could probably be combined (except the initial code is 0,
1059    // and I use 0 in available[] to mean 'empty')
1060    for (i=k+1; i < n; ++i) {
1061       uint32 res;
1062       int z = len[i], y;
1063       if (z == NO_CODE) continue;
1064       // find lowest available leaf (should always be earliest,
1065       // which is what the specification calls for)
1066       // note that this property, and the fact we can never have
1067       // more than one free leaf at a given level, isn't totally
1068       // trivial to prove, but it seems true and the assert never
1069       // fires, so!
1070       while (z > 0 && !available[z]) --z;
1071       if (z == 0) { return FALSE; }
1072       res = available[z];
1073       assert(z >= 0 && z < 32);
1074       available[z] = 0;
1075       add_entry(c, bit_reverse(res), i, m++, len[i], values);
1076       // propogate availability up the tree
1077       if (z != len[i]) {
1078          assert(len[i] >= 0 && len[i] < 32);
1079          for (y=len[i]; y > z; --y) {
1080             assert(available[y] == 0);
1081             available[y] = res + (1 << (32-y));
1082          }
1083       }
1084    }
1085    return TRUE;
1086 }
1087 
1088 // accelerated huffman table allows fast O(1) match of all symbols
1089 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
compute_accelerated_huffman(Codebook * c)1090 static void compute_accelerated_huffman(Codebook *c)
1091 {
1092    int i, len;
1093    for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
1094       c->fast_huffman[i] = -1;
1095 
1096    len = c->sparse ? c->sorted_entries : c->entries;
1097    #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
1098    if (len > 32767) len = 32767; // largest possible value we can encode!
1099    #endif
1100    for (i=0; i < len; ++i) {
1101       if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
1102          uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
1103          // set table entries for all bit combinations in the higher bits
1104          while (z < FAST_HUFFMAN_TABLE_SIZE) {
1105              c->fast_huffman[z] = i;
1106              z += 1 << c->codeword_lengths[i];
1107          }
1108       }
1109    }
1110 }
1111 
1112 #ifdef _MSC_VER
1113 #define STBV_CDECL __cdecl
1114 #else
1115 #define STBV_CDECL
1116 #endif
1117 
uint32_compare(const void * p,const void * q)1118 static int STBV_CDECL uint32_compare(const void *p, const void *q)
1119 {
1120    uint32 x = * (uint32 *) p;
1121    uint32 y = * (uint32 *) q;
1122    return x < y ? -1 : x > y;
1123 }
1124 
include_in_sort(Codebook * c,uint8 len)1125 static int include_in_sort(Codebook *c, uint8 len)
1126 {
1127    if (c->sparse) { assert(len != NO_CODE); return TRUE; }
1128    if (len == NO_CODE) return FALSE;
1129    if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
1130    return FALSE;
1131 }
1132 
1133 // if the fast table above doesn't work, we want to binary
1134 // search them... need to reverse the bits
compute_sorted_huffman(Codebook * c,uint8 * lengths,uint32 * values)1135 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
1136 {
1137    int i, len;
1138    // build a list of all the entries
1139    // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
1140    // this is kind of a frivolous optimization--I don't see any performance improvement,
1141    // but it's like 4 extra lines of code, so.
1142    if (!c->sparse) {
1143       int k = 0;
1144       for (i=0; i < c->entries; ++i)
1145          if (include_in_sort(c, lengths[i]))
1146             c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
1147       assert(k == c->sorted_entries);
1148    } else {
1149       for (i=0; i < c->sorted_entries; ++i)
1150          c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
1151    }
1152 
1153    qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
1154    c->sorted_codewords[c->sorted_entries] = 0xffffffff;
1155 
1156    len = c->sparse ? c->sorted_entries : c->entries;
1157    // now we need to indicate how they correspond; we could either
1158    //   #1: sort a different data structure that says who they correspond to
1159    //   #2: for each sorted entry, search the original list to find who corresponds
1160    //   #3: for each original entry, find the sorted entry
1161    // #1 requires extra storage, #2 is slow, #3 can use binary search!
1162    for (i=0; i < len; ++i) {
1163       int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
1164       if (include_in_sort(c,huff_len)) {
1165          uint32 code = bit_reverse(c->codewords[i]);
1166          int x=0, n=c->sorted_entries;
1167          while (n > 1) {
1168             // invariant: sc[x] <= code < sc[x+n]
1169             int m = x + (n >> 1);
1170             if (c->sorted_codewords[m] <= code) {
1171                x = m;
1172                n -= (n>>1);
1173             } else {
1174                n >>= 1;
1175             }
1176          }
1177          assert(c->sorted_codewords[x] == code);
1178          if (c->sparse) {
1179             c->sorted_values[x] = values[i];
1180             c->codeword_lengths[x] = huff_len;
1181          } else {
1182             c->sorted_values[x] = i;
1183          }
1184       }
1185    }
1186 }
1187 
1188 // only run while parsing the header (3 times)
vorbis_validate(uint8 * data)1189 static int vorbis_validate(uint8 *data)
1190 {
1191    static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
1192    return memcmp(data, vorbis, 6) == 0;
1193 }
1194 
1195 // called from setup only, once per code book
1196 // (formula implied by specification)
lookup1_values(int entries,int dim)1197 static int lookup1_values(int entries, int dim)
1198 {
1199    int r = (int) floor(exp((float) log((float) entries) / dim));
1200    if ((int) floor(pow((float) r+1, dim)) <= entries)   // (int) cast for MinGW warning;
1201       ++r;                                              // floor() to avoid _ftol() when non-CRT
1202    assert(pow((float) r+1, dim) > entries);
1203    assert((int) floor(pow((float) r, dim)) <= entries); // (int),floor() as above
1204    return r;
1205 }
1206 
1207 // called twice per file
compute_twiddle_factors(int n,float * A,float * B,float * C)1208 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
1209 {
1210    int n4 = n >> 2, n8 = n >> 3;
1211    int k,k2;
1212 
1213    for (k=k2=0; k < n4; ++k,k2+=2) {
1214       A[k2  ] = (float)  cos(4*k*M_PI/n);
1215       A[k2+1] = (float) -sin(4*k*M_PI/n);
1216       B[k2  ] = (float)  cos((k2+1)*M_PI/n/2) * 0.5f;
1217       B[k2+1] = (float)  sin((k2+1)*M_PI/n/2) * 0.5f;
1218    }
1219    for (k=k2=0; k < n8; ++k,k2+=2) {
1220       C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
1221       C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
1222    }
1223 }
1224 
compute_window(int n,float * window)1225 static void compute_window(int n, float *window)
1226 {
1227    int n2 = n >> 1, i;
1228    for (i=0; i < n2; ++i)
1229       window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
1230 }
1231 
compute_bitreverse(int n,uint16 * rev)1232 static void compute_bitreverse(int n, uint16 *rev)
1233 {
1234    int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
1235    int i, n8 = n >> 3;
1236    for (i=0; i < n8; ++i)
1237       rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
1238 }
1239 
init_blocksize(vorb * f,int b,int n)1240 static int init_blocksize(vorb *f, int b, int n)
1241 {
1242    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
1243    f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1244    f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1245    f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
1246    if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
1247    compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
1248    f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1249    if (!f->window[b]) return error(f, VORBIS_outofmem);
1250    compute_window(n, f->window[b]);
1251    f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
1252    if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
1253    compute_bitreverse(n, f->bit_reverse[b]);
1254    return TRUE;
1255 }
1256 
neighbors(uint16 * x,int n,int * plow,int * phigh)1257 static void neighbors(uint16 *x, int n, int *plow, int *phigh)
1258 {
1259    int low = -1;
1260    int high = 65536;
1261    int i;
1262    for (i=0; i < n; ++i) {
1263       if (x[i] > low  && x[i] < x[n]) { *plow  = i; low = x[i]; }
1264       if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
1265    }
1266 }
1267 
1268 // this has been repurposed so y is now the original index instead of y
1269 typedef struct
1270 {
1271    uint16 x,id;
1272 } stbv__floor_ordering;
1273 
point_compare(const void * p,const void * q)1274 static int STBV_CDECL point_compare(const void *p, const void *q)
1275 {
1276    stbv__floor_ordering *a = (stbv__floor_ordering *) p;
1277    stbv__floor_ordering *b = (stbv__floor_ordering *) q;
1278    return a->x < b->x ? -1 : a->x > b->x;
1279 }
1280 
1281 //
1282 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
1283 
1284 
1285 #if defined(STB_VORBIS_NO_STDIO)
1286    #define USE_MEMORY(z)    TRUE
1287 #else
1288    #define USE_MEMORY(z)    ((z)->stream)
1289 #endif
1290 
get8(vorb * z)1291 static uint8 get8(vorb *z)
1292 {
1293    if (USE_MEMORY(z)) {
1294       if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
1295       return *z->stream++;
1296    }
1297 
1298    #ifndef STB_VORBIS_NO_STDIO
1299    {
1300    int c = fgetc(z->f);
1301    if (c == EOF) { z->eof = TRUE; return 0; }
1302    return c;
1303    }
1304    #endif
1305 }
1306 
get32(vorb * f)1307 static uint32 get32(vorb *f)
1308 {
1309    uint32 x;
1310    x = get8(f);
1311    x += get8(f) << 8;
1312    x += get8(f) << 16;
1313    x += (uint32) get8(f) << 24;
1314    return x;
1315 }
1316 
getn(vorb * z,uint8 * data,int n)1317 static int getn(vorb *z, uint8 *data, int n)
1318 {
1319    if (USE_MEMORY(z)) {
1320       if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
1321       memcpy(data, z->stream, n);
1322       z->stream += n;
1323       return 1;
1324    }
1325 
1326    #ifndef STB_VORBIS_NO_STDIO
1327    if (fread(data, n, 1, z->f) == 1)
1328       return 1;
1329    else {
1330       z->eof = 1;
1331       return 0;
1332    }
1333    #endif
1334 }
1335 
skip(vorb * z,int n)1336 static void skip(vorb *z, int n)
1337 {
1338    if (USE_MEMORY(z)) {
1339       z->stream += n;
1340       if (z->stream >= z->stream_end) z->eof = 1;
1341       return;
1342    }
1343    #ifndef STB_VORBIS_NO_STDIO
1344    {
1345       long x = ftell(z->f);
1346       fseek(z->f, x+n, SEEK_SET);
1347    }
1348    #endif
1349 }
1350 
set_file_offset(stb_vorbis * f,unsigned int loc)1351 static int set_file_offset(stb_vorbis *f, unsigned int loc)
1352 {
1353    #ifndef STB_VORBIS_NO_PUSHDATA_API
1354    if (f->push_mode) return 0;
1355    #endif
1356    f->eof = 0;
1357    if (USE_MEMORY(f)) {
1358       if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
1359          f->stream = f->stream_end;
1360          f->eof = 1;
1361          return 0;
1362       } else {
1363          f->stream = f->stream_start + loc;
1364          return 1;
1365       }
1366    }
1367    #ifndef STB_VORBIS_NO_STDIO
1368    if (loc + f->f_start < loc || loc >= 0x80000000) {
1369       loc = 0x7fffffff;
1370       f->eof = 1;
1371    } else {
1372       loc += f->f_start;
1373    }
1374    if (!fseek(f->f, loc, SEEK_SET))
1375       return 1;
1376    f->eof = 1;
1377    fseek(f->f, f->f_start, SEEK_END);
1378    return 0;
1379    #endif
1380 }
1381 
1382 
1383 static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
1384 
capture_pattern(vorb * f)1385 static int capture_pattern(vorb *f)
1386 {
1387    if (0x4f != get8(f)) return FALSE;
1388    if (0x67 != get8(f)) return FALSE;
1389    if (0x67 != get8(f)) return FALSE;
1390    if (0x53 != get8(f)) return FALSE;
1391    return TRUE;
1392 }
1393 
1394 #define PAGEFLAG_continued_packet   1
1395 #define PAGEFLAG_first_page         2
1396 #define PAGEFLAG_last_page          4
1397 
start_page_no_capturepattern(vorb * f)1398 static int start_page_no_capturepattern(vorb *f)
1399 {
1400    uint32 loc0,loc1,n;
1401    // stream structure version
1402    if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
1403    // header flag
1404    f->page_flag = get8(f);
1405    // absolute granule position
1406    loc0 = get32(f);
1407    loc1 = get32(f);
1408    // @TODO: validate loc0,loc1 as valid positions?
1409    // stream serial number -- vorbis doesn't interleave, so discard
1410    get32(f);
1411    //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
1412    // page sequence number
1413    n = get32(f);
1414    f->last_page = n;
1415    // CRC32
1416    get32(f);
1417    // page_segments
1418    f->segment_count = get8(f);
1419    if (!getn(f, f->segments, f->segment_count))
1420       return error(f, VORBIS_unexpected_eof);
1421    // assume we _don't_ know any the sample position of any segments
1422    f->end_seg_with_known_loc = -2;
1423    if (loc0 != ~0U || loc1 != ~0U) {
1424       int i;
1425       // determine which packet is the last one that will complete
1426       for (i=f->segment_count-1; i >= 0; --i)
1427          if (f->segments[i] < 255)
1428             break;
1429       // 'i' is now the index of the _last_ segment of a packet that ends
1430       if (i >= 0) {
1431          f->end_seg_with_known_loc = i;
1432          f->known_loc_for_packet   = loc0;
1433       }
1434    }
1435    if (f->first_decode) {
1436       int i,len;
1437       ProbedPage p;
1438       len = 0;
1439       for (i=0; i < f->segment_count; ++i)
1440          len += f->segments[i];
1441       len += 27 + f->segment_count;
1442       p.page_start = f->first_audio_page_offset;
1443       p.page_end = p.page_start + len;
1444       p.last_decoded_sample = loc0;
1445       f->p_first = p;
1446    }
1447    f->next_seg = 0;
1448    return TRUE;
1449 }
1450 
start_page(vorb * f)1451 static int start_page(vorb *f)
1452 {
1453    if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
1454    return start_page_no_capturepattern(f);
1455 }
1456 
start_packet(vorb * f)1457 static int start_packet(vorb *f)
1458 {
1459    while (f->next_seg == -1) {
1460       if (!start_page(f)) return FALSE;
1461       if (f->page_flag & PAGEFLAG_continued_packet)
1462          return error(f, VORBIS_continued_packet_flag_invalid);
1463    }
1464    f->last_seg = FALSE;
1465    f->valid_bits = 0;
1466    f->packet_bytes = 0;
1467    f->bytes_in_seg = 0;
1468    // f->next_seg is now valid
1469    return TRUE;
1470 }
1471 
maybe_start_packet(vorb * f)1472 static int maybe_start_packet(vorb *f)
1473 {
1474    if (f->next_seg == -1) {
1475       int x = get8(f);
1476       if (f->eof) return FALSE; // EOF at page boundary is not an error!
1477       if (0x4f != x      ) return error(f, VORBIS_missing_capture_pattern);
1478       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1479       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1480       if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1481       if (!start_page_no_capturepattern(f)) return FALSE;
1482       if (f->page_flag & PAGEFLAG_continued_packet) {
1483          // set up enough state that we can read this packet if we want,
1484          // e.g. during recovery
1485          f->last_seg = FALSE;
1486          f->bytes_in_seg = 0;
1487          return error(f, VORBIS_continued_packet_flag_invalid);
1488       }
1489    }
1490    return start_packet(f);
1491 }
1492 
next_segment(vorb * f)1493 static int next_segment(vorb *f)
1494 {
1495    int len;
1496    if (f->last_seg) return 0;
1497    if (f->next_seg == -1) {
1498       f->last_seg_which = f->segment_count-1; // in case start_page fails
1499       if (!start_page(f)) { f->last_seg = 1; return 0; }
1500       if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1501    }
1502    len = f->segments[f->next_seg++];
1503    if (len < 255) {
1504       f->last_seg = TRUE;
1505       f->last_seg_which = f->next_seg-1;
1506    }
1507    if (f->next_seg >= f->segment_count)
1508       f->next_seg = -1;
1509    assert(f->bytes_in_seg == 0);
1510    f->bytes_in_seg = len;
1511    return len;
1512 }
1513 
1514 #define EOP    (-1)
1515 #define INVALID_BITS  (-1)
1516 
get8_packet_raw(vorb * f)1517 static int get8_packet_raw(vorb *f)
1518 {
1519    if (!f->bytes_in_seg) {  // CLANG!
1520       if (f->last_seg) return EOP;
1521       else if (!next_segment(f)) return EOP;
1522    }
1523    assert(f->bytes_in_seg > 0);
1524    --f->bytes_in_seg;
1525    ++f->packet_bytes;
1526    return get8(f);
1527 }
1528 
get8_packet(vorb * f)1529 static int get8_packet(vorb *f)
1530 {
1531    int x = get8_packet_raw(f);
1532    f->valid_bits = 0;
1533    return x;
1534 }
1535 
flush_packet(vorb * f)1536 static void flush_packet(vorb *f)
1537 {
1538    while (get8_packet_raw(f) != EOP);
1539 }
1540 
1541 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1542 // as the huffman decoder?
get_bits(vorb * f,int n)1543 static uint32 get_bits(vorb *f, int n)
1544 {
1545    uint32 z;
1546 
1547    if (f->valid_bits < 0) return 0;
1548    if (f->valid_bits < n) {
1549       if (n > 24) {
1550          // the accumulator technique below would not work correctly in this case
1551          z = get_bits(f, 24);
1552          z += get_bits(f, n-24) << 24;
1553          return z;
1554       }
1555       if (f->valid_bits == 0) f->acc = 0;
1556       while (f->valid_bits < n) {
1557          int z = get8_packet_raw(f);
1558          if (z == EOP) {
1559             f->valid_bits = INVALID_BITS;
1560             return 0;
1561          }
1562          f->acc += z << f->valid_bits;
1563          f->valid_bits += 8;
1564       }
1565    }
1566    if (f->valid_bits < 0) return 0;
1567    z = f->acc & ((1 << n)-1);
1568    f->acc >>= n;
1569    f->valid_bits -= n;
1570    return z;
1571 }
1572 
1573 // @OPTIMIZE: primary accumulator for huffman
1574 // expand the buffer to as many bits as possible without reading off end of packet
1575 // it might be nice to allow f->valid_bits and f->acc to be stored in registers,
1576 // e.g. cache them locally and decode locally
prep_huffman(vorb * f)1577 static __forceinline void prep_huffman(vorb *f)
1578 {
1579    if (f->valid_bits <= 24) {
1580       if (f->valid_bits == 0) f->acc = 0;
1581       do {
1582          int z;
1583          if (f->last_seg && !f->bytes_in_seg) return;
1584          z = get8_packet_raw(f);
1585          if (z == EOP) return;
1586          f->acc += (unsigned) z << f->valid_bits;
1587          f->valid_bits += 8;
1588       } while (f->valid_bits <= 24);
1589    }
1590 }
1591 
1592 enum
1593 {
1594    VORBIS_packet_id = 1,
1595    VORBIS_packet_comment = 3,
1596    VORBIS_packet_setup = 5
1597 };
1598 
codebook_decode_scalar_raw(vorb * f,Codebook * c)1599 static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
1600 {
1601    int i;
1602    prep_huffman(f);
1603 
1604    if (c->codewords == NULL && c->sorted_codewords == NULL)
1605       return -1;
1606 
1607    // cases to use binary search: sorted_codewords && !c->codewords
1608    //                             sorted_codewords && c->entries > 8
1609    if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
1610       // binary search
1611       uint32 code = bit_reverse(f->acc);
1612       int x=0, n=c->sorted_entries, len;
1613 
1614       while (n > 1) {
1615          // invariant: sc[x] <= code < sc[x+n]
1616          int m = x + (n >> 1);
1617          if (c->sorted_codewords[m] <= code) {
1618             x = m;
1619             n -= (n>>1);
1620          } else {
1621             n >>= 1;
1622          }
1623       }
1624       // x is now the sorted index
1625       if (!c->sparse) x = c->sorted_values[x];
1626       // x is now sorted index if sparse, or symbol otherwise
1627       len = c->codeword_lengths[x];
1628       if (f->valid_bits >= len) {
1629          f->acc >>= len;
1630          f->valid_bits -= len;
1631          return x;
1632       }
1633 
1634       f->valid_bits = 0;
1635       return -1;
1636    }
1637 
1638    // if small, linear search
1639    assert(!c->sparse);
1640    for (i=0; i < c->entries; ++i) {
1641       if (c->codeword_lengths[i] == NO_CODE) continue;
1642       if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
1643          if (f->valid_bits >= c->codeword_lengths[i]) {
1644             f->acc >>= c->codeword_lengths[i];
1645             f->valid_bits -= c->codeword_lengths[i];
1646             return i;
1647          }
1648          f->valid_bits = 0;
1649          return -1;
1650       }
1651    }
1652 
1653    error(f, VORBIS_invalid_stream);
1654    f->valid_bits = 0;
1655    return -1;
1656 }
1657 
1658 #ifndef STB_VORBIS_NO_INLINE_DECODE
1659 
1660 #define DECODE_RAW(var, f,c)                                  \
1661    if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)        \
1662       prep_huffman(f);                                        \
1663    var = f->acc & FAST_HUFFMAN_TABLE_MASK;                    \
1664    var = c->fast_huffman[var];                                \
1665    if (var >= 0) {                                            \
1666       int n = c->codeword_lengths[var];                       \
1667       f->acc >>= n;                                           \
1668       f->valid_bits -= n;                                     \
1669       if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
1670    } else {                                                   \
1671       var = codebook_decode_scalar_raw(f,c);                  \
1672    }
1673 
1674 #else
1675 
codebook_decode_scalar(vorb * f,Codebook * c)1676 static int codebook_decode_scalar(vorb *f, Codebook *c)
1677 {
1678    int i;
1679    if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
1680       prep_huffman(f);
1681    // fast huffman table lookup
1682    i = f->acc & FAST_HUFFMAN_TABLE_MASK;
1683    i = c->fast_huffman[i];
1684    if (i >= 0) {
1685       f->acc >>= c->codeword_lengths[i];
1686       f->valid_bits -= c->codeword_lengths[i];
1687       if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
1688       return i;
1689    }
1690    return codebook_decode_scalar_raw(f,c);
1691 }
1692 
1693 #define DECODE_RAW(var,f,c)    var = codebook_decode_scalar(f,c);
1694 
1695 #endif
1696 
1697 #define DECODE(var,f,c)                                       \
1698    DECODE_RAW(var,f,c)                                        \
1699    if (c->sparse) var = c->sorted_values[var];
1700 
1701 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1702   #define DECODE_VQ(var,f,c)   DECODE_RAW(var,f,c)
1703 #else
1704   #define DECODE_VQ(var,f,c)   DECODE(var,f,c)
1705 #endif
1706 
1707 
1708 
1709 
1710 
1711 
1712 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
1713 // where we avoid one addition
1714 #define CODEBOOK_ELEMENT(c,off)          (c->multiplicands[off])
1715 #define CODEBOOK_ELEMENT_FAST(c,off)     (c->multiplicands[off])
1716 #define CODEBOOK_ELEMENT_BASE(c)         (0)
1717 
codebook_decode_start(vorb * f,Codebook * c)1718 static int codebook_decode_start(vorb *f, Codebook *c)
1719 {
1720    int z = -1;
1721 
1722    // type 0 is only legal in a scalar context
1723    if (c->lookup_type == 0)
1724       error(f, VORBIS_invalid_stream);
1725    else {
1726       DECODE_VQ(z,f,c);
1727       if (c->sparse) assert(z < c->sorted_entries);
1728       if (z < 0) {  // check for EOP
1729          if (!f->bytes_in_seg)
1730             if (f->last_seg)
1731                return z;
1732          error(f, VORBIS_invalid_stream);
1733       }
1734    }
1735    return z;
1736 }
1737 
codebook_decode(vorb * f,Codebook * c,float * output,int len)1738 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1739 {
1740    int i,z = codebook_decode_start(f,c);
1741    if (z < 0) return FALSE;
1742    if (len > c->dimensions) len = c->dimensions;
1743 
1744 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1745    if (c->lookup_type == 1) {
1746       float last = CODEBOOK_ELEMENT_BASE(c);
1747       int div = 1;
1748       for (i=0; i < len; ++i) {
1749          int off = (z / div) % c->lookup_values;
1750          float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1751          output[i] += val;
1752          if (c->sequence_p) last = val + c->minimum_value;
1753          div *= c->lookup_values;
1754       }
1755       return TRUE;
1756    }
1757 #endif
1758 
1759    z *= c->dimensions;
1760    if (c->sequence_p) {
1761       float last = CODEBOOK_ELEMENT_BASE(c);
1762       for (i=0; i < len; ++i) {
1763          float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1764          output[i] += val;
1765          last = val + c->minimum_value;
1766       }
1767    } else {
1768       float last = CODEBOOK_ELEMENT_BASE(c);
1769       for (i=0; i < len; ++i) {
1770          output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1771       }
1772    }
1773 
1774    return TRUE;
1775 }
1776 
codebook_decode_step(vorb * f,Codebook * c,float * output,int len,int step)1777 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1778 {
1779    int i,z = codebook_decode_start(f,c);
1780    float last = CODEBOOK_ELEMENT_BASE(c);
1781    if (z < 0) return FALSE;
1782    if (len > c->dimensions) len = c->dimensions;
1783 
1784 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1785    if (c->lookup_type == 1) {
1786       int div = 1;
1787       for (i=0; i < len; ++i) {
1788          int off = (z / div) % c->lookup_values;
1789          float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1790          output[i*step] += val;
1791          if (c->sequence_p) last = val;
1792          div *= c->lookup_values;
1793       }
1794       return TRUE;
1795    }
1796 #endif
1797 
1798    z *= c->dimensions;
1799    for (i=0; i < len; ++i) {
1800       float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1801       output[i*step] += val;
1802       if (c->sequence_p) last = val;
1803    }
1804 
1805    return TRUE;
1806 }
1807 
codebook_decode_deinterleave_repeat(vorb * f,Codebook * c,float ** outputs,int ch,int * c_inter_p,int * p_inter_p,int len,int total_decode)1808 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1809 {
1810    int c_inter = *c_inter_p;
1811    int p_inter = *p_inter_p;
1812    int i,z, effective = c->dimensions;
1813 
1814    // type 0 is only legal in a scalar context
1815    if (c->lookup_type == 0)   return error(f, VORBIS_invalid_stream);
1816 
1817    while (total_decode > 0) {
1818       float last = CODEBOOK_ELEMENT_BASE(c);
1819       DECODE_VQ(z,f,c);
1820       #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1821       assert(!c->sparse || z < c->sorted_entries);
1822       #endif
1823       if (z < 0) {
1824          if (!f->bytes_in_seg)
1825             if (f->last_seg) return FALSE;
1826          return error(f, VORBIS_invalid_stream);
1827       }
1828 
1829       // if this will take us off the end of the buffers, stop short!
1830       // we check by computing the length of the virtual interleaved
1831       // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1832       // and the length we'll be using (effective)
1833       if (c_inter + p_inter*ch + effective > len * ch) {
1834          effective = len*ch - (p_inter*ch - c_inter);
1835       }
1836 
1837    #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1838       if (c->lookup_type == 1) {
1839          int div = 1;
1840          for (i=0; i < effective; ++i) {
1841             int off = (z / div) % c->lookup_values;
1842             float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1843             if (outputs[c_inter])
1844                outputs[c_inter][p_inter] += val;
1845             if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1846             if (c->sequence_p) last = val;
1847             div *= c->lookup_values;
1848          }
1849       } else
1850    #endif
1851       {
1852          z *= c->dimensions;
1853          if (c->sequence_p) {
1854             for (i=0; i < effective; ++i) {
1855                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1856                if (outputs[c_inter])
1857                   outputs[c_inter][p_inter] += val;
1858                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1859                last = val;
1860             }
1861          } else {
1862             for (i=0; i < effective; ++i) {
1863                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1864                if (outputs[c_inter])
1865                   outputs[c_inter][p_inter] += val;
1866                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1867             }
1868          }
1869       }
1870 
1871       total_decode -= effective;
1872    }
1873    *c_inter_p = c_inter;
1874    *p_inter_p = p_inter;
1875    return TRUE;
1876 }
1877 
predict_point(int x,int x0,int x1,int y0,int y1)1878 static int predict_point(int x, int x0, int x1, int y0, int y1)
1879 {
1880    int dy = y1 - y0;
1881    int adx = x1 - x0;
1882    // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
1883    int err = abs(dy) * (x - x0);
1884    int off = err / adx;
1885    return dy < 0 ? y0 - off : y0 + off;
1886 }
1887 
1888 // the following table is block-copied from the specification
1889 static float inverse_db_table[256] =
1890 {
1891   1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
1892   1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
1893   1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
1894   2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
1895   2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
1896   3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
1897   4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
1898   6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
1899   7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
1900   1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
1901   1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
1902   1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
1903   2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
1904   2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
1905   3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
1906   4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
1907   5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
1908   7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
1909   9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
1910   1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
1911   1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
1912   2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
1913   2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
1914   3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
1915   4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
1916   5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
1917   7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
1918   9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
1919   0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
1920   0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
1921   0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
1922   0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
1923   0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
1924   0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
1925   0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
1926   0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
1927   0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f,
1928   0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f,
1929   0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f,
1930   0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f,
1931   0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f,
1932   0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f,
1933   0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f,
1934   0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f,
1935   0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f,
1936   0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f,
1937   0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f,
1938   0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f,
1939   0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f,
1940   0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f,
1941   0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f,
1942   0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f,
1943   0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f,
1944   0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f,
1945   0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f,
1946   0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f,
1947   0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f,
1948   0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f,
1949   0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f,
1950   0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f,
1951   0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f,
1952   0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f,
1953   0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f,
1954   0.82788260f,    0.88168307f,    0.9389798f,     1.0f
1955 };
1956 
1957 
1958 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1959 // note that you must produce bit-identical output to decode correctly;
1960 // this specific sequence of operations is specified in the spec (it's
1961 // drawing integer-quantized frequency-space lines that the encoder
1962 // expects to be exactly the same)
1963 //     ... also, isn't the whole point of Bresenham's algorithm to NOT
1964 // have to divide in the setup? sigh.
1965 #ifndef STB_VORBIS_NO_DEFER_FLOOR
1966 #define LINE_OP(a,b)   a *= b
1967 #else
1968 #define LINE_OP(a,b)   a = b
1969 #endif
1970 
1971 #ifdef STB_VORBIS_DIVIDE_TABLE
1972 #define DIVTAB_NUMER   32
1973 #define DIVTAB_DENOM   64
1974 int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
1975 #endif
1976 
draw_line(float * output,int x0,int y0,int x1,int y1,int n)1977 static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
1978 {
1979    int dy = y1 - y0;
1980    int adx = x1 - x0;
1981    int ady = abs(dy);
1982    int base;
1983    int x=x0,y=y0;
1984    int err = 0;
1985    int sy;
1986 
1987 #ifdef STB_VORBIS_DIVIDE_TABLE
1988    if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
1989       if (dy < 0) {
1990          base = -integer_divide_table[ady][adx];
1991          sy = base-1;
1992       } else {
1993          base =  integer_divide_table[ady][adx];
1994          sy = base+1;
1995       }
1996    } else {
1997       base = dy / adx;
1998       if (dy < 0)
1999          sy = base - 1;
2000       else
2001          sy = base+1;
2002    }
2003 #else
2004    base = dy / adx;
2005    if (dy < 0)
2006       sy = base - 1;
2007    else
2008       sy = base+1;
2009 #endif
2010    ady -= abs(base) * adx;
2011    if (x1 > n) x1 = n;
2012    if (x < x1) {
2013       LINE_OP(output[x], inverse_db_table[y]);
2014       for (++x; x < x1; ++x) {
2015          err += ady;
2016          if (err >= adx) {
2017             err -= adx;
2018             y += sy;
2019          } else
2020             y += base;
2021          LINE_OP(output[x], inverse_db_table[y]);
2022       }
2023    }
2024 }
2025 
residue_decode(vorb * f,Codebook * book,float * target,int offset,int n,int rtype)2026 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
2027 {
2028    int k;
2029    if (rtype == 0) {
2030       int step = n / book->dimensions;
2031       for (k=0; k < step; ++k)
2032          if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
2033             return FALSE;
2034    } else {
2035       for (k=0; k < n; ) {
2036          if (!codebook_decode(f, book, target+offset, n-k))
2037             return FALSE;
2038          k += book->dimensions;
2039          offset += book->dimensions;
2040       }
2041    }
2042    return TRUE;
2043 }
2044 
decode_residue(vorb * f,float * residue_buffers[],int ch,int n,int rn,uint8 * do_not_decode)2045 static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
2046 {
2047    int i,j,pass;
2048    Residue *r = f->residue_config + rn;
2049    int rtype = f->residue_types[rn];
2050    int c = r->classbook;
2051    int classwords = f->codebooks[c].dimensions;
2052    int n_read = r->end - r->begin;
2053    int part_read = n_read / r->part_size;
2054    int temp_alloc_point = temp_alloc_save(f);
2055    #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2056    uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
2057    #else
2058    int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
2059    #endif
2060 
2061    CHECK(f);
2062 
2063    for (i=0; i < ch; ++i)
2064       if (!do_not_decode[i])
2065          memset(residue_buffers[i], 0, sizeof(float) * n);
2066 
2067    if (rtype == 2 && ch != 1) {
2068       for (j=0; j < ch; ++j)
2069          if (!do_not_decode[j])
2070             break;
2071       if (j == ch)
2072          goto done;
2073 
2074       for (pass=0; pass < 8; ++pass) {
2075          int pcount = 0, class_set = 0;
2076          if (ch == 2) {
2077             while (pcount < part_read) {
2078                int z = r->begin + pcount*r->part_size;
2079                int c_inter = (z & 1), p_inter = z>>1;
2080                if (pass == 0) {
2081                   Codebook *c = f->codebooks+r->classbook;
2082                   int q;
2083                   DECODE(q,f,c);
2084                   if (q == EOP) goto done;
2085                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2086                   part_classdata[0][class_set] = r->classdata[q];
2087                   #else
2088                   for (i=classwords-1; i >= 0; --i) {
2089                      classifications[0][i+pcount] = q % r->classifications;
2090                      q /= r->classifications;
2091                   }
2092                   #endif
2093                }
2094                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2095                   int z = r->begin + pcount*r->part_size;
2096                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2097                   int c = part_classdata[0][class_set][i];
2098                   #else
2099                   int c = classifications[0][pcount];
2100                   #endif
2101                   int b = r->residue_books[c][pass];
2102                   if (b >= 0) {
2103                      Codebook *book = f->codebooks + b;
2104                      #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
2105                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2106                         goto done;
2107                      #else
2108                      // saves 1%
2109                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2110                         goto done;
2111                      #endif
2112                   } else {
2113                      z += r->part_size;
2114                      c_inter = z & 1;
2115                      p_inter = z >> 1;
2116                   }
2117                }
2118                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2119                ++class_set;
2120                #endif
2121             }
2122          } else if (ch == 1) {
2123             while (pcount < part_read) {
2124                int z = r->begin + pcount*r->part_size;
2125                int c_inter = 0, p_inter = z;
2126                if (pass == 0) {
2127                   Codebook *c = f->codebooks+r->classbook;
2128                   int q;
2129                   DECODE(q,f,c);
2130                   if (q == EOP) goto done;
2131                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2132                   part_classdata[0][class_set] = r->classdata[q];
2133                   #else
2134                   for (i=classwords-1; i >= 0; --i) {
2135                      classifications[0][i+pcount] = q % r->classifications;
2136                      q /= r->classifications;
2137                   }
2138                   #endif
2139                }
2140                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2141                   int z = r->begin + pcount*r->part_size;
2142                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2143                   int c = part_classdata[0][class_set][i];
2144                   #else
2145                   int c = classifications[0][pcount];
2146                   #endif
2147                   int b = r->residue_books[c][pass];
2148                   if (b >= 0) {
2149                      Codebook *book = f->codebooks + b;
2150                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2151                         goto done;
2152                   } else {
2153                      z += r->part_size;
2154                      c_inter = 0;
2155                      p_inter = z;
2156                   }
2157                }
2158                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2159                ++class_set;
2160                #endif
2161             }
2162          } else {
2163             while (pcount < part_read) {
2164                int z = r->begin + pcount*r->part_size;
2165                int c_inter = z % ch, p_inter = z/ch;
2166                if (pass == 0) {
2167                   Codebook *c = f->codebooks+r->classbook;
2168                   int q;
2169                   DECODE(q,f,c);
2170                   if (q == EOP) goto done;
2171                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2172                   part_classdata[0][class_set] = r->classdata[q];
2173                   #else
2174                   for (i=classwords-1; i >= 0; --i) {
2175                      classifications[0][i+pcount] = q % r->classifications;
2176                      q /= r->classifications;
2177                   }
2178                   #endif
2179                }
2180                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2181                   int z = r->begin + pcount*r->part_size;
2182                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2183                   int c = part_classdata[0][class_set][i];
2184                   #else
2185                   int c = classifications[0][pcount];
2186                   #endif
2187                   int b = r->residue_books[c][pass];
2188                   if (b >= 0) {
2189                      Codebook *book = f->codebooks + b;
2190                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2191                         goto done;
2192                   } else {
2193                      z += r->part_size;
2194                      c_inter = z % ch;
2195                      p_inter = z / ch;
2196                   }
2197                }
2198                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2199                ++class_set;
2200                #endif
2201             }
2202          }
2203       }
2204       goto done;
2205    }
2206    CHECK(f);
2207 
2208    for (pass=0; pass < 8; ++pass) {
2209       int pcount = 0, class_set=0;
2210       while (pcount < part_read) {
2211          if (pass == 0) {
2212             for (j=0; j < ch; ++j) {
2213                if (!do_not_decode[j]) {
2214                   Codebook *c = f->codebooks+r->classbook;
2215                   int temp;
2216                   DECODE(temp,f,c);
2217                   if (temp == EOP) goto done;
2218                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2219                   part_classdata[j][class_set] = r->classdata[temp];
2220                   #else
2221                   for (i=classwords-1; i >= 0; --i) {
2222                      classifications[j][i+pcount] = temp % r->classifications;
2223                      temp /= r->classifications;
2224                   }
2225                   #endif
2226                }
2227             }
2228          }
2229          for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2230             for (j=0; j < ch; ++j) {
2231                if (!do_not_decode[j]) {
2232                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2233                   int c = part_classdata[j][class_set][i];
2234                   #else
2235                   int c = classifications[j][pcount];
2236                   #endif
2237                   int b = r->residue_books[c][pass];
2238                   if (b >= 0) {
2239                      float *target = residue_buffers[j];
2240                      int offset = r->begin + pcount * r->part_size;
2241                      int n = r->part_size;
2242                      Codebook *book = f->codebooks + b;
2243                      if (!residue_decode(f, book, target, offset, n, rtype))
2244                         goto done;
2245                   }
2246                }
2247             }
2248          }
2249          #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2250          ++class_set;
2251          #endif
2252       }
2253    }
2254   done:
2255    CHECK(f);
2256    #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2257    temp_free(f,part_classdata);
2258    #else
2259    temp_free(f,classifications);
2260    #endif
2261    temp_alloc_restore(f,temp_alloc_point);
2262 }
2263 
2264 
2265 #if 0
2266 // slow way for debugging
2267 void inverse_mdct_slow(float *buffer, int n)
2268 {
2269    int i,j;
2270    int n2 = n >> 1;
2271    float *x = (float *) malloc(sizeof(*x) * n2);
2272    memcpy(x, buffer, sizeof(*x) * n2);
2273    for (i=0; i < n; ++i) {
2274       float acc = 0;
2275       for (j=0; j < n2; ++j)
2276          // formula from paper:
2277          //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2278          // formula from wikipedia
2279          //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2280          // these are equivalent, except the formula from the paper inverts the multiplier!
2281          // however, what actually works is NO MULTIPLIER!?!
2282          //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2283          acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2284       buffer[i] = acc;
2285    }
2286    free(x);
2287 }
2288 #elif 0
2289 // same as above, but just barely able to run in real time on modern machines
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2290 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2291 {
2292    float mcos[16384];
2293    int i,j;
2294    int n2 = n >> 1, nmask = (n << 2) -1;
2295    float *x = (float *) malloc(sizeof(*x) * n2);
2296    memcpy(x, buffer, sizeof(*x) * n2);
2297    for (i=0; i < 4*n; ++i)
2298       mcos[i] = (float) cos(M_PI / 2 * i / n);
2299 
2300    for (i=0; i < n; ++i) {
2301       float acc = 0;
2302       for (j=0; j < n2; ++j)
2303          acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
2304       buffer[i] = acc;
2305    }
2306    free(x);
2307 }
2308 #elif 0
2309 // transform to use a slow dct-iv; this is STILL basically trivial,
2310 // but only requires half as many ops
dct_iv_slow(float * buffer,int n)2311 void dct_iv_slow(float *buffer, int n)
2312 {
2313    float mcos[16384];
2314    float x[2048];
2315    int i,j;
2316    int n2 = n >> 1, nmask = (n << 3) - 1;
2317    memcpy(x, buffer, sizeof(*x) * n);
2318    for (i=0; i < 8*n; ++i)
2319       mcos[i] = (float) cos(M_PI / 4 * i / n);
2320    for (i=0; i < n; ++i) {
2321       float acc = 0;
2322       for (j=0; j < n; ++j)
2323          acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
2324       buffer[i] = acc;
2325    }
2326 }
2327 
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2328 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2329 {
2330    int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
2331    float temp[4096];
2332 
2333    memcpy(temp, buffer, n2 * sizeof(float));
2334    dct_iv_slow(temp, n2);  // returns -c'-d, a-b'
2335 
2336    for (i=0; i < n4  ; ++i) buffer[i] = temp[i+n4];            // a-b'
2337    for (   ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1];   // b-a', c+d'
2338    for (   ; i < n   ; ++i) buffer[i] = -temp[i - n3_4];       // c'+d
2339 }
2340 #endif
2341 
2342 #ifndef LIBVORBIS_MDCT
2343 #define LIBVORBIS_MDCT 0
2344 #endif
2345 
2346 #if LIBVORBIS_MDCT
2347 // directly call the vorbis MDCT using an interface documented
2348 // by Jeff Roberts... useful for performance comparison
2349 typedef struct
2350 {
2351   int n;
2352   int log2n;
2353 
2354   float *trig;
2355   int   *bitrev;
2356 
2357   float scale;
2358 } mdct_lookup;
2359 
2360 extern void mdct_init(mdct_lookup *lookup, int n);
2361 extern void mdct_clear(mdct_lookup *l);
2362 extern void mdct_backward(mdct_lookup *init, float *in, float *out);
2363 
2364 mdct_lookup M1,M2;
2365 
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2366 void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2367 {
2368    mdct_lookup *M;
2369    if (M1.n == n) M = &M1;
2370    else if (M2.n == n) M = &M2;
2371    else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
2372    else {
2373       if (M2.n) __asm int 3;
2374       mdct_init(&M2, n);
2375       M = &M2;
2376    }
2377 
2378    mdct_backward(M, buffer, buffer);
2379 }
2380 #endif
2381 
2382 
2383 // the following were split out into separate functions while optimizing;
2384 // they could be pushed back up but eh. __forceinline showed no change;
2385 // they're probably already being inlined.
imdct_step3_iter0_loop(int n,float * e,int i_off,int k_off,float * A)2386 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
2387 {
2388    float *ee0 = e + i_off;
2389    float *ee2 = ee0 + k_off;
2390    int i;
2391 
2392    assert((n & 3) == 0);
2393    for (i=(n>>2); i > 0; --i) {
2394       float k00_20, k01_21;
2395       k00_20  = ee0[ 0] - ee2[ 0];
2396       k01_21  = ee0[-1] - ee2[-1];
2397       ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
2398       ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
2399       ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
2400       ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
2401       A += 8;
2402 
2403       k00_20  = ee0[-2] - ee2[-2];
2404       k01_21  = ee0[-3] - ee2[-3];
2405       ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
2406       ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
2407       ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
2408       ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
2409       A += 8;
2410 
2411       k00_20  = ee0[-4] - ee2[-4];
2412       k01_21  = ee0[-5] - ee2[-5];
2413       ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
2414       ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
2415       ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
2416       ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
2417       A += 8;
2418 
2419       k00_20  = ee0[-6] - ee2[-6];
2420       k01_21  = ee0[-7] - ee2[-7];
2421       ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
2422       ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
2423       ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
2424       ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
2425       A += 8;
2426       ee0 -= 8;
2427       ee2 -= 8;
2428    }
2429 }
2430 
imdct_step3_inner_r_loop(int lim,float * e,int d0,int k_off,float * A,int k1)2431 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
2432 {
2433    int i;
2434    float k00_20, k01_21;
2435 
2436    float *e0 = e + d0;
2437    float *e2 = e0 + k_off;
2438 
2439    for (i=lim >> 2; i > 0; --i) {
2440       k00_20 = e0[-0] - e2[-0];
2441       k01_21 = e0[-1] - e2[-1];
2442       e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
2443       e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
2444       e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
2445       e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
2446 
2447       A += k1;
2448 
2449       k00_20 = e0[-2] - e2[-2];
2450       k01_21 = e0[-3] - e2[-3];
2451       e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
2452       e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
2453       e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
2454       e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
2455 
2456       A += k1;
2457 
2458       k00_20 = e0[-4] - e2[-4];
2459       k01_21 = e0[-5] - e2[-5];
2460       e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
2461       e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
2462       e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
2463       e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
2464 
2465       A += k1;
2466 
2467       k00_20 = e0[-6] - e2[-6];
2468       k01_21 = e0[-7] - e2[-7];
2469       e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
2470       e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
2471       e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
2472       e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
2473 
2474       e0 -= 8;
2475       e2 -= 8;
2476 
2477       A += k1;
2478    }
2479 }
2480 
imdct_step3_inner_s_loop(int n,float * e,int i_off,int k_off,float * A,int a_off,int k0)2481 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
2482 {
2483    int i;
2484    float A0 = A[0];
2485    float A1 = A[0+1];
2486    float A2 = A[0+a_off];
2487    float A3 = A[0+a_off+1];
2488    float A4 = A[0+a_off*2+0];
2489    float A5 = A[0+a_off*2+1];
2490    float A6 = A[0+a_off*3+0];
2491    float A7 = A[0+a_off*3+1];
2492 
2493    float k00,k11;
2494 
2495    float *ee0 = e  +i_off;
2496    float *ee2 = ee0+k_off;
2497 
2498    for (i=n; i > 0; --i) {
2499       k00     = ee0[ 0] - ee2[ 0];
2500       k11     = ee0[-1] - ee2[-1];
2501       ee0[ 0] =  ee0[ 0] + ee2[ 0];
2502       ee0[-1] =  ee0[-1] + ee2[-1];
2503       ee2[ 0] = (k00) * A0 - (k11) * A1;
2504       ee2[-1] = (k11) * A0 + (k00) * A1;
2505 
2506       k00     = ee0[-2] - ee2[-2];
2507       k11     = ee0[-3] - ee2[-3];
2508       ee0[-2] =  ee0[-2] + ee2[-2];
2509       ee0[-3] =  ee0[-3] + ee2[-3];
2510       ee2[-2] = (k00) * A2 - (k11) * A3;
2511       ee2[-3] = (k11) * A2 + (k00) * A3;
2512 
2513       k00     = ee0[-4] - ee2[-4];
2514       k11     = ee0[-5] - ee2[-5];
2515       ee0[-4] =  ee0[-4] + ee2[-4];
2516       ee0[-5] =  ee0[-5] + ee2[-5];
2517       ee2[-4] = (k00) * A4 - (k11) * A5;
2518       ee2[-5] = (k11) * A4 + (k00) * A5;
2519 
2520       k00     = ee0[-6] - ee2[-6];
2521       k11     = ee0[-7] - ee2[-7];
2522       ee0[-6] =  ee0[-6] + ee2[-6];
2523       ee0[-7] =  ee0[-7] + ee2[-7];
2524       ee2[-6] = (k00) * A6 - (k11) * A7;
2525       ee2[-7] = (k11) * A6 + (k00) * A7;
2526 
2527       ee0 -= k0;
2528       ee2 -= k0;
2529    }
2530 }
2531 
iter_54(float * z)2532 static __forceinline void iter_54(float *z)
2533 {
2534    float k00,k11,k22,k33;
2535    float y0,y1,y2,y3;
2536 
2537    k00  = z[ 0] - z[-4];
2538    y0   = z[ 0] + z[-4];
2539    y2   = z[-2] + z[-6];
2540    k22  = z[-2] - z[-6];
2541 
2542    z[-0] = y0 + y2;      // z0 + z4 + z2 + z6
2543    z[-2] = y0 - y2;      // z0 + z4 - z2 - z6
2544 
2545    // done with y0,y2
2546 
2547    k33  = z[-3] - z[-7];
2548 
2549    z[-4] = k00 + k33;    // z0 - z4 + z3 - z7
2550    z[-6] = k00 - k33;    // z0 - z4 - z3 + z7
2551 
2552    // done with k33
2553 
2554    k11  = z[-1] - z[-5];
2555    y1   = z[-1] + z[-5];
2556    y3   = z[-3] + z[-7];
2557 
2558    z[-1] = y1 + y3;      // z1 + z5 + z3 + z7
2559    z[-3] = y1 - y3;      // z1 + z5 - z3 - z7
2560    z[-5] = k11 - k22;    // z1 - z5 + z2 - z6
2561    z[-7] = k11 + k22;    // z1 - z5 - z2 + z6
2562 }
2563 
imdct_step3_inner_s_loop_ld654(int n,float * e,int i_off,float * A,int base_n)2564 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
2565 {
2566    int a_off = base_n >> 3;
2567    float A2 = A[0+a_off];
2568    float *z = e + i_off;
2569    float *base = z - 16 * n;
2570 
2571    while (z > base) {
2572       float k00,k11;
2573 
2574       k00   = z[-0] - z[-8];
2575       k11   = z[-1] - z[-9];
2576       z[-0] = z[-0] + z[-8];
2577       z[-1] = z[-1] + z[-9];
2578       z[-8] =  k00;
2579       z[-9] =  k11 ;
2580 
2581       k00    = z[ -2] - z[-10];
2582       k11    = z[ -3] - z[-11];
2583       z[ -2] = z[ -2] + z[-10];
2584       z[ -3] = z[ -3] + z[-11];
2585       z[-10] = (k00+k11) * A2;
2586       z[-11] = (k11-k00) * A2;
2587 
2588       k00    = z[-12] - z[ -4];  // reverse to avoid a unary negation
2589       k11    = z[ -5] - z[-13];
2590       z[ -4] = z[ -4] + z[-12];
2591       z[ -5] = z[ -5] + z[-13];
2592       z[-12] = k11;
2593       z[-13] = k00;
2594 
2595       k00    = z[-14] - z[ -6];  // reverse to avoid a unary negation
2596       k11    = z[ -7] - z[-15];
2597       z[ -6] = z[ -6] + z[-14];
2598       z[ -7] = z[ -7] + z[-15];
2599       z[-14] = (k00+k11) * A2;
2600       z[-15] = (k00-k11) * A2;
2601 
2602       iter_54(z);
2603       iter_54(z-8);
2604       z -= 16;
2605    }
2606 }
2607 
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2608 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2609 {
2610    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2611    int ld;
2612    // @OPTIMIZE: reduce register pressure by using fewer variables?
2613    int save_point = temp_alloc_save(f);
2614    float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
2615    float *u=NULL,*v=NULL;
2616    // twiddle factors
2617    float *A = f->A[blocktype];
2618 
2619    // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2620    // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
2621 
2622    // kernel from paper
2623 
2624 
2625    // merged:
2626    //   copy and reflect spectral data
2627    //   step 0
2628 
2629    // note that it turns out that the items added together during
2630    // this step are, in fact, being added to themselves (as reflected
2631    // by step 0). inexplicable inefficiency! this became obvious
2632    // once I combined the passes.
2633 
2634    // so there's a missing 'times 2' here (for adding X to itself).
2635    // this propogates through linearly to the end, where the numbers
2636    // are 1/2 too small, and need to be compensated for.
2637 
2638    {
2639       float *d,*e, *AA, *e_stop;
2640       d = &buf2[n2-2];
2641       AA = A;
2642       e = &buffer[0];
2643       e_stop = &buffer[n2];
2644       while (e != e_stop) {
2645          d[1] = (e[0] * AA[0] - e[2]*AA[1]);
2646          d[0] = (e[0] * AA[1] + e[2]*AA[0]);
2647          d -= 2;
2648          AA += 2;
2649          e += 4;
2650       }
2651 
2652       e = &buffer[n2-3];
2653       while (d >= buf2) {
2654          d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
2655          d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
2656          d -= 2;
2657          AA += 2;
2658          e -= 4;
2659       }
2660    }
2661 
2662    // now we use symbolic names for these, so that we can
2663    // possibly swap their meaning as we change which operations
2664    // are in place
2665 
2666    u = buffer;
2667    v = buf2;
2668 
2669    // step 2    (paper output is w, now u)
2670    // this could be in place, but the data ends up in the wrong
2671    // place... _somebody_'s got to swap it, so this is nominated
2672    {
2673       float *AA = &A[n2-8];
2674       float *d0,*d1, *e0, *e1;
2675 
2676       e0 = &v[n4];
2677       e1 = &v[0];
2678 
2679       d0 = &u[n4];
2680       d1 = &u[0];
2681 
2682       while (AA >= A) {
2683          float v40_20, v41_21;
2684 
2685          v41_21 = e0[1] - e1[1];
2686          v40_20 = e0[0] - e1[0];
2687          d0[1]  = e0[1] + e1[1];
2688          d0[0]  = e0[0] + e1[0];
2689          d1[1]  = v41_21*AA[4] - v40_20*AA[5];
2690          d1[0]  = v40_20*AA[4] + v41_21*AA[5];
2691 
2692          v41_21 = e0[3] - e1[3];
2693          v40_20 = e0[2] - e1[2];
2694          d0[3]  = e0[3] + e1[3];
2695          d0[2]  = e0[2] + e1[2];
2696          d1[3]  = v41_21*AA[0] - v40_20*AA[1];
2697          d1[2]  = v40_20*AA[0] + v41_21*AA[1];
2698 
2699          AA -= 8;
2700 
2701          d0 += 4;
2702          d1 += 4;
2703          e0 += 4;
2704          e1 += 4;
2705       }
2706    }
2707 
2708    // step 3
2709    ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2710 
2711    // optimized step 3:
2712 
2713    // the original step3 loop can be nested r inside s or s inside r;
2714    // it's written originally as s inside r, but this is dumb when r
2715    // iterates many times, and s few. So I have two copies of it and
2716    // switch between them halfway.
2717 
2718    // this is iteration 0 of step 3
2719    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2720    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2721 
2722    // this is iteration 1 of step 3
2723    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2724    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2725    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2726    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2727 
2728    l=2;
2729    for (; l < (ld-3)>>1; ++l) {
2730       int k0 = n >> (l+2), k0_2 = k0>>1;
2731       int lim = 1 << (l+1);
2732       int i;
2733       for (i=0; i < lim; ++i)
2734          imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2735    }
2736 
2737    for (; l < ld-6; ++l) {
2738       int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2739       int rlim = n >> (l+6), r;
2740       int lim = 1 << (l+1);
2741       int i_off;
2742       float *A0 = A;
2743       i_off = n2-1;
2744       for (r=rlim; r > 0; --r) {
2745          imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2746          A0 += k1*4;
2747          i_off -= 8;
2748       }
2749    }
2750 
2751    // iterations with count:
2752    //   ld-6,-5,-4 all interleaved together
2753    //       the big win comes from getting rid of needless flops
2754    //         due to the constants on pass 5 & 4 being all 1 and 0;
2755    //       combining them to be simultaneous to improve cache made little difference
2756    imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2757 
2758    // output is u
2759 
2760    // step 4, 5, and 6
2761    // cannot be in-place because of step 5
2762    {
2763       uint16 *bitrev = f->bit_reverse[blocktype];
2764       // weirdly, I'd have thought reading sequentially and writing
2765       // erratically would have been better than vice-versa, but in
2766       // fact that's not what my testing showed. (That is, with
2767       // j = bitreverse(i), do you read i and write j, or read j and write i.)
2768 
2769       float *d0 = &v[n4-4];
2770       float *d1 = &v[n2-4];
2771       while (d0 >= v) {
2772          int k4;
2773 
2774          k4 = bitrev[0];
2775          d1[3] = u[k4+0];
2776          d1[2] = u[k4+1];
2777          d0[3] = u[k4+2];
2778          d0[2] = u[k4+3];
2779 
2780          k4 = bitrev[1];
2781          d1[1] = u[k4+0];
2782          d1[0] = u[k4+1];
2783          d0[1] = u[k4+2];
2784          d0[0] = u[k4+3];
2785 
2786          d0 -= 4;
2787          d1 -= 4;
2788          bitrev += 2;
2789       }
2790    }
2791    // (paper output is u, now v)
2792 
2793 
2794    // data must be in buf2
2795    assert(v == buf2);
2796 
2797    // step 7   (paper output is v, now v)
2798    // this is now in place
2799    {
2800       float *C = f->C[blocktype];
2801       float *d, *e;
2802 
2803       d = v;
2804       e = v + n2 - 4;
2805 
2806       while (d < e) {
2807          float a02,a11,b0,b1,b2,b3;
2808 
2809          a02 = d[0] - e[2];
2810          a11 = d[1] + e[3];
2811 
2812          b0 = C[1]*a02 + C[0]*a11;
2813          b1 = C[1]*a11 - C[0]*a02;
2814 
2815          b2 = d[0] + e[ 2];
2816          b3 = d[1] - e[ 3];
2817 
2818          d[0] = b2 + b0;
2819          d[1] = b3 + b1;
2820          e[2] = b2 - b0;
2821          e[3] = b1 - b3;
2822 
2823          a02 = d[2] - e[0];
2824          a11 = d[3] + e[1];
2825 
2826          b0 = C[3]*a02 + C[2]*a11;
2827          b1 = C[3]*a11 - C[2]*a02;
2828 
2829          b2 = d[2] + e[ 0];
2830          b3 = d[3] - e[ 1];
2831 
2832          d[2] = b2 + b0;
2833          d[3] = b3 + b1;
2834          e[0] = b2 - b0;
2835          e[1] = b1 - b3;
2836 
2837          C += 4;
2838          d += 4;
2839          e -= 4;
2840       }
2841    }
2842 
2843    // data must be in buf2
2844 
2845 
2846    // step 8+decode   (paper output is X, now buffer)
2847    // this generates pairs of data a la 8 and pushes them directly through
2848    // the decode kernel (pushing rather than pulling) to avoid having
2849    // to make another pass later
2850 
2851    // this cannot POSSIBLY be in place, so we refer to the buffers directly
2852 
2853    {
2854       float *d0,*d1,*d2,*d3;
2855 
2856       float *B = f->B[blocktype] + n2 - 8;
2857       float *e = buf2 + n2 - 8;
2858       d0 = &buffer[0];
2859       d1 = &buffer[n2-4];
2860       d2 = &buffer[n2];
2861       d3 = &buffer[n-4];
2862       while (e >= v) {
2863          float p0,p1,p2,p3;
2864 
2865          p3 =  e[6]*B[7] - e[7]*B[6];
2866          p2 = -e[6]*B[6] - e[7]*B[7];
2867 
2868          d0[0] =   p3;
2869          d1[3] = - p3;
2870          d2[0] =   p2;
2871          d3[3] =   p2;
2872 
2873          p1 =  e[4]*B[5] - e[5]*B[4];
2874          p0 = -e[4]*B[4] - e[5]*B[5];
2875 
2876          d0[1] =   p1;
2877          d1[2] = - p1;
2878          d2[1] =   p0;
2879          d3[2] =   p0;
2880 
2881          p3 =  e[2]*B[3] - e[3]*B[2];
2882          p2 = -e[2]*B[2] - e[3]*B[3];
2883 
2884          d0[2] =   p3;
2885          d1[1] = - p3;
2886          d2[2] =   p2;
2887          d3[1] =   p2;
2888 
2889          p1 =  e[0]*B[1] - e[1]*B[0];
2890          p0 = -e[0]*B[0] - e[1]*B[1];
2891 
2892          d0[3] =   p1;
2893          d1[0] = - p1;
2894          d2[3] =   p0;
2895          d3[0] =   p0;
2896 
2897          B -= 8;
2898          e -= 8;
2899          d0 += 4;
2900          d2 += 4;
2901          d1 -= 4;
2902          d3 -= 4;
2903       }
2904    }
2905 
2906    temp_free(f,buf2);
2907    temp_alloc_restore(f,save_point);
2908 }
2909 
2910 #if 0
2911 // this is the original version of the above code, if you want to optimize it from scratch
2912 void inverse_mdct_naive(float *buffer, int n)
2913 {
2914    float s;
2915    float A[1 << 12], B[1 << 12], C[1 << 11];
2916    int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2917    int n3_4 = n - n4, ld;
2918    // how can they claim this only uses N words?!
2919    // oh, because they're only used sparsely, whoops
2920    float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];
2921    // set up twiddle factors
2922 
2923    for (k=k2=0; k < n4; ++k,k2+=2) {
2924       A[k2  ] = (float)  cos(4*k*M_PI/n);
2925       A[k2+1] = (float) -sin(4*k*M_PI/n);
2926       B[k2  ] = (float)  cos((k2+1)*M_PI/n/2);
2927       B[k2+1] = (float)  sin((k2+1)*M_PI/n/2);
2928    }
2929    for (k=k2=0; k < n8; ++k,k2+=2) {
2930       C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
2931       C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
2932    }
2933 
2934    // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2935    // Note there are bugs in that pseudocode, presumably due to them attempting
2936    // to rename the arrays nicely rather than representing the way their actual
2937    // implementation bounces buffers back and forth. As a result, even in the
2938    // "some formulars corrected" version, a direct implementation fails. These
2939    // are noted below as "paper bug".
2940 
2941    // copy and reflect spectral data
2942    for (k=0; k < n2; ++k) u[k] = buffer[k];
2943    for (   ; k < n ; ++k) u[k] = -buffer[n - k - 1];
2944    // kernel from paper
2945    // step 1
2946    for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
2947       v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2]   - (u[k4+2] - u[n-k4-3])*A[k2+1];
2948       v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
2949    }
2950    // step 2
2951    for (k=k4=0; k < n8; k+=1, k4+=4) {
2952       w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
2953       w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
2954       w[k4+3]    = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
2955       w[k4+1]    = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
2956    }
2957    // step 3
2958    ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2959    for (l=0; l < ld-3; ++l) {
2960       int k0 = n >> (l+2), k1 = 1 << (l+3);
2961       int rlim = n >> (l+4), r4, r;
2962       int s2lim = 1 << (l+2), s2;
2963       for (r=r4=0; r < rlim; r4+=4,++r) {
2964          for (s2=0; s2 < s2lim; s2+=2) {
2965             u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
2966             u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
2967             u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
2968                                 - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
2969             u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
2970                                 + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
2971          }
2972       }
2973       if (l+1 < ld-3) {
2974          // paper bug: ping-ponging of u&w here is omitted
2975          memcpy(w, u, sizeof(u));
2976       }
2977    }
2978 
2979    // step 4
2980    for (i=0; i < n8; ++i) {
2981       int j = bit_reverse(i) >> (32-ld+3);
2982       assert(j < n8);
2983       if (i == j) {
2984          // paper bug: original code probably swapped in place; if copying,
2985          //            need to directly copy in this case
2986          int i8 = i << 3;
2987          v[i8+1] = u[i8+1];
2988          v[i8+3] = u[i8+3];
2989          v[i8+5] = u[i8+5];
2990          v[i8+7] = u[i8+7];
2991       } else if (i < j) {
2992          int i8 = i << 3, j8 = j << 3;
2993          v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
2994          v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
2995          v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
2996          v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
2997       }
2998    }
2999    // step 5
3000    for (k=0; k < n2; ++k) {
3001       w[k] = v[k*2+1];
3002    }
3003    // step 6
3004    for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
3005       u[n-1-k2] = w[k4];
3006       u[n-2-k2] = w[k4+1];
3007       u[n3_4 - 1 - k2] = w[k4+2];
3008       u[n3_4 - 2 - k2] = w[k4+3];
3009    }
3010    // step 7
3011    for (k=k2=0; k < n8; ++k, k2 += 2) {
3012       v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3013       v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3014       v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3015       v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3016    }
3017    // step 8
3018    for (k=k2=0; k < n4; ++k,k2 += 2) {
3019       X[k]      = v[k2+n2]*B[k2  ] + v[k2+1+n2]*B[k2+1];
3020       X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2  ];
3021    }
3022 
3023    // decode kernel to output
3024    // determined the following value experimentally
3025    // (by first figuring out what made inverse_mdct_slow work); then matching that here
3026    // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
3027    s = 0.5; // theoretically would be n4
3028 
3029    // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
3030    //     so it needs to use the "old" B values to behave correctly, or else
3031    //     set s to 1.0 ]]]
3032    for (i=0; i < n4  ; ++i) buffer[i] = s * X[i+n4];
3033    for (   ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
3034    for (   ; i < n   ; ++i) buffer[i] = -s * X[i - n3_4];
3035 }
3036 #endif
3037 
get_window(vorb * f,int len)3038 static float *get_window(vorb *f, int len)
3039 {
3040    len <<= 1;
3041    if (len == f->blocksize_0) return f->window[0];
3042    if (len == f->blocksize_1) return f->window[1];
3043    assert(0);
3044    return NULL;
3045 }
3046 
3047 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3048 typedef int16 YTYPE;
3049 #else
3050 typedef int YTYPE;
3051 #endif
do_floor(vorb * f,Mapping * map,int i,int n,float * target,YTYPE * finalY,uint8 * step2_flag)3052 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
3053 {
3054    int n2 = n >> 1;
3055    int s = map->chan[i].mux, floor;
3056    floor = map->submap_floor[s];
3057    if (f->floor_types[floor] == 0) {
3058       return error(f, VORBIS_invalid_stream);
3059    } else {
3060       Floor1 *g = &f->floor_config[floor].floor1;
3061       int j,q;
3062       int lx = 0, ly = finalY[0] * g->floor1_multiplier;
3063       for (q=1; q < g->values; ++q) {
3064          j = g->sorted_order[q];
3065          #ifndef STB_VORBIS_NO_DEFER_FLOOR
3066          if (finalY[j] >= 0)
3067          #else
3068          if (step2_flag[j])
3069          #endif
3070          {
3071             int hy = finalY[j] * g->floor1_multiplier;
3072             int hx = g->Xlist[j];
3073             if (lx != hx)
3074                draw_line(target, lx,ly, hx,hy, n2);
3075             CHECK(f);
3076             lx = hx, ly = hy;
3077          }
3078       }
3079       if (lx < n2) {
3080          // optimization of: draw_line(target, lx,ly, n,ly, n2);
3081          for (j=lx; j < n2; ++j)
3082             LINE_OP(target[j], inverse_db_table[ly]);
3083          CHECK(f);
3084       }
3085    }
3086    return TRUE;
3087 }
3088 
3089 // The meaning of "left" and "right"
3090 //
3091 // For a given frame:
3092 //     we compute samples from 0..n
3093 //     window_center is n/2
3094 //     we'll window and mix the samples from left_start to left_end with data from the previous frame
3095 //     all of the samples from left_end to right_start can be output without mixing; however,
3096 //        this interval is 0-length except when transitioning between short and long frames
3097 //     all of the samples from right_start to right_end need to be mixed with the next frame,
3098 //        which we don't have, so those get saved in a buffer
3099 //     frame N's right_end-right_start, the number of samples to mix with the next frame,
3100 //        has to be the same as frame N+1's left_end-left_start (which they are by
3101 //        construction)
3102 
vorbis_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)3103 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
3104 {
3105    Mode *m;
3106    int i, n, prev, next, window_center;
3107    f->channel_buffer_start = f->channel_buffer_end = 0;
3108 
3109   retry:
3110    if (f->eof) return FALSE;
3111    if (!maybe_start_packet(f))
3112       return FALSE;
3113    // check packet type
3114    if (get_bits(f,1) != 0) {
3115       if (IS_PUSH_MODE(f))
3116          return error(f,VORBIS_bad_packet_type);
3117       while (EOP != get8_packet(f));
3118       goto retry;
3119    }
3120 
3121    if (f->alloc.alloc_buffer)
3122       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3123 
3124    i = get_bits(f, ilog(f->mode_count-1));
3125    if (i == EOP) return FALSE;
3126    if (i >= f->mode_count) return FALSE;
3127    *mode = i;
3128    m = f->mode_config + i;
3129    if (m->blockflag) {
3130       n = f->blocksize_1;
3131       prev = get_bits(f,1);
3132       next = get_bits(f,1);
3133    } else {
3134       prev = next = 0;
3135       n = f->blocksize_0;
3136    }
3137 
3138 // WINDOWING
3139 
3140    window_center = n >> 1;
3141    if (m->blockflag && !prev) {
3142       *p_left_start = (n - f->blocksize_0) >> 2;
3143       *p_left_end   = (n + f->blocksize_0) >> 2;
3144    } else {
3145       *p_left_start = 0;
3146       *p_left_end   = window_center;
3147    }
3148    if (m->blockflag && !next) {
3149       *p_right_start = (n*3 - f->blocksize_0) >> 2;
3150       *p_right_end   = (n*3 + f->blocksize_0) >> 2;
3151    } else {
3152       *p_right_start = window_center;
3153       *p_right_end   = n;
3154    }
3155 
3156    return TRUE;
3157 }
3158 
vorbis_decode_packet_rest(vorb * f,int * len,Mode * m,int left_start,int left_end,int right_start,int right_end,int * p_left)3159 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
3160 {
3161    Mapping *map;
3162    int i,j,k,n,n2;
3163    int zero_channel[256];
3164    int really_zero_channel[256];
3165 
3166 // WINDOWING
3167 
3168    n = f->blocksize[m->blockflag];
3169    map = &f->mapping[m->mapping];
3170 
3171 // FLOORS
3172    n2 = n >> 1;
3173 
3174    CHECK(f);
3175 
3176    for (i=0; i < f->channels; ++i) {
3177       int s = map->chan[i].mux, floor;
3178       zero_channel[i] = FALSE;
3179       floor = map->submap_floor[s];
3180       if (f->floor_types[floor] == 0) {
3181          return error(f, VORBIS_invalid_stream);
3182       } else {
3183          Floor1 *g = &f->floor_config[floor].floor1;
3184          if (get_bits(f, 1)) {
3185             short *finalY;
3186             uint8 step2_flag[256];
3187             static int range_list[4] = { 256, 128, 86, 64 };
3188             int range = range_list[g->floor1_multiplier-1];
3189             int offset = 2;
3190             finalY = f->finalY[i];
3191             finalY[0] = get_bits(f, ilog(range)-1);
3192             finalY[1] = get_bits(f, ilog(range)-1);
3193             for (j=0; j < g->partitions; ++j) {
3194                int pclass = g->partition_class_list[j];
3195                int cdim = g->class_dimensions[pclass];
3196                int cbits = g->class_subclasses[pclass];
3197                int csub = (1 << cbits)-1;
3198                int cval = 0;
3199                if (cbits) {
3200                   Codebook *c = f->codebooks + g->class_masterbooks[pclass];
3201                   DECODE(cval,f,c);
3202                }
3203                for (k=0; k < cdim; ++k) {
3204                   int book = g->subclass_books[pclass][cval & csub];
3205                   cval = cval >> cbits;
3206                   if (book >= 0) {
3207                      int temp;
3208                      Codebook *c = f->codebooks + book;
3209                      DECODE(temp,f,c);
3210                      finalY[offset++] = temp;
3211                   } else
3212                      finalY[offset++] = 0;
3213                }
3214             }
3215             if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
3216             step2_flag[0] = step2_flag[1] = 1;
3217             for (j=2; j < g->values; ++j) {
3218                int low, high, pred, highroom, lowroom, room, val;
3219                low = g->neighbors[j][0];
3220                high = g->neighbors[j][1];
3221                //neighbors(g->Xlist, j, &low, &high);
3222                pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
3223                val = finalY[j];
3224                highroom = range - pred;
3225                lowroom = pred;
3226                if (highroom < lowroom)
3227                   room = highroom * 2;
3228                else
3229                   room = lowroom * 2;
3230                if (val) {
3231                   step2_flag[low] = step2_flag[high] = 1;
3232                   step2_flag[j] = 1;
3233                   if (val >= room)
3234                      if (highroom > lowroom)
3235                         finalY[j] = val - lowroom + pred;
3236                      else
3237                         finalY[j] = pred - val + highroom - 1;
3238                   else
3239                      if (val & 1)
3240                         finalY[j] = pred - ((val+1)>>1);
3241                      else
3242                         finalY[j] = pred + (val>>1);
3243                } else {
3244                   step2_flag[j] = 0;
3245                   finalY[j] = pred;
3246                }
3247             }
3248 
3249 #ifdef STB_VORBIS_NO_DEFER_FLOOR
3250             do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
3251 #else
3252             // defer final floor computation until _after_ residue
3253             for (j=0; j < g->values; ++j) {
3254                if (!step2_flag[j])
3255                   finalY[j] = -1;
3256             }
3257 #endif
3258          } else {
3259            error:
3260             zero_channel[i] = TRUE;
3261          }
3262          // So we just defer everything else to later
3263 
3264          // at this point we've decoded the floor into buffer
3265       }
3266    }
3267    CHECK(f);
3268    // at this point we've decoded all floors
3269 
3270    if (f->alloc.alloc_buffer)
3271       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3272 
3273    // re-enable coupled channels if necessary
3274    memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
3275    for (i=0; i < map->coupling_steps; ++i)
3276       if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
3277          zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
3278       }
3279 
3280    CHECK(f);
3281 // RESIDUE DECODE
3282    for (i=0; i < map->submaps; ++i) {
3283       float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
3284       int r;
3285       uint8 do_not_decode[256];
3286       int ch = 0;
3287       for (j=0; j < f->channels; ++j) {
3288          if (map->chan[j].mux == i) {
3289             if (zero_channel[j]) {
3290                do_not_decode[ch] = TRUE;
3291                residue_buffers[ch] = NULL;
3292             } else {
3293                do_not_decode[ch] = FALSE;
3294                residue_buffers[ch] = f->channel_buffers[j];
3295             }
3296             ++ch;
3297          }
3298       }
3299       r = map->submap_residue[i];
3300       decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
3301    }
3302 
3303    if (f->alloc.alloc_buffer)
3304       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3305    CHECK(f);
3306 
3307 // INVERSE COUPLING
3308    for (i = map->coupling_steps-1; i >= 0; --i) {
3309       int n2 = n >> 1;
3310       float *m = f->channel_buffers[map->chan[i].magnitude];
3311       float *a = f->channel_buffers[map->chan[i].angle    ];
3312       for (j=0; j < n2; ++j) {
3313          float a2,m2;
3314          if (m[j] > 0)
3315             if (a[j] > 0)
3316                m2 = m[j], a2 = m[j] - a[j];
3317             else
3318                a2 = m[j], m2 = m[j] + a[j];
3319          else
3320             if (a[j] > 0)
3321                m2 = m[j], a2 = m[j] + a[j];
3322             else
3323                a2 = m[j], m2 = m[j] - a[j];
3324          m[j] = m2;
3325          a[j] = a2;
3326       }
3327    }
3328    CHECK(f);
3329 
3330    // finish decoding the floors
3331 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3332    for (i=0; i < f->channels; ++i) {
3333       if (really_zero_channel[i]) {
3334          memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3335       } else {
3336          do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
3337       }
3338    }
3339 #else
3340    for (i=0; i < f->channels; ++i) {
3341       if (really_zero_channel[i]) {
3342          memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3343       } else {
3344          for (j=0; j < n2; ++j)
3345             f->channel_buffers[i][j] *= f->floor_buffers[i][j];
3346       }
3347    }
3348 #endif
3349 
3350 // INVERSE MDCT
3351    CHECK(f);
3352    for (i=0; i < f->channels; ++i)
3353       inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
3354    CHECK(f);
3355 
3356    // this shouldn't be necessary, unless we exited on an error
3357    // and want to flush to get to the next packet
3358    flush_packet(f);
3359 
3360    if (f->first_decode) {
3361       // assume we start so first non-discarded sample is sample 0
3362       // this isn't to spec, but spec would require us to read ahead
3363       // and decode the size of all current frames--could be done,
3364       // but presumably it's not a commonly used feature
3365       f->current_loc = -n2; // start of first frame is positioned for discard
3366       // we might have to discard samples "from" the next frame too,
3367       // if we're lapping a large block then a small at the start?
3368       f->discard_samples_deferred = n - right_end;
3369       f->current_loc_valid = TRUE;
3370       f->first_decode = FALSE;
3371    } else if (f->discard_samples_deferred) {
3372       if (f->discard_samples_deferred >= right_start - left_start) {
3373          f->discard_samples_deferred -= (right_start - left_start);
3374          left_start = right_start;
3375          *p_left = left_start;
3376       } else {
3377          left_start += f->discard_samples_deferred;
3378          *p_left = left_start;
3379          f->discard_samples_deferred = 0;
3380       }
3381    } else if (f->previous_length == 0 && f->current_loc_valid) {
3382       // we're recovering from a seek... that means we're going to discard
3383       // the samples from this packet even though we know our position from
3384       // the last page header, so we need to update the position based on
3385       // the discarded samples here
3386       // but wait, the code below is going to add this in itself even
3387       // on a discard, so we don't need to do it here...
3388    }
3389 
3390    // check if we have ogg information about the sample # for this packet
3391    if (f->last_seg_which == f->end_seg_with_known_loc) {
3392       // if we have a valid current loc, and this is final:
3393       if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
3394          uint32 current_end = f->known_loc_for_packet - (n-right_end);
3395          // then let's infer the size of the (probably) short final frame
3396          if (current_end < f->current_loc + (right_end-left_start)) {
3397             if (current_end < f->current_loc) {
3398                // negative truncation, that's impossible!
3399                *len = 0;
3400             } else {
3401                *len = current_end - f->current_loc;
3402             }
3403             *len += left_start;
3404             if (*len > right_end) *len = right_end; // this should never happen
3405             f->current_loc += *len;
3406             return TRUE;
3407          }
3408       }
3409       // otherwise, just set our sample loc
3410       // guess that the ogg granule pos refers to the _middle_ of the
3411       // last frame?
3412       // set f->current_loc to the position of left_start
3413       f->current_loc = f->known_loc_for_packet - (n2-left_start);
3414       f->current_loc_valid = TRUE;
3415    }
3416    if (f->current_loc_valid)
3417       f->current_loc += (right_start - left_start);
3418 
3419    if (f->alloc.alloc_buffer)
3420       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3421    *len = right_end;  // ignore samples after the window goes to 0
3422    CHECK(f);
3423 
3424    return TRUE;
3425 }
3426 
vorbis_decode_packet(vorb * f,int * len,int * p_left,int * p_right)3427 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
3428 {
3429    int mode, left_end, right_end;
3430    if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
3431    return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
3432 }
3433 
vorbis_finish_frame(stb_vorbis * f,int len,int left,int right)3434 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
3435 {
3436    int prev,i,j;
3437    // we use right&left (the start of the right- and left-window sin()-regions)
3438    // to determine how much to return, rather than inferring from the rules
3439    // (same result, clearer code); 'left' indicates where our sin() window
3440    // starts, therefore where the previous window's right edge starts, and
3441    // therefore where to start mixing from the previous buffer. 'right'
3442    // indicates where our sin() ending-window starts, therefore that's where
3443    // we start saving, and where our returned-data ends.
3444 
3445    // mixin from previous window
3446    if (f->previous_length) {
3447       int i,j, n = f->previous_length;
3448       float *w = get_window(f, n);
3449       for (i=0; i < f->channels; ++i) {
3450          for (j=0; j < n; ++j)
3451             f->channel_buffers[i][left+j] =
3452                f->channel_buffers[i][left+j]*w[    j] +
3453                f->previous_window[i][     j]*w[n-1-j];
3454       }
3455    }
3456 
3457    prev = f->previous_length;
3458 
3459    // last half of this data becomes previous window
3460    f->previous_length = len - right;
3461 
3462    // @OPTIMIZE: could avoid this copy by double-buffering the
3463    // output (flipping previous_window with channel_buffers), but
3464    // then previous_window would have to be 2x as large, and
3465    // channel_buffers couldn't be temp mem (although they're NOT
3466    // currently temp mem, they could be (unless we want to level
3467    // performance by spreading out the computation))
3468    for (i=0; i < f->channels; ++i)
3469       for (j=0; right+j < len; ++j)
3470          f->previous_window[i][j] = f->channel_buffers[i][right+j];
3471 
3472    if (!prev)
3473       // there was no previous packet, so this data isn't valid...
3474       // this isn't entirely true, only the would-have-overlapped data
3475       // isn't valid, but this seems to be what the spec requires
3476       return 0;
3477 
3478    // truncate a short frame
3479    if (len < right) right = len;
3480 
3481    f->samples_output += right-left;
3482 
3483    return right - left;
3484 }
3485 
vorbis_pump_first_frame(stb_vorbis * f)3486 static int vorbis_pump_first_frame(stb_vorbis *f)
3487 {
3488    int len, right, left, res;
3489    res = vorbis_decode_packet(f, &len, &left, &right);
3490    if (res)
3491       vorbis_finish_frame(f, len, left, right);
3492    return res;
3493 }
3494 
3495 #ifndef STB_VORBIS_NO_PUSHDATA_API
is_whole_packet_present(stb_vorbis * f,int end_page)3496 static int is_whole_packet_present(stb_vorbis *f, int end_page)
3497 {
3498    // make sure that we have the packet available before continuing...
3499    // this requires a full ogg parse, but we know we can fetch from f->stream
3500 
3501    // instead of coding this out explicitly, we could save the current read state,
3502    // read the next packet with get8() until end-of-packet, check f->eof, then
3503    // reset the state? but that would be slower, esp. since we'd have over 256 bytes
3504    // of state to restore (primarily the page segment table)
3505 
3506    int s = f->next_seg, first = TRUE;
3507    uint8 *p = f->stream;
3508 
3509    if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
3510       for (; s < f->segment_count; ++s) {
3511          p += f->segments[s];
3512          if (f->segments[s] < 255)               // stop at first short segment
3513             break;
3514       }
3515       // either this continues, or it ends it...
3516       if (end_page)
3517          if (s < f->segment_count-1)             return error(f, VORBIS_invalid_stream);
3518       if (s == f->segment_count)
3519          s = -1; // set 'crosses page' flag
3520       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
3521       first = FALSE;
3522    }
3523    for (; s == -1;) {
3524       uint8 *q;
3525       int n;
3526 
3527       // check that we have the page header ready
3528       if (p + 26 >= f->stream_end)               return error(f, VORBIS_need_more_data);
3529       // validate the page
3530       if (memcmp(p, ogg_page_header, 4))         return error(f, VORBIS_invalid_stream);
3531       if (p[4] != 0)                             return error(f, VORBIS_invalid_stream);
3532       if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
3533          if (f->previous_length)
3534             if ((p[5] & PAGEFLAG_continued_packet))  return error(f, VORBIS_invalid_stream);
3535          // if no previous length, we're resynching, so we can come in on a continued-packet,
3536          // which we'll just drop
3537       } else {
3538          if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3539       }
3540       n = p[26]; // segment counts
3541       q = p+27;  // q points to segment table
3542       p = q + n; // advance past header
3543       // make sure we've read the segment table
3544       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
3545       for (s=0; s < n; ++s) {
3546          p += q[s];
3547          if (q[s] < 255)
3548             break;
3549       }
3550       if (end_page)
3551          if (s < n-1)                            return error(f, VORBIS_invalid_stream);
3552       if (s == n)
3553          s = -1; // set 'crosses page' flag
3554       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
3555       first = FALSE;
3556    }
3557    return TRUE;
3558 }
3559 #endif // !STB_VORBIS_NO_PUSHDATA_API
3560 
start_decoder(vorb * f)3561 static int start_decoder(vorb *f)
3562 {
3563    uint8 header[6], x,y;
3564    int len,i,j,k, max_submaps = 0;
3565    int longest_floorlist=0;
3566 
3567    // first page, first packet
3568 
3569    if (!start_page(f))                              return FALSE;
3570    // validate page flag
3571    if (!(f->page_flag & PAGEFLAG_first_page))       return error(f, VORBIS_invalid_first_page);
3572    if (f->page_flag & PAGEFLAG_last_page)           return error(f, VORBIS_invalid_first_page);
3573    if (f->page_flag & PAGEFLAG_continued_packet)    return error(f, VORBIS_invalid_first_page);
3574    // check for expected packet length
3575    if (f->segment_count != 1)                       return error(f, VORBIS_invalid_first_page);
3576    if (f->segments[0] != 30)                        return error(f, VORBIS_invalid_first_page);
3577    // read packet
3578    // check packet header
3579    if (get8(f) != VORBIS_packet_id)                 return error(f, VORBIS_invalid_first_page);
3580    if (!getn(f, header, 6))                         return error(f, VORBIS_unexpected_eof);
3581    if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_first_page);
3582    // vorbis_version
3583    if (get32(f) != 0)                               return error(f, VORBIS_invalid_first_page);
3584    f->channels = get8(f); if (!f->channels)         return error(f, VORBIS_invalid_first_page);
3585    if (f->channels > STB_VORBIS_MAX_CHANNELS)       return error(f, VORBIS_too_many_channels);
3586    f->sample_rate = get32(f); if (!f->sample_rate)  return error(f, VORBIS_invalid_first_page);
3587    get32(f); // bitrate_maximum
3588    get32(f); // bitrate_nominal
3589    get32(f); // bitrate_minimum
3590    x = get8(f);
3591    {
3592       int log0,log1;
3593       log0 = x & 15;
3594       log1 = x >> 4;
3595       f->blocksize_0 = 1 << log0;
3596       f->blocksize_1 = 1 << log1;
3597       if (log0 < 6 || log0 > 13)                       return error(f, VORBIS_invalid_setup);
3598       if (log1 < 6 || log1 > 13)                       return error(f, VORBIS_invalid_setup);
3599       if (log0 > log1)                                 return error(f, VORBIS_invalid_setup);
3600    }
3601 
3602    // framing_flag
3603    x = get8(f);
3604    if (!(x & 1))                                    return error(f, VORBIS_invalid_first_page);
3605 
3606    // second packet!
3607    if (!start_page(f))                              return FALSE;
3608 
3609    if (!start_packet(f))                            return FALSE;
3610    do {
3611       len = next_segment(f);
3612       skip(f, len);
3613       f->bytes_in_seg = 0;
3614    } while (len);
3615 
3616    // third packet!
3617    if (!start_packet(f))                            return FALSE;
3618 
3619    #ifndef STB_VORBIS_NO_PUSHDATA_API
3620    if (IS_PUSH_MODE(f)) {
3621       if (!is_whole_packet_present(f, TRUE)) {
3622          // convert error in ogg header to write type
3623          if (f->error == VORBIS_invalid_stream)
3624             f->error = VORBIS_invalid_setup;
3625          return FALSE;
3626       }
3627    }
3628    #endif
3629 
3630    crc32_init(); // always init it, to avoid multithread race conditions
3631 
3632    if (get8_packet(f) != VORBIS_packet_setup)       return error(f, VORBIS_invalid_setup);
3633    for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3634    if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_setup);
3635 
3636    // codebooks
3637 
3638    f->codebook_count = get_bits(f,8) + 1;
3639    f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
3640    if (f->codebooks == NULL)                        return error(f, VORBIS_outofmem);
3641    memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
3642    for (i=0; i < f->codebook_count; ++i) {
3643       uint32 *values;
3644       int ordered, sorted_count;
3645       int total=0;
3646       uint8 *lengths;
3647       Codebook *c = f->codebooks+i;
3648       CHECK(f);
3649       x = get_bits(f, 8); if (x != 0x42)            return error(f, VORBIS_invalid_setup);
3650       x = get_bits(f, 8); if (x != 0x43)            return error(f, VORBIS_invalid_setup);
3651       x = get_bits(f, 8); if (x != 0x56)            return error(f, VORBIS_invalid_setup);
3652       x = get_bits(f, 8);
3653       c->dimensions = (get_bits(f, 8)<<8) + x;
3654       x = get_bits(f, 8);
3655       y = get_bits(f, 8);
3656       c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
3657       ordered = get_bits(f,1);
3658       c->sparse = ordered ? 0 : get_bits(f,1);
3659 
3660       if (c->dimensions == 0 && c->entries != 0)    return error(f, VORBIS_invalid_setup);
3661 
3662       if (c->sparse)
3663          lengths = (uint8 *) setup_temp_malloc(f, c->entries);
3664       else
3665          lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3666 
3667       if (!lengths) return error(f, VORBIS_outofmem);
3668 
3669       if (ordered) {
3670          int current_entry = 0;
3671          int current_length = get_bits(f,5) + 1;
3672          while (current_entry < c->entries) {
3673             int limit = c->entries - current_entry;
3674             int n = get_bits(f, ilog(limit));
3675             if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
3676             memset(lengths + current_entry, current_length, n);
3677             current_entry += n;
3678             ++current_length;
3679          }
3680       } else {
3681          for (j=0; j < c->entries; ++j) {
3682             int present = c->sparse ? get_bits(f,1) : 1;
3683             if (present) {
3684                lengths[j] = get_bits(f, 5) + 1;
3685                ++total;
3686                if (lengths[j] == 32)
3687                   return error(f, VORBIS_invalid_setup);
3688             } else {
3689                lengths[j] = NO_CODE;
3690             }
3691          }
3692       }
3693 
3694       if (c->sparse && total >= c->entries >> 2) {
3695          // convert sparse items to non-sparse!
3696          if (c->entries > (int) f->setup_temp_memory_required)
3697             f->setup_temp_memory_required = c->entries;
3698 
3699          c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3700          if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem);
3701          memcpy(c->codeword_lengths, lengths, c->entries);
3702          setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
3703          lengths = c->codeword_lengths;
3704          c->sparse = 0;
3705       }
3706 
3707       // compute the size of the sorted tables
3708       if (c->sparse) {
3709          sorted_count = total;
3710       } else {
3711          sorted_count = 0;
3712          #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
3713          for (j=0; j < c->entries; ++j)
3714             if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
3715                ++sorted_count;
3716          #endif
3717       }
3718 
3719       c->sorted_entries = sorted_count;
3720       values = NULL;
3721 
3722       CHECK(f);
3723       if (!c->sparse) {
3724          c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
3725          if (!c->codewords)                  return error(f, VORBIS_outofmem);
3726       } else {
3727          unsigned int size;
3728          if (c->sorted_entries) {
3729             c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries);
3730             if (!c->codeword_lengths)           return error(f, VORBIS_outofmem);
3731             c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
3732             if (!c->codewords)                  return error(f, VORBIS_outofmem);
3733             values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
3734             if (!values)                        return error(f, VORBIS_outofmem);
3735          }
3736          size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
3737          if (size > f->setup_temp_memory_required)
3738             f->setup_temp_memory_required = size;
3739       }
3740 
3741       if (!compute_codewords(c, lengths, c->entries, values)) {
3742          if (c->sparse) setup_temp_free(f, values, 0);
3743          return error(f, VORBIS_invalid_setup);
3744       }
3745 
3746       if (c->sorted_entries) {
3747          // allocate an extra slot for sentinels
3748          c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
3749          if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem);
3750          // allocate an extra slot at the front so that c->sorted_values[-1] is defined
3751          // so that we can catch that case without an extra if
3752          c->sorted_values    = ( int   *) setup_malloc(f, sizeof(*c->sorted_values   ) * (c->sorted_entries+1));
3753          if (c->sorted_values == NULL) return error(f, VORBIS_outofmem);
3754          ++c->sorted_values;
3755          c->sorted_values[-1] = -1;
3756          compute_sorted_huffman(c, lengths, values);
3757       }
3758 
3759       if (c->sparse) {
3760          setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
3761          setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
3762          setup_temp_free(f, lengths, c->entries);
3763          c->codewords = NULL;
3764       }
3765 
3766       compute_accelerated_huffman(c);
3767 
3768       CHECK(f);
3769       c->lookup_type = get_bits(f, 4);
3770       if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
3771       if (c->lookup_type > 0) {
3772          uint16 *mults;
3773          c->minimum_value = float32_unpack(get_bits(f, 32));
3774          c->delta_value = float32_unpack(get_bits(f, 32));
3775          c->value_bits = get_bits(f, 4)+1;
3776          c->sequence_p = get_bits(f,1);
3777          if (c->lookup_type == 1) {
3778             c->lookup_values = lookup1_values(c->entries, c->dimensions);
3779          } else {
3780             c->lookup_values = c->entries * c->dimensions;
3781          }
3782          if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup);
3783          mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
3784          if (mults == NULL) return error(f, VORBIS_outofmem);
3785          for (j=0; j < (int) c->lookup_values; ++j) {
3786             int q = get_bits(f, c->value_bits);
3787             if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
3788             mults[j] = q;
3789          }
3790 
3791 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3792          if (c->lookup_type == 1) {
3793             int len, sparse = c->sparse;
3794             float last=0;
3795             // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3796             if (sparse) {
3797                if (c->sorted_entries == 0) goto skip;
3798                c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
3799             } else
3800                c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries        * c->dimensions);
3801             if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3802             len = sparse ? c->sorted_entries : c->entries;
3803             for (j=0; j < len; ++j) {
3804                unsigned int z = sparse ? c->sorted_values[j] : j;
3805                unsigned int div=1;
3806                for (k=0; k < c->dimensions; ++k) {
3807                   int off = (z / div) % c->lookup_values;
3808                   float val = mults[off];
3809                   val = mults[off]*c->delta_value + c->minimum_value + last;
3810                   c->multiplicands[j*c->dimensions + k] = val;
3811                   if (c->sequence_p)
3812                      last = val;
3813                   if (k+1 < c->dimensions) {
3814                      if (div > UINT_MAX / (unsigned int) c->lookup_values) {
3815                         setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
3816                         return error(f, VORBIS_invalid_setup);
3817                      }
3818                      div *= c->lookup_values;
3819                   }
3820                }
3821             }
3822             c->lookup_type = 2;
3823          }
3824          else
3825 #endif
3826          {
3827             float last=0;
3828             CHECK(f);
3829             c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
3830             if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3831             for (j=0; j < (int) c->lookup_values; ++j) {
3832                float val = mults[j] * c->delta_value + c->minimum_value + last;
3833                c->multiplicands[j] = val;
3834                if (c->sequence_p)
3835                   last = val;
3836             }
3837          }
3838 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3839         skip:;
3840 #endif
3841          setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values);
3842 
3843          CHECK(f);
3844       }
3845       CHECK(f);
3846    }
3847 
3848    // time domain transfers (notused)
3849 
3850    x = get_bits(f, 6) + 1;
3851    for (i=0; i < x; ++i) {
3852       uint32 z = get_bits(f, 16);
3853       if (z != 0) return error(f, VORBIS_invalid_setup);
3854    }
3855 
3856    // Floors
3857    f->floor_count = get_bits(f, 6)+1;
3858    f->floor_config = (Floor *)  setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
3859    if (f->floor_config == NULL) return error(f, VORBIS_outofmem);
3860    for (i=0; i < f->floor_count; ++i) {
3861       f->floor_types[i] = get_bits(f, 16);
3862       if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
3863       if (f->floor_types[i] == 0) {
3864          Floor0 *g = &f->floor_config[i].floor0;
3865          g->order = get_bits(f,8);
3866          g->rate = get_bits(f,16);
3867          g->bark_map_size = get_bits(f,16);
3868          g->amplitude_bits = get_bits(f,6);
3869          g->amplitude_offset = get_bits(f,8);
3870          g->number_of_books = get_bits(f,4) + 1;
3871          for (j=0; j < g->number_of_books; ++j)
3872             g->book_list[j] = get_bits(f,8);
3873          return error(f, VORBIS_feature_not_supported);
3874       } else {
3875          stbv__floor_ordering p[31*8+2];
3876          Floor1 *g = &f->floor_config[i].floor1;
3877          int max_class = -1;
3878          g->partitions = get_bits(f, 5);
3879          for (j=0; j < g->partitions; ++j) {
3880             g->partition_class_list[j] = get_bits(f, 4);
3881             if (g->partition_class_list[j] > max_class)
3882                max_class = g->partition_class_list[j];
3883          }
3884          for (j=0; j <= max_class; ++j) {
3885             g->class_dimensions[j] = get_bits(f, 3)+1;
3886             g->class_subclasses[j] = get_bits(f, 2);
3887             if (g->class_subclasses[j]) {
3888                g->class_masterbooks[j] = get_bits(f, 8);
3889                if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3890             }
3891             for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
3892                g->subclass_books[j][k] = get_bits(f,8)-1;
3893                if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3894             }
3895          }
3896          g->floor1_multiplier = get_bits(f,2)+1;
3897          g->rangebits = get_bits(f,4);
3898          g->Xlist[0] = 0;
3899          g->Xlist[1] = 1 << g->rangebits;
3900          g->values = 2;
3901          for (j=0; j < g->partitions; ++j) {
3902             int c = g->partition_class_list[j];
3903             for (k=0; k < g->class_dimensions[c]; ++k) {
3904                g->Xlist[g->values] = get_bits(f, g->rangebits);
3905                ++g->values;
3906             }
3907          }
3908          // precompute the sorting
3909          for (j=0; j < g->values; ++j) {
3910             p[j].x = g->Xlist[j];
3911             p[j].id = j;
3912          }
3913          qsort(p, g->values, sizeof(p[0]), point_compare);
3914          for (j=0; j < g->values; ++j)
3915             g->sorted_order[j] = (uint8) p[j].id;
3916          // precompute the neighbors
3917          for (j=2; j < g->values; ++j) {
3918             int low = 0, hi = 0;
3919             neighbors(g->Xlist, j, &low,&hi);
3920             g->neighbors[j][0] = low;
3921             g->neighbors[j][1] = hi;
3922          }
3923 
3924          if (g->values > longest_floorlist)
3925             longest_floorlist = g->values;
3926       }
3927    }
3928 
3929    // Residue
3930    f->residue_count = get_bits(f, 6)+1;
3931    f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0]));
3932    if (f->residue_config == NULL) return error(f, VORBIS_outofmem);
3933    memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0]));
3934    for (i=0; i < f->residue_count; ++i) {
3935       uint8 residue_cascade[64];
3936       Residue *r = f->residue_config+i;
3937       f->residue_types[i] = get_bits(f, 16);
3938       if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
3939       r->begin = get_bits(f, 24);
3940       r->end = get_bits(f, 24);
3941       if (r->end < r->begin) return error(f, VORBIS_invalid_setup);
3942       r->part_size = get_bits(f,24)+1;
3943       r->classifications = get_bits(f,6)+1;
3944       r->classbook = get_bits(f,8);
3945       if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3946       for (j=0; j < r->classifications; ++j) {
3947          uint8 high_bits=0;
3948          uint8 low_bits=get_bits(f,3);
3949          if (get_bits(f,1))
3950             high_bits = get_bits(f,5);
3951          residue_cascade[j] = high_bits*8 + low_bits;
3952       }
3953       r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
3954       if (r->residue_books == NULL) return error(f, VORBIS_outofmem);
3955       for (j=0; j < r->classifications; ++j) {
3956          for (k=0; k < 8; ++k) {
3957             if (residue_cascade[j] & (1 << k)) {
3958                r->residue_books[j][k] = get_bits(f, 8);
3959                if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3960             } else {
3961                r->residue_books[j][k] = -1;
3962             }
3963          }
3964       }
3965       // precompute the classifications[] array to avoid inner-loop mod/divide
3966       // call it 'classdata' since we already have r->classifications
3967       r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3968       if (!r->classdata) return error(f, VORBIS_outofmem);
3969       memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3970       for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
3971          int classwords = f->codebooks[r->classbook].dimensions;
3972          int temp = j;
3973          r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
3974          if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem);
3975          for (k=classwords-1; k >= 0; --k) {
3976             r->classdata[j][k] = temp % r->classifications;
3977             temp /= r->classifications;
3978          }
3979       }
3980    }
3981 
3982    f->mapping_count = get_bits(f,6)+1;
3983    f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
3984    if (f->mapping == NULL) return error(f, VORBIS_outofmem);
3985    memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping));
3986    for (i=0; i < f->mapping_count; ++i) {
3987       Mapping *m = f->mapping + i;
3988       int mapping_type = get_bits(f,16);
3989       if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
3990       m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
3991       if (m->chan == NULL) return error(f, VORBIS_outofmem);
3992       if (get_bits(f,1))
3993          m->submaps = get_bits(f,4)+1;
3994       else
3995          m->submaps = 1;
3996       if (m->submaps > max_submaps)
3997          max_submaps = m->submaps;
3998       if (get_bits(f,1)) {
3999          m->coupling_steps = get_bits(f,8)+1;
4000          for (k=0; k < m->coupling_steps; ++k) {
4001             m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
4002             m->chan[k].angle = get_bits(f, ilog(f->channels-1));
4003             if (m->chan[k].magnitude >= f->channels)        return error(f, VORBIS_invalid_setup);
4004             if (m->chan[k].angle     >= f->channels)        return error(f, VORBIS_invalid_setup);
4005             if (m->chan[k].magnitude == m->chan[k].angle)   return error(f, VORBIS_invalid_setup);
4006          }
4007       } else
4008          m->coupling_steps = 0;
4009 
4010       // reserved field
4011       if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
4012       if (m->submaps > 1) {
4013          for (j=0; j < f->channels; ++j) {
4014             m->chan[j].mux = get_bits(f, 4);
4015             if (m->chan[j].mux >= m->submaps)                return error(f, VORBIS_invalid_setup);
4016          }
4017       } else
4018          // @SPECIFICATION: this case is missing from the spec
4019          for (j=0; j < f->channels; ++j)
4020             m->chan[j].mux = 0;
4021 
4022       for (j=0; j < m->submaps; ++j) {
4023          get_bits(f,8); // discard
4024          m->submap_floor[j] = get_bits(f,8);
4025          m->submap_residue[j] = get_bits(f,8);
4026          if (m->submap_floor[j] >= f->floor_count)      return error(f, VORBIS_invalid_setup);
4027          if (m->submap_residue[j] >= f->residue_count)  return error(f, VORBIS_invalid_setup);
4028       }
4029    }
4030 
4031    // Modes
4032    f->mode_count = get_bits(f, 6)+1;
4033    for (i=0; i < f->mode_count; ++i) {
4034       Mode *m = f->mode_config+i;
4035       m->blockflag = get_bits(f,1);
4036       m->windowtype = get_bits(f,16);
4037       m->transformtype = get_bits(f,16);
4038       m->mapping = get_bits(f,8);
4039       if (m->windowtype != 0)                 return error(f, VORBIS_invalid_setup);
4040       if (m->transformtype != 0)              return error(f, VORBIS_invalid_setup);
4041       if (m->mapping >= f->mapping_count)     return error(f, VORBIS_invalid_setup);
4042    }
4043 
4044    flush_packet(f);
4045 
4046    f->previous_length = 0;
4047 
4048    for (i=0; i < f->channels; ++i) {
4049       f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
4050       f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4051       f->finalY[i]          = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
4052       if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem);
4053       #ifdef STB_VORBIS_NO_DEFER_FLOOR
4054       f->floor_buffers[i]   = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4055       if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem);
4056       #endif
4057    }
4058 
4059    if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
4060    if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
4061    f->blocksize[0] = f->blocksize_0;
4062    f->blocksize[1] = f->blocksize_1;
4063 
4064 #ifdef STB_VORBIS_DIVIDE_TABLE
4065    if (integer_divide_table[1][1]==0)
4066       for (i=0; i < DIVTAB_NUMER; ++i)
4067          for (j=1; j < DIVTAB_DENOM; ++j)
4068             integer_divide_table[i][j] = i / j;
4069 #endif
4070 
4071    // compute how much temporary memory is needed
4072 
4073    // 1.
4074    {
4075       uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
4076       uint32 classify_mem;
4077       int i,max_part_read=0;
4078       for (i=0; i < f->residue_count; ++i) {
4079          Residue *r = f->residue_config + i;
4080          int n_read = r->end - r->begin;
4081          int part_read = n_read / r->part_size;
4082          if (part_read > max_part_read)
4083             max_part_read = part_read;
4084       }
4085       #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
4086       classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
4087       #else
4088       classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
4089       #endif
4090 
4091       f->temp_memory_required = classify_mem;
4092       if (imdct_mem > f->temp_memory_required)
4093          f->temp_memory_required = imdct_mem;
4094    }
4095 
4096    f->first_decode = TRUE;
4097 
4098    if (f->alloc.alloc_buffer) {
4099       assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
4100       // check if there's enough temp memory so we don't error later
4101       if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
4102          return error(f, VORBIS_outofmem);
4103    }
4104 
4105    f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
4106 
4107    return TRUE;
4108 }
4109 
vorbis_deinit(stb_vorbis * p)4110 static void vorbis_deinit(stb_vorbis *p)
4111 {
4112    int i,j;
4113    if (p->residue_config) {
4114       for (i=0; i < p->residue_count; ++i) {
4115          Residue *r = p->residue_config+i;
4116          if (r->classdata) {
4117             for (j=0; j < p->codebooks[r->classbook].entries; ++j)
4118                setup_free(p, r->classdata[j]);
4119             setup_free(p, r->classdata);
4120          }
4121          setup_free(p, r->residue_books);
4122       }
4123    }
4124 
4125    if (p->codebooks) {
4126       CHECK(p);
4127       for (i=0; i < p->codebook_count; ++i) {
4128          Codebook *c = p->codebooks + i;
4129          setup_free(p, c->codeword_lengths);
4130          setup_free(p, c->multiplicands);
4131          setup_free(p, c->codewords);
4132          setup_free(p, c->sorted_codewords);
4133          // c->sorted_values[-1] is the first entry in the array
4134          setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
4135       }
4136       setup_free(p, p->codebooks);
4137    }
4138    setup_free(p, p->floor_config);
4139    setup_free(p, p->residue_config);
4140    if (p->mapping) {
4141       for (i=0; i < p->mapping_count; ++i)
4142          setup_free(p, p->mapping[i].chan);
4143       setup_free(p, p->mapping);
4144    }
4145    CHECK(p);
4146    for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
4147       setup_free(p, p->channel_buffers[i]);
4148       setup_free(p, p->previous_window[i]);
4149       #ifdef STB_VORBIS_NO_DEFER_FLOOR
4150       setup_free(p, p->floor_buffers[i]);
4151       #endif
4152       setup_free(p, p->finalY[i]);
4153    }
4154    for (i=0; i < 2; ++i) {
4155       setup_free(p, p->A[i]);
4156       setup_free(p, p->B[i]);
4157       setup_free(p, p->C[i]);
4158       setup_free(p, p->window[i]);
4159       setup_free(p, p->bit_reverse[i]);
4160    }
4161    #ifndef STB_VORBIS_NO_STDIO
4162    if (p->close_on_free) fclose(p->f);
4163    #endif
4164 }
4165 
stb_vorbis_close(stb_vorbis * p)4166 void stb_vorbis_close(stb_vorbis *p)
4167 {
4168    if (p == NULL) return;
4169    vorbis_deinit(p);
4170    setup_free(p,p);
4171 }
4172 
vorbis_init(stb_vorbis * p,const stb_vorbis_alloc * z)4173 static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
4174 {
4175    memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start
4176    if (z) {
4177       p->alloc = *z;
4178       p->alloc.alloc_buffer_length_in_bytes = (p->alloc.alloc_buffer_length_in_bytes+3) & ~3;
4179       p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
4180    }
4181    p->eof = 0;
4182    p->error = VORBIS__no_error;
4183    p->stream = NULL;
4184    p->codebooks = NULL;
4185    p->page_crc_tests = -1;
4186    #ifndef STB_VORBIS_NO_STDIO
4187    p->close_on_free = FALSE;
4188    p->f = NULL;
4189    #endif
4190 }
4191 
stb_vorbis_get_sample_offset(stb_vorbis * f)4192 int stb_vorbis_get_sample_offset(stb_vorbis *f)
4193 {
4194    if (f->current_loc_valid)
4195       return f->current_loc;
4196    else
4197       return -1;
4198 }
4199 
stb_vorbis_get_info(stb_vorbis * f)4200 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
4201 {
4202    stb_vorbis_info d;
4203    d.channels = f->channels;
4204    d.sample_rate = f->sample_rate;
4205    d.setup_memory_required = f->setup_memory_required;
4206    d.setup_temp_memory_required = f->setup_temp_memory_required;
4207    d.temp_memory_required = f->temp_memory_required;
4208    d.max_frame_size = f->blocksize_1 >> 1;
4209    return d;
4210 }
4211 
stb_vorbis_get_error(stb_vorbis * f)4212 int stb_vorbis_get_error(stb_vorbis *f)
4213 {
4214    int e = f->error;
4215    f->error = VORBIS__no_error;
4216    return e;
4217 }
4218 
vorbis_alloc(stb_vorbis * f)4219 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
4220 {
4221    stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
4222    return p;
4223 }
4224 
4225 #ifndef STB_VORBIS_NO_PUSHDATA_API
4226 
stb_vorbis_flush_pushdata(stb_vorbis * f)4227 void stb_vorbis_flush_pushdata(stb_vorbis *f)
4228 {
4229    f->previous_length = 0;
4230    f->page_crc_tests  = 0;
4231    f->discard_samples_deferred = 0;
4232    f->current_loc_valid = FALSE;
4233    f->first_decode = FALSE;
4234    f->samples_output = 0;
4235    f->channel_buffer_start = 0;
4236    f->channel_buffer_end = 0;
4237 }
4238 
vorbis_search_for_page_pushdata(vorb * f,uint8 * data,int data_len)4239 static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
4240 {
4241    int i,n;
4242    for (i=0; i < f->page_crc_tests; ++i)
4243       f->scan[i].bytes_done = 0;
4244 
4245    // if we have room for more scans, search for them first, because
4246    // they may cause us to stop early if their header is incomplete
4247    if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
4248       if (data_len < 4) return 0;
4249       data_len -= 3; // need to look for 4-byte sequence, so don't miss
4250                      // one that straddles a boundary
4251       for (i=0; i < data_len; ++i) {
4252          if (data[i] == 0x4f) {
4253             if (0==memcmp(data+i, ogg_page_header, 4)) {
4254                int j,len;
4255                uint32 crc;
4256                // make sure we have the whole page header
4257                if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
4258                   // only read up to this page start, so hopefully we'll
4259                   // have the whole page header start next time
4260                   data_len = i;
4261                   break;
4262                }
4263                // ok, we have it all; compute the length of the page
4264                len = 27 + data[i+26];
4265                for (j=0; j < data[i+26]; ++j)
4266                   len += data[i+27+j];
4267                // scan everything up to the embedded crc (which we must 0)
4268                crc = 0;
4269                for (j=0; j < 22; ++j)
4270                   crc = crc32_update(crc, data[i+j]);
4271                // now process 4 0-bytes
4272                for (   ; j < 26; ++j)
4273                   crc = crc32_update(crc, 0);
4274                // len is the total number of bytes we need to scan
4275                n = f->page_crc_tests++;
4276                f->scan[n].bytes_left = len-j;
4277                f->scan[n].crc_so_far = crc;
4278                f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24);
4279                // if the last frame on a page is continued to the next, then
4280                // we can't recover the sample_loc immediately
4281                if (data[i+27+data[i+26]-1] == 255)
4282                   f->scan[n].sample_loc = ~0;
4283                else
4284                   f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24);
4285                f->scan[n].bytes_done = i+j;
4286                if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT)
4287                   break;
4288                // keep going if we still have room for more
4289             }
4290          }
4291       }
4292    }
4293 
4294    for (i=0; i < f->page_crc_tests;) {
4295       uint32 crc;
4296       int j;
4297       int n = f->scan[i].bytes_done;
4298       int m = f->scan[i].bytes_left;
4299       if (m > data_len - n) m = data_len - n;
4300       // m is the bytes to scan in the current chunk
4301       crc = f->scan[i].crc_so_far;
4302       for (j=0; j < m; ++j)
4303          crc = crc32_update(crc, data[n+j]);
4304       f->scan[i].bytes_left -= m;
4305       f->scan[i].crc_so_far = crc;
4306       if (f->scan[i].bytes_left == 0) {
4307          // does it match?
4308          if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
4309             // Houston, we have page
4310             data_len = n+m; // consumption amount is wherever that scan ended
4311             f->page_crc_tests = -1; // drop out of page scan mode
4312             f->previous_length = 0; // decode-but-don't-output one frame
4313             f->next_seg = -1;       // start a new page
4314             f->current_loc = f->scan[i].sample_loc; // set the current sample location
4315                                     // to the amount we'd have decoded had we decoded this page
4316             f->current_loc_valid = f->current_loc != ~0U;
4317             return data_len;
4318          }
4319          // delete entry
4320          f->scan[i] = f->scan[--f->page_crc_tests];
4321       } else {
4322          ++i;
4323       }
4324    }
4325 
4326    return data_len;
4327 }
4328 
4329 // return value: number of bytes we used
stb_vorbis_decode_frame_pushdata(stb_vorbis * f,const uint8 * data,int data_len,int * channels,float *** output,int * samples)4330 int stb_vorbis_decode_frame_pushdata(
4331          stb_vorbis *f,                   // the file we're decoding
4332          const uint8 *data, int data_len, // the memory available for decoding
4333          int *channels,                   // place to write number of float * buffers
4334          float ***output,                 // place to write float ** array of float * buffers
4335          int *samples                     // place to write number of output samples
4336      )
4337 {
4338    int i;
4339    int len,right,left;
4340 
4341    if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4342 
4343    if (f->page_crc_tests >= 0) {
4344       *samples = 0;
4345       return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len);
4346    }
4347 
4348    f->stream     = (uint8 *) data;
4349    f->stream_end = (uint8 *) data + data_len;
4350    f->error      = VORBIS__no_error;
4351 
4352    // check that we have the entire packet in memory
4353    if (!is_whole_packet_present(f, FALSE)) {
4354       *samples = 0;
4355       return 0;
4356    }
4357 
4358    if (!vorbis_decode_packet(f, &len, &left, &right)) {
4359       // save the actual error we encountered
4360       enum STBVorbisError error = f->error;
4361       if (error == VORBIS_bad_packet_type) {
4362          // flush and resynch
4363          f->error = VORBIS__no_error;
4364          while (get8_packet(f) != EOP)
4365             if (f->eof) break;
4366          *samples = 0;
4367          return (int) (f->stream - data);
4368       }
4369       if (error == VORBIS_continued_packet_flag_invalid) {
4370          if (f->previous_length == 0) {
4371             // we may be resynching, in which case it's ok to hit one
4372             // of these; just discard the packet
4373             f->error = VORBIS__no_error;
4374             while (get8_packet(f) != EOP)
4375                if (f->eof) break;
4376             *samples = 0;
4377             return (int) (f->stream - data);
4378          }
4379       }
4380       // if we get an error while parsing, what to do?
4381       // well, it DEFINITELY won't work to continue from where we are!
4382       stb_vorbis_flush_pushdata(f);
4383       // restore the error that actually made us bail
4384       f->error = error;
4385       *samples = 0;
4386       return 1;
4387    }
4388 
4389    // success!
4390    len = vorbis_finish_frame(f, len, left, right);
4391    for (i=0; i < f->channels; ++i)
4392       f->outputs[i] = f->channel_buffers[i] + left;
4393 
4394    if (channels) *channels = f->channels;
4395    *samples = len;
4396    *output = f->outputs;
4397    return (int) (f->stream - data);
4398 }
4399 
stb_vorbis_open_pushdata(const unsigned char * data,int data_len,int * data_used,int * error,const stb_vorbis_alloc * alloc)4400 stb_vorbis *stb_vorbis_open_pushdata(
4401          const unsigned char *data, int data_len, // the memory available for decoding
4402          int *data_used,              // only defined if result is not NULL
4403          int *error, const stb_vorbis_alloc *alloc)
4404 {
4405    stb_vorbis *f, p;
4406    vorbis_init(&p, alloc);
4407    p.stream     = (uint8 *) data;
4408    p.stream_end = (uint8 *) data + data_len;
4409    p.push_mode  = TRUE;
4410    if (!start_decoder(&p)) {
4411       if (p.eof)
4412          *error = VORBIS_need_more_data;
4413       else
4414          *error = p.error;
4415       return NULL;
4416    }
4417    f = vorbis_alloc(&p);
4418    if (f) {
4419       *f = p;
4420       *data_used = (int) (f->stream - data);
4421       *error = 0;
4422       return f;
4423    } else {
4424       vorbis_deinit(&p);
4425       return NULL;
4426    }
4427 }
4428 #endif // STB_VORBIS_NO_PUSHDATA_API
4429 
stb_vorbis_get_file_offset(stb_vorbis * f)4430 unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
4431 {
4432    #ifndef STB_VORBIS_NO_PUSHDATA_API
4433    if (f->push_mode) return 0;
4434    #endif
4435    if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start);
4436    #ifndef STB_VORBIS_NO_STDIO
4437    return (unsigned int) (ftell(f->f) - f->f_start);
4438    #endif
4439 }
4440 
4441 #ifndef STB_VORBIS_NO_PULLDATA_API
4442 //
4443 // DATA-PULLING API
4444 //
4445 
vorbis_find_page(stb_vorbis * f,uint32 * end,uint32 * last)4446 static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
4447 {
4448    for(;;) {
4449       int n;
4450       if (f->eof) return 0;
4451       n = get8(f);
4452       if (n == 0x4f) { // page header candidate
4453          unsigned int retry_loc = stb_vorbis_get_file_offset(f);
4454          int i;
4455          // check if we're off the end of a file_section stream
4456          if (retry_loc - 25 > f->stream_len)
4457             return 0;
4458          // check the rest of the header
4459          for (i=1; i < 4; ++i)
4460             if (get8(f) != ogg_page_header[i])
4461                break;
4462          if (f->eof) return 0;
4463          if (i == 4) {
4464             uint8 header[27];
4465             uint32 i, crc, goal, len;
4466             for (i=0; i < 4; ++i)
4467                header[i] = ogg_page_header[i];
4468             for (; i < 27; ++i)
4469                header[i] = get8(f);
4470             if (f->eof) return 0;
4471             if (header[4] != 0) goto invalid;
4472             goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24);
4473             for (i=22; i < 26; ++i)
4474                header[i] = 0;
4475             crc = 0;
4476             for (i=0; i < 27; ++i)
4477                crc = crc32_update(crc, header[i]);
4478             len = 0;
4479             for (i=0; i < header[26]; ++i) {
4480                int s = get8(f);
4481                crc = crc32_update(crc, s);
4482                len += s;
4483             }
4484             if (len && f->eof) return 0;
4485             for (i=0; i < len; ++i)
4486                crc = crc32_update(crc, get8(f));
4487             // finished parsing probable page
4488             if (crc == goal) {
4489                // we could now check that it's either got the last
4490                // page flag set, OR it's followed by the capture
4491                // pattern, but I guess TECHNICALLY you could have
4492                // a file with garbage between each ogg page and recover
4493                // from it automatically? So even though that paranoia
4494                // might decrease the chance of an invalid decode by
4495                // another 2^32, not worth it since it would hose those
4496                // invalid-but-useful files?
4497                if (end)
4498                   *end = stb_vorbis_get_file_offset(f);
4499                if (last) {
4500                   if (header[5] & 0x04)
4501                      *last = 1;
4502                   else
4503                      *last = 0;
4504                }
4505                set_file_offset(f, retry_loc-1);
4506                return 1;
4507             }
4508          }
4509         invalid:
4510          // not a valid page, so rewind and look for next one
4511          set_file_offset(f, retry_loc);
4512       }
4513    }
4514 }
4515 
4516 
4517 #define SAMPLE_unknown  0xffffffff
4518 
4519 // seeking is implemented with a binary search, which narrows down the range to
4520 // 64K, before using a linear search (because finding the synchronization
4521 // pattern can be expensive, and the chance we'd find the end page again is
4522 // relatively high for small ranges)
4523 //
4524 // two initial interpolation-style probes are used at the start of the search
4525 // to try to bound either side of the binary search sensibly, while still
4526 // working in O(log n) time if they fail.
4527 
get_seek_page_info(stb_vorbis * f,ProbedPage * z)4528 static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
4529 {
4530    uint8 header[27], lacing[255];
4531    int i,len;
4532 
4533    // record where the page starts
4534    z->page_start = stb_vorbis_get_file_offset(f);
4535 
4536    // parse the header
4537    getn(f, header, 27);
4538    if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
4539       return 0;
4540    getn(f, lacing, header[26]);
4541 
4542    // determine the length of the payload
4543    len = 0;
4544    for (i=0; i < header[26]; ++i)
4545       len += lacing[i];
4546 
4547    // this implies where the page ends
4548    z->page_end = z->page_start + 27 + header[26] + len;
4549 
4550    // read the last-decoded sample out of the data
4551    z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
4552 
4553    // restore file state to where we were
4554    set_file_offset(f, z->page_start);
4555    return 1;
4556 }
4557 
4558 // rarely used function to seek back to the preceeding page while finding the
4559 // start of a packet
go_to_page_before(stb_vorbis * f,unsigned int limit_offset)4560 static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
4561 {
4562    unsigned int previous_safe, end;
4563 
4564    // now we want to seek back 64K from the limit
4565    if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset)
4566       previous_safe = limit_offset - 65536;
4567    else
4568       previous_safe = f->first_audio_page_offset;
4569 
4570    set_file_offset(f, previous_safe);
4571 
4572    while (vorbis_find_page(f, &end, NULL)) {
4573       if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
4574          return 1;
4575       set_file_offset(f, end);
4576    }
4577 
4578    return 0;
4579 }
4580 
4581 // implements the search logic for finding a page and starting decoding. if
4582 // the function succeeds, current_loc_valid will be true and current_loc will
4583 // be less than or equal to the provided sample number (the closer the
4584 // better).
seek_to_sample_coarse(stb_vorbis * f,uint32 sample_number)4585 static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
4586 {
4587    ProbedPage left, right, mid;
4588    int i, start_seg_with_known_loc, end_pos, page_start;
4589    uint32 delta, stream_length, padding;
4590    double offset = 0.0, bytes_per_sample = 0.0;
4591    int probe = 0;
4592 
4593    // find the last page and validate the target sample
4594    stream_length = stb_vorbis_stream_length_in_samples(f);
4595    if (stream_length == 0)            return error(f, VORBIS_seek_without_length);
4596    if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
4597 
4598    // this is the maximum difference between the window-center (which is the
4599    // actual granule position value), and the right-start (which the spec
4600    // indicates should be the granule position (give or take one)).
4601    padding = ((f->blocksize_1 - f->blocksize_0) >> 2);
4602    if (sample_number < padding)
4603       sample_number = 0;
4604    else
4605       sample_number -= padding;
4606 
4607    left = f->p_first;
4608    while (left.last_decoded_sample == ~0U) {
4609       // (untested) the first page does not have a 'last_decoded_sample'
4610       set_file_offset(f, left.page_end);
4611       if (!get_seek_page_info(f, &left)) goto error;
4612    }
4613 
4614    right = f->p_last;
4615    assert(right.last_decoded_sample != ~0U);
4616 
4617    // starting from the start is handled differently
4618    if (sample_number <= left.last_decoded_sample) {
4619       if (stb_vorbis_seek_start(f))
4620          return 1;
4621       return 0;
4622    }
4623 
4624    while (left.page_end != right.page_start) {
4625       assert(left.page_end < right.page_start);
4626       // search range in bytes
4627       delta = right.page_start - left.page_end;
4628       if (delta <= 65536) {
4629          // there's only 64K left to search - handle it linearly
4630          set_file_offset(f, left.page_end);
4631       } else {
4632          if (probe < 2) {
4633             if (probe == 0) {
4634                // first probe (interpolate)
4635                double data_bytes = right.page_end - left.page_start;
4636                bytes_per_sample = data_bytes / right.last_decoded_sample;
4637                offset = left.page_start + bytes_per_sample * (sample_number - left.last_decoded_sample);
4638             } else {
4639                // second probe (try to bound the other side)
4640                double error = ((double) sample_number - mid.last_decoded_sample) * bytes_per_sample;
4641                if (error >= 0 && error <  8000) error =  8000;
4642                if (error <  0 && error > -8000) error = -8000;
4643                offset += error * 2;
4644             }
4645 
4646             // ensure the offset is valid
4647             if (offset < left.page_end)
4648                offset = left.page_end;
4649             if (offset > right.page_start - 65536)
4650                offset = right.page_start - 65536;
4651 
4652             set_file_offset(f, (unsigned int) offset);
4653          } else {
4654             // binary search for large ranges (offset by 32K to ensure
4655             // we don't hit the right page)
4656             set_file_offset(f, left.page_end + (delta / 2) - 32768);
4657          }
4658 
4659          if (!vorbis_find_page(f, NULL, NULL)) goto error;
4660       }
4661 
4662       for (;;) {
4663          if (!get_seek_page_info(f, &mid)) goto error;
4664          if (mid.last_decoded_sample != ~0U) break;
4665          // (untested) no frames end on this page
4666          set_file_offset(f, mid.page_end);
4667          assert(mid.page_start < right.page_start);
4668       }
4669 
4670       // if we've just found the last page again then we're in a tricky file,
4671       // and we're close enough.
4672       if (mid.page_start == right.page_start)
4673          break;
4674 
4675       if (sample_number < mid.last_decoded_sample)
4676          right = mid;
4677       else
4678          left = mid;
4679 
4680       ++probe;
4681    }
4682 
4683    // seek back to start of the last packet
4684    page_start = left.page_start;
4685    set_file_offset(f, page_start);
4686    if (!start_page(f)) return error(f, VORBIS_seek_failed);
4687    end_pos = f->end_seg_with_known_loc;
4688    assert(end_pos >= 0);
4689 
4690    for (;;) {
4691       for (i = end_pos; i > 0; --i)
4692          if (f->segments[i-1] != 255)
4693             break;
4694 
4695       start_seg_with_known_loc = i;
4696 
4697       if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet))
4698          break;
4699 
4700       // (untested) the final packet begins on an earlier page
4701       if (!go_to_page_before(f, page_start))
4702          goto error;
4703 
4704       page_start = stb_vorbis_get_file_offset(f);
4705       if (!start_page(f)) goto error;
4706       end_pos = f->segment_count - 1;
4707    }
4708 
4709    // prepare to start decoding
4710    f->current_loc_valid = FALSE;
4711    f->last_seg = FALSE;
4712    f->valid_bits = 0;
4713    f->packet_bytes = 0;
4714    f->bytes_in_seg = 0;
4715    f->previous_length = 0;
4716    f->next_seg = start_seg_with_known_loc;
4717 
4718    for (i = 0; i < start_seg_with_known_loc; i++)
4719       skip(f, f->segments[i]);
4720 
4721    // start decoding (optimizable - this frame is generally discarded)
4722    if (!vorbis_pump_first_frame(f))
4723       return 0;
4724    if (f->current_loc > sample_number)
4725       return error(f, VORBIS_seek_failed);
4726    return 1;
4727 
4728 error:
4729    // try to restore the file to a valid state
4730    stb_vorbis_seek_start(f);
4731    return error(f, VORBIS_seek_failed);
4732 }
4733 
4734 // the same as vorbis_decode_initial, but without advancing
peek_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)4735 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
4736 {
4737    int bits_read, bytes_read;
4738 
4739    if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
4740       return 0;
4741 
4742    // either 1 or 2 bytes were read, figure out which so we can rewind
4743    bits_read = 1 + ilog(f->mode_count-1);
4744    if (f->mode_config[*mode].blockflag)
4745       bits_read += 2;
4746    bytes_read = (bits_read + 7) / 8;
4747 
4748    f->bytes_in_seg += bytes_read;
4749    f->packet_bytes -= bytes_read;
4750    skip(f, -bytes_read);
4751    if (f->next_seg == -1)
4752       f->next_seg = f->segment_count - 1;
4753    else
4754       f->next_seg--;
4755    f->valid_bits = 0;
4756 
4757    return 1;
4758 }
4759 
stb_vorbis_seek_frame(stb_vorbis * f,unsigned int sample_number)4760 int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
4761 {
4762    uint32 max_frame_samples;
4763 
4764    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4765 
4766    // fast page-level search
4767    if (!seek_to_sample_coarse(f, sample_number))
4768       return 0;
4769 
4770    assert(f->current_loc_valid);
4771    assert(f->current_loc <= sample_number);
4772 
4773    // linear search for the relevant packet
4774    max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2;
4775    while (f->current_loc < sample_number) {
4776       int left_start, left_end, right_start, right_end, mode, frame_samples;
4777       if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
4778          return error(f, VORBIS_seek_failed);
4779       // calculate the number of samples returned by the next frame
4780       frame_samples = right_start - left_start;
4781       if (f->current_loc + frame_samples > sample_number) {
4782          return 1; // the next frame will contain the sample
4783       } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) {
4784          // there's a chance the frame after this could contain the sample
4785          vorbis_pump_first_frame(f);
4786       } else {
4787          // this frame is too early to be relevant
4788          f->current_loc += frame_samples;
4789          f->previous_length = 0;
4790          maybe_start_packet(f);
4791          flush_packet(f);
4792       }
4793    }
4794    // the next frame will start with the sample
4795    assert(f->current_loc == sample_number);
4796    return 1;
4797 }
4798 
stb_vorbis_seek(stb_vorbis * f,unsigned int sample_number)4799 int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
4800 {
4801    if (!stb_vorbis_seek_frame(f, sample_number))
4802       return 0;
4803 
4804    if (sample_number != f->current_loc) {
4805       int n;
4806       uint32 frame_start = f->current_loc;
4807       stb_vorbis_get_frame_float(f, &n, NULL);
4808       assert(sample_number > frame_start);
4809       assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end);
4810       f->channel_buffer_start += (sample_number - frame_start);
4811    }
4812 
4813    return 1;
4814 }
4815 
stb_vorbis_seek_start(stb_vorbis * f)4816 int stb_vorbis_seek_start(stb_vorbis *f)
4817 {
4818    if (IS_PUSH_MODE(f)) { return error(f, VORBIS_invalid_api_mixing); }
4819    set_file_offset(f, f->first_audio_page_offset);
4820    f->previous_length = 0;
4821    f->first_decode = TRUE;
4822    f->next_seg = -1;
4823    return vorbis_pump_first_frame(f);
4824 }
4825 
stb_vorbis_stream_length_in_samples(stb_vorbis * f)4826 unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
4827 {
4828    unsigned int restore_offset, previous_safe;
4829    unsigned int end, last_page_loc;
4830 
4831    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4832    if (!f->total_samples) {
4833       unsigned int last;
4834       uint32 lo,hi;
4835       char header[6];
4836 
4837       // first, store the current decode position so we can restore it
4838       restore_offset = stb_vorbis_get_file_offset(f);
4839 
4840       // now we want to seek back 64K from the end (the last page must
4841       // be at most a little less than 64K, but let's allow a little slop)
4842       if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
4843          previous_safe = f->stream_len - 65536;
4844       else
4845          previous_safe = f->first_audio_page_offset;
4846 
4847       set_file_offset(f, previous_safe);
4848       // previous_safe is now our candidate 'earliest known place that seeking
4849       // to will lead to the final page'
4850 
4851       if (!vorbis_find_page(f, &end, &last)) {
4852          // if we can't find a page, we're hosed!
4853          f->error = VORBIS_cant_find_last_page;
4854          f->total_samples = 0xffffffff;
4855          goto done;
4856       }
4857 
4858       // check if there are more pages
4859       last_page_loc = stb_vorbis_get_file_offset(f);
4860 
4861       // stop when the last_page flag is set, not when we reach eof;
4862       // this allows us to stop short of a 'file_section' end without
4863       // explicitly checking the length of the section
4864       while (!last) {
4865          set_file_offset(f, end);
4866          if (!vorbis_find_page(f, &end, &last)) {
4867             // the last page we found didn't have the 'last page' flag
4868             // set. whoops!
4869             break;
4870          }
4871          previous_safe = last_page_loc+1;
4872          last_page_loc = stb_vorbis_get_file_offset(f);
4873       }
4874 
4875       set_file_offset(f, last_page_loc);
4876 
4877       // parse the header
4878       getn(f, (unsigned char *)header, 6);
4879       // extract the absolute granule position
4880       lo = get32(f);
4881       hi = get32(f);
4882       if (lo == 0xffffffff && hi == 0xffffffff) {
4883          f->error = VORBIS_cant_find_last_page;
4884          f->total_samples = SAMPLE_unknown;
4885          goto done;
4886       }
4887       if (hi)
4888          lo = 0xfffffffe; // saturate
4889       f->total_samples = lo;
4890 
4891       f->p_last.page_start = last_page_loc;
4892       f->p_last.page_end   = end;
4893       f->p_last.last_decoded_sample = lo;
4894 
4895      done:
4896       set_file_offset(f, restore_offset);
4897    }
4898    return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
4899 }
4900 
stb_vorbis_stream_length_in_seconds(stb_vorbis * f)4901 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
4902 {
4903    return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
4904 }
4905 
4906 
4907 
stb_vorbis_get_frame_float(stb_vorbis * f,int * channels,float *** output)4908 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
4909 {
4910    int len, right,left,i;
4911    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4912 
4913    if (!vorbis_decode_packet(f, &len, &left, &right)) {
4914       f->channel_buffer_start = f->channel_buffer_end = 0;
4915       return 0;
4916    }
4917 
4918    len = vorbis_finish_frame(f, len, left, right);
4919    for (i=0; i < f->channels; ++i)
4920       f->outputs[i] = f->channel_buffers[i] + left;
4921 
4922    f->channel_buffer_start = left;
4923    f->channel_buffer_end   = left+len;
4924 
4925    if (channels) *channels = f->channels;
4926    if (output)   *output = f->outputs;
4927    return len;
4928 }
4929 
4930 #ifndef STB_VORBIS_NO_STDIO
4931 
stb_vorbis_open_file_section(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc,unsigned int length)4932 stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length)
4933 {
4934    stb_vorbis *f, p;
4935    vorbis_init(&p, alloc);
4936    p.f = file;
4937    p.f_start = (uint32) ftell(file);
4938    p.stream_len   = length;
4939    p.close_on_free = close_on_free;
4940    if (start_decoder(&p)) {
4941       f = vorbis_alloc(&p);
4942       if (f) {
4943          *f = p;
4944          vorbis_pump_first_frame(f);
4945          return f;
4946       }
4947    }
4948    if (error) *error = p.error;
4949    vorbis_deinit(&p);
4950    return NULL;
4951 }
4952 
stb_vorbis_open_file(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc)4953 stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc)
4954 {
4955    unsigned int len, start;
4956    start = (unsigned int) ftell(file);
4957    fseek(file, 0, SEEK_END);
4958    len = (unsigned int) (ftell(file) - start);
4959    fseek(file, start, SEEK_SET);
4960    return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
4961 }
4962 
stb_vorbis_open_filename(const char * filename,int * error,const stb_vorbis_alloc * alloc)4963 stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
4964 {
4965    FILE *f = fopen(filename, "rb");
4966    if (f)
4967       return stb_vorbis_open_file(f, TRUE, error, alloc);
4968    if (error) *error = VORBIS_file_open_failure;
4969    return NULL;
4970 }
4971 #endif // STB_VORBIS_NO_STDIO
4972 
stb_vorbis_open_memory(const unsigned char * data,int len,int * error,const stb_vorbis_alloc * alloc)4973 stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc)
4974 {
4975    stb_vorbis *f, p;
4976    if (data == NULL) return NULL;
4977    vorbis_init(&p, alloc);
4978    p.stream = (uint8 *) data;
4979    p.stream_end = (uint8 *) data + len;
4980    p.stream_start = (uint8 *) p.stream;
4981    p.stream_len = len;
4982    p.push_mode = FALSE;
4983    if (start_decoder(&p)) {
4984       f = vorbis_alloc(&p);
4985       if (f) {
4986          *f = p;
4987          vorbis_pump_first_frame(f);
4988          if (error) *error = VORBIS__no_error;
4989          return f;
4990       }
4991    }
4992    if (error) *error = p.error;
4993    vorbis_deinit(&p);
4994    return NULL;
4995 }
4996 
4997 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
4998 #define PLAYBACK_MONO     1
4999 #define PLAYBACK_LEFT     2
5000 #define PLAYBACK_RIGHT    4
5001 
5002 #define L  (PLAYBACK_LEFT  | PLAYBACK_MONO)
5003 #define C  (PLAYBACK_LEFT  | PLAYBACK_RIGHT | PLAYBACK_MONO)
5004 #define R  (PLAYBACK_RIGHT | PLAYBACK_MONO)
5005 
5006 static int8 channel_position[7][6] =
5007 {
5008    { 0 },
5009    { C },
5010    { L, R },
5011    { L, C, R },
5012    { L, R, L, R },
5013    { L, C, R, L, R },
5014    { L, C, R, L, R, C },
5015 };
5016 
5017 
5018 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
5019    typedef union {
5020       float f;
5021       int i;
5022    } float_conv;
5023    typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4];
5024    #define FASTDEF(x) float_conv x
5025    // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
5026    #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT))
5027    #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22))
5028    #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s))
5029    #define check_endianness()
5030 #else
5031    #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s))))
5032    #define check_endianness()
5033    #define FASTDEF(x)
5034 #endif
5035 
copy_samples(short * dest,float * src,int len)5036 static void copy_samples(short *dest, float *src, int len)
5037 {
5038    int i;
5039    check_endianness();
5040    for (i=0; i < len; ++i) {
5041       FASTDEF(temp);
5042       int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i],15);
5043       if ((unsigned int) (v + 32768) > 65535)
5044          v = v < 0 ? -32768 : 32767;
5045       dest[i] = v;
5046    }
5047 }
5048 
compute_samples(int mask,short * output,int num_c,float ** data,int d_offset,int len)5049 static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
5050 {
5051    #define BUFFER_SIZE  32
5052    float buffer[BUFFER_SIZE];
5053    int i,j,o,n = BUFFER_SIZE;
5054    check_endianness();
5055    for (o = 0; o < len; o += BUFFER_SIZE) {
5056       memset(buffer, 0, sizeof(buffer));
5057       if (o + n > len) n = len - o;
5058       for (j=0; j < num_c; ++j) {
5059          if (channel_position[num_c][j] & mask) {
5060             for (i=0; i < n; ++i)
5061                buffer[i] += data[j][d_offset+o+i];
5062          }
5063       }
5064       for (i=0; i < n; ++i) {
5065          FASTDEF(temp);
5066          int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5067          if ((unsigned int) (v + 32768) > 65535)
5068             v = v < 0 ? -32768 : 32767;
5069          output[o+i] = v;
5070       }
5071    }
5072 }
5073 
compute_stereo_samples(short * output,int num_c,float ** data,int d_offset,int len)5074 static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
5075 {
5076    #define BUFFER_SIZE  32
5077    float buffer[BUFFER_SIZE];
5078    int i,j,o,n = BUFFER_SIZE >> 1;
5079    // o is the offset in the source data
5080    check_endianness();
5081    for (o = 0; o < len; o += BUFFER_SIZE >> 1) {
5082       // o2 is the offset in the output data
5083       int o2 = o << 1;
5084       memset(buffer, 0, sizeof(buffer));
5085       if (o + n > len) n = len - o;
5086       for (j=0; j < num_c; ++j) {
5087          int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);
5088          if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) {
5089             for (i=0; i < n; ++i) {
5090                buffer[i*2+0] += data[j][d_offset+o+i];
5091                buffer[i*2+1] += data[j][d_offset+o+i];
5092             }
5093          } else if (m == PLAYBACK_LEFT) {
5094             for (i=0; i < n; ++i) {
5095                buffer[i*2+0] += data[j][d_offset+o+i];
5096             }
5097          } else if (m == PLAYBACK_RIGHT) {
5098             for (i=0; i < n; ++i) {
5099                buffer[i*2+1] += data[j][d_offset+o+i];
5100             }
5101          }
5102       }
5103       for (i=0; i < (n<<1); ++i) {
5104          FASTDEF(temp);
5105          int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5106          if ((unsigned int) (v + 32768) > 65535)
5107             v = v < 0 ? -32768 : 32767;
5108          output[o2+i] = v;
5109       }
5110    }
5111 }
5112 
convert_samples_short(int buf_c,short ** buffer,int b_offset,int data_c,float ** data,int d_offset,int samples)5113 static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
5114 {
5115    int i;
5116    if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5117       static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
5118       for (i=0; i < buf_c; ++i)
5119          compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples);
5120    } else {
5121       int limit = buf_c < data_c ? buf_c : data_c;
5122       for (i=0; i < limit; ++i)
5123          copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
5124       for (   ; i < buf_c; ++i)
5125          memset(buffer[i]+b_offset, 0, sizeof(short) * samples);
5126    }
5127 }
5128 
stb_vorbis_get_frame_short(stb_vorbis * f,int num_c,short ** buffer,int num_samples)5129 int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
5130 {
5131    float **output;
5132    int len = stb_vorbis_get_frame_float(f, NULL, &output);
5133    if (len > num_samples) len = num_samples;
5134    if (len)
5135       convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len);
5136    return len;
5137 }
5138 
convert_channels_short_interleaved(int buf_c,short * buffer,int data_c,float ** data,int d_offset,int len)5139 static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
5140 {
5141    int i;
5142    check_endianness();
5143    if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5144       assert(buf_c == 2);
5145       for (i=0; i < buf_c; ++i)
5146          compute_stereo_samples(buffer, data_c, data, d_offset, len);
5147    } else {
5148       int limit = buf_c < data_c ? buf_c : data_c;
5149       int j;
5150       for (j=0; j < len; ++j) {
5151          for (i=0; i < limit; ++i) {
5152             FASTDEF(temp);
5153             float f = data[i][d_offset+j];
5154             int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15);
5155             if ((unsigned int) (v + 32768) > 65535)
5156                v = v < 0 ? -32768 : 32767;
5157             *buffer++ = v;
5158          }
5159          for (   ; i < buf_c; ++i)
5160             *buffer++ = 0;
5161       }
5162    }
5163 }
5164 
stb_vorbis_get_frame_short_interleaved(stb_vorbis * f,int num_c,short * buffer,int num_shorts)5165 int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
5166 {
5167    float **output;
5168    int len;
5169    if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
5170    len = stb_vorbis_get_frame_float(f, NULL, &output);
5171    if (len) {
5172       if (len*num_c > num_shorts) len = num_shorts / num_c;
5173       convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len);
5174    }
5175    return len;
5176 }
5177 
stb_vorbis_get_samples_short_interleaved(stb_vorbis * f,int channels,short * buffer,int num_shorts)5178 int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
5179 {
5180    float **outputs;
5181    int len = num_shorts / channels;
5182    int n=0;
5183    int z = f->channels;
5184    if (z > channels) z = channels;
5185    while (n < len) {
5186       int k = f->channel_buffer_end - f->channel_buffer_start;
5187       if (n+k >= len) k = len - n;
5188       if (k)
5189          convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5190       buffer += k*channels;
5191       n += k;
5192       f->channel_buffer_start += k;
5193       if (n == len) break;
5194       if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5195    }
5196    return n;
5197 }
5198 
stb_vorbis_get_samples_short(stb_vorbis * f,int channels,short ** buffer,int len)5199 int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
5200 {
5201    float **outputs;
5202    int n=0;
5203    int z = f->channels;
5204    if (z > channels) z = channels;
5205    while (n < len) {
5206       int k = f->channel_buffer_end - f->channel_buffer_start;
5207       if (n+k >= len) k = len - n;
5208       if (k)
5209          convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5210       n += k;
5211       f->channel_buffer_start += k;
5212       if (n == len) break;
5213       if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5214    }
5215    return n;
5216 }
5217 
5218 #ifndef STB_VORBIS_NO_STDIO
stb_vorbis_decode_filename(const char * filename,int * channels,int * sample_rate,short ** output)5219 int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output)
5220 {
5221    int data_len, offset, total, limit, error;
5222    short *data;
5223    stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
5224    if (v == NULL) return -1;
5225    limit = v->channels * 4096;
5226    *channels = v->channels;
5227    if (sample_rate)
5228       *sample_rate = v->sample_rate;
5229    offset = data_len = 0;
5230    total = limit;
5231    data = (short *) malloc(total * sizeof(*data));
5232    if (data == NULL) {
5233       stb_vorbis_close(v);
5234       return -2;
5235    }
5236    for (;;) {
5237       int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5238       if (n == 0) break;
5239       data_len += n;
5240       offset += n * v->channels;
5241       if (offset + limit > total) {
5242          short *data2;
5243          total *= 2;
5244          data2 = (short *) realloc(data, total * sizeof(*data));
5245          if (data2 == NULL) {
5246             free(data);
5247             stb_vorbis_close(v);
5248             return -2;
5249          }
5250          data = data2;
5251       }
5252    }
5253    *output = data;
5254    stb_vorbis_close(v);
5255    return data_len;
5256 }
5257 #endif // NO_STDIO
5258 
stb_vorbis_decode_memory(const uint8 * mem,int len,int * channels,int * sample_rate,short ** output)5259 int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output)
5260 {
5261    int data_len, offset, total, limit, error;
5262    short *data;
5263    stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
5264    if (v == NULL) return -1;
5265    limit = v->channels * 4096;
5266    *channels = v->channels;
5267    if (sample_rate)
5268       *sample_rate = v->sample_rate;
5269    offset = data_len = 0;
5270    total = limit;
5271    data = (short *) malloc(total * sizeof(*data));
5272    if (data == NULL) {
5273       stb_vorbis_close(v);
5274       return -2;
5275    }
5276    for (;;) {
5277       int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5278       if (n == 0) break;
5279       data_len += n;
5280       offset += n * v->channels;
5281       if (offset + limit > total) {
5282          short *data2;
5283          total *= 2;
5284          data2 = (short *) realloc(data, total * sizeof(*data));
5285          if (data2 == NULL) {
5286             free(data);
5287             stb_vorbis_close(v);
5288             return -2;
5289          }
5290          data = data2;
5291       }
5292    }
5293    *output = data;
5294    stb_vorbis_close(v);
5295    return data_len;
5296 }
5297 #endif // STB_VORBIS_NO_INTEGER_CONVERSION
5298 
stb_vorbis_get_samples_float_interleaved(stb_vorbis * f,int channels,float * buffer,int num_floats)5299 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
5300 {
5301    float **outputs;
5302    int len = num_floats / channels;
5303    int n=0;
5304    int z = f->channels;
5305    if (z > channels) z = channels;
5306    while (n < len) {
5307       int i,j;
5308       int k = f->channel_buffer_end - f->channel_buffer_start;
5309       if (n+k >= len) k = len - n;
5310       for (j=0; j < k; ++j) {
5311          for (i=0; i < z; ++i)
5312             *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
5313          for (   ; i < channels; ++i)
5314             *buffer++ = 0;
5315       }
5316       n += k;
5317       f->channel_buffer_start += k;
5318       if (n == len)
5319          break;
5320       if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5321          break;
5322    }
5323    return n;
5324 }
5325 
stb_vorbis_get_samples_float(stb_vorbis * f,int channels,float ** buffer,int num_samples)5326 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
5327 {
5328    float **outputs;
5329    int n=0;
5330    int z = f->channels;
5331    if (z > channels) z = channels;
5332    while (n < num_samples) {
5333       int i;
5334       int k = f->channel_buffer_end - f->channel_buffer_start;
5335       if (n+k >= num_samples) k = num_samples - n;
5336       if (k) {
5337          for (i=0; i < z; ++i)
5338             memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k);
5339          for (   ; i < channels; ++i)
5340             memset(buffer[i]+n, 0, sizeof(float) * k);
5341       }
5342       n += k;
5343       f->channel_buffer_start += k;
5344       if (n == num_samples)
5345          break;
5346       if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5347          break;
5348    }
5349    return n;
5350 }
5351 #endif // STB_VORBIS_NO_PULLDATA_API
5352 
5353 /* Version history
5354     1.10    - 2017/03/03 - more robust seeking; fix negative ilog(); clear error in open_memory
5355     1.09    - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
5356     1.08    - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
5357                            avoid discarding last frame of audio data
5358     1.07    - 2015/01/16 - fixed some warnings, fix mingw, const-correct API
5359                            some more crash fixes when out of memory or with corrupt files
5360     1.06    - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
5361                            some crash fixes when out of memory or with corrupt files
5362     1.05    - 2015/04/19 - don't define __forceinline if it's redundant
5363     1.04    - 2014/08/27 - fix missing const-correct case in API
5364     1.03    - 2014/08/07 - Warning fixes
5365     1.02    - 2014/07/09 - Declare qsort compare function _cdecl on windows
5366     1.01    - 2014/06/18 - fix stb_vorbis_get_samples_float
5367     1.0     - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in multichannel
5368                            (API change) report sample rate for decode-full-file funcs
5369     0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5370     0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5371     0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5372     0.99993 - remove assert that fired on legal files with empty tables
5373     0.99992 - rewind-to-start
5374     0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5375     0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5376     0.9998 - add a full-decode function with a memory source
5377     0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5378     0.9996 - query length of vorbis stream in samples/seconds
5379     0.9995 - bugfix to another optimization that only happened in certain files
5380     0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5381     0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5382     0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5383     0.9991 - performance improvement of IMDCT
5384     0.999 - (should have been 0.9990) performance improvement of IMDCT
5385     0.998 - no-CRT support from Casey Muratori
5386     0.997 - bugfixes for bugs found by Terje Mathisen
5387     0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5388     0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5389     0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5390     0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5391     0.992 - fixes for MinGW warning
5392     0.991 - turn fast-float-conversion on by default
5393     0.990 - fix push-mode seek recovery if you seek into the headers
5394     0.98b - fix to bad release of 0.98
5395     0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5396     0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5397     0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5398     0.95 - clamping code for 16-bit functions
5399     0.94 - not publically released
5400     0.93 - fixed all-zero-floor case (was decoding garbage)
5401     0.92 - fixed a memory leak
5402     0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5403     0.90 - first public release
5404 */
5405 
5406 #endif // STB_VORBIS_HEADER_ONLY
5407 
5408 
5409 /*
5410 ------------------------------------------------------------------------------
5411 This software is available under 2 licenses -- choose whichever you prefer.
5412 ------------------------------------------------------------------------------
5413 ALTERNATIVE A - MIT License
5414 Copyright (c) 2017 Sean Barrett
5415 Permission is hereby granted, free of charge, to any person obtaining a copy of
5416 this software and associated documentation files (the "Software"), to deal in
5417 the Software without restriction, including without limitation the rights to
5418 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
5419 of the Software, and to permit persons to whom the Software is furnished to do
5420 so, subject to the following conditions:
5421 The above copyright notice and this permission notice shall be included in all
5422 copies or substantial portions of the Software.
5423 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5424 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5425 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5426 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
5427 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
5428 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
5429 SOFTWARE.
5430 ------------------------------------------------------------------------------
5431 ALTERNATIVE B - Public Domain (www.unlicense.org)
5432 This is free and unencumbered software released into the public domain.
5433 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
5434 software, either in source code form or as a compiled binary, for any purpose,
5435 commercial or non-commercial, and by any means.
5436 In jurisdictions that recognize copyright laws, the author or authors of this
5437 software dedicate any and all copyright interest in the software to the public
5438 domain. We make this dedication for the benefit of the public at large and to
5439 the detriment of our heirs and successors. We intend this dedication to be an
5440 overt act of relinquishment in perpetuity of all present and future rights to
5441 this software under copyright law.
5442 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5443 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5444 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5445 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
5446 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
5447 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
5448 ------------------------------------------------------------------------------
5449 */
5450