1 // Ogg Vorbis audio decoder - v1.09 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking sponsored
7 // by Phillip Bennefall, Marc Andersen, Aaron Baker, Elias Software,
8 // Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 //   This software is dual-licensed to the public domain and under the following
13 //   license: you are granted a perpetual, irrevocable license to copy, modify,
14 //   publish, and distribute this file as you see fit.
15 //
16 // No warranty for any purpose is expressed or implied by the author (nor
17 // by RAD Game Tools). Report bugs and send enhancements to the author.
18 //
19 // Limitations:
20 //
21 //   - floor 0 not supported (used in old ogg vorbis files pre-2004)
22 //   - lossless sample-truncation at beginning ignored
23 //   - cannot concatenate multiple vorbis streams
24 //   - sample positions are 32-bit, limiting seekable 192Khz
25 //       files to around 6 hours (Ogg supports 64-bit)
26 //
27 // Feature contributors:
28 //    Dougall Johnson (sample-exact seeking)
29 //
30 // Bugfix/warning contributors:
31 //    Terje Mathisen     Niklas Frykholm     Andy Hill
32 //    Casey Muratori     John Bolton         Gargaj
33 //    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
34 //    Bernhard Wodo      Evan Balster        alxprd@github
35 //    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
36 //    Phillip Bennefall  Rohit               Thiago Goulart
37 //    manxorist@github   saga musix
38 //
39 // Partial history:
40 //    1.09    - 2016/04/04 - back out 'truncation of last frame' fix from previous version
41 //    1.08    - 2016/04/02 - warnings; setup memory leaks; truncation of last frame
42 //    1.07    - 2015/01/16 - fixes for crashes on invalid files; warning fixes; const
43 //    1.06    - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
44 //                           some crash fixes when out of memory or with corrupt files
45 //                           fix some inappropriately signed shifts
46 //    1.05    - 2015/04/19 - don't define __forceinline if it's redundant
47 //    1.04    - 2014/08/27 - fix missing const-correct case in API
48 //    1.03    - 2014/08/07 - warning fixes
49 //    1.02    - 2014/07/09 - declare qsort comparison as explicitly _cdecl in Windows
50 //    1.01    - 2014/06/18 - fix stb_vorbis_get_samples_float (interleaved was correct)
51 //    1.0     - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
52 //                           (API change) report sample rate for decode-full-file funcs
53 //
54 // See end of file for full version history.
55 
56 
57 //////////////////////////////////////////////////////////////////////////////
58 //
59 //  HEADER BEGINS HERE
60 //
61 
62 #ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
63 #define STB_VORBIS_INCLUDE_STB_VORBIS_H
64 
65 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
66 #define STB_VORBIS_NO_STDIO 1
67 #endif
68 
69 #ifndef STB_VORBIS_NO_STDIO
70 #include <stdio.h>
71 #endif
72 
73 #ifdef __cplusplus
74 extern "C" {
75 #endif
76 
77 ///////////   THREAD SAFETY
78 
79 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
80 // them from multiple threads at the same time. However, you can have multiple
81 // stb_vorbis* handles and decode from them independently in multiple thrads.
82 
83 
84 ///////////   MEMORY ALLOCATION
85 
86 // normally stb_vorbis uses malloc() to allocate memory at startup,
87 // and alloca() to allocate temporary memory during a frame on the
88 // stack. (Memory consumption will depend on the amount of setup
89 // data in the file and how you set the compile flags for speed
90 // vs. size. In my test files the maximal-size usage is ~150KB.)
91 //
92 // You can modify the wrapper functions in the source (setup_malloc,
93 // setup_temp_malloc, temp_malloc) to change this behavior, or you
94 // can use a simpler allocation model: you pass in a buffer from
95 // which stb_vorbis will allocate _all_ its memory (including the
96 // temp memory). "open" may fail with a VORBIS_outofmem if you
97 // do not pass in enough data; there is no way to determine how
98 // much you do need except to succeed (at which point you can
99 // query get_info to find the exact amount required. yes I know
100 // this is lame).
101 //
102 // If you pass in a non-NULL buffer of the type below, allocation
103 // will occur from it as described above. Otherwise just pass NULL
104 // to use malloc()/alloca()
105 
106 typedef struct
107 {
108    char *alloc_buffer;
109    int   alloc_buffer_length_in_bytes;
110 } stb_vorbis_alloc;
111 
112 
113 ///////////   FUNCTIONS USEABLE WITH ALL INPUT MODES
114 
115 typedef struct stb_vorbis stb_vorbis;
116 
117 typedef struct
118 {
119    unsigned int sample_rate;
120    int channels;
121 
122    unsigned int setup_memory_required;
123    unsigned int setup_temp_memory_required;
124    unsigned int temp_memory_required;
125 
126    int max_frame_size;
127 } stb_vorbis_info;
128 
129 // get general information about the file
130 extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
131 
132 // get the last error detected (clears it, too)
133 extern int stb_vorbis_get_error(stb_vorbis *f);
134 
135 // close an ogg vorbis file and free all memory in use
136 extern void stb_vorbis_close(stb_vorbis *f);
137 
138 // this function returns the offset (in samples) from the beginning of the
139 // file that will be returned by the next decode, if it is known, or -1
140 // otherwise. after a flush_pushdata() call, this may take a while before
141 // it becomes valid again.
142 // NOT WORKING YET after a seek with PULLDATA API
143 extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
144 
145 // returns the current seek point within the file, or offset from the beginning
146 // of the memory buffer. In pushdata mode it returns 0.
147 extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
148 
149 ///////////   PUSHDATA API
150 
151 #ifndef STB_VORBIS_NO_PUSHDATA_API
152 
153 // this API allows you to get blocks of data from any source and hand
154 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
155 // you how much it used, and you have to give it the rest next time;
156 // and stb_vorbis may not have enough data to work with and you will
157 // need to give it the same data again PLUS more. Note that the Vorbis
158 // specification does not bound the size of an individual frame.
159 
160 extern stb_vorbis *stb_vorbis_open_pushdata(
161          const unsigned char * datablock, int datablock_length_in_bytes,
162          int *datablock_memory_consumed_in_bytes,
163          int *error,
164          const stb_vorbis_alloc *alloc_buffer);
165 // create a vorbis decoder by passing in the initial data block containing
166 //    the ogg&vorbis headers (you don't need to do parse them, just provide
167 //    the first N bytes of the file--you're told if it's not enough, see below)
168 // on success, returns an stb_vorbis *, does not set error, returns the amount of
169 //    data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
170 // on failure, returns NULL on error and sets *error, does not change *datablock_memory_consumed
171 // if returns NULL and *error is VORBIS_need_more_data, then the input block was
172 //       incomplete and you need to pass in a larger block from the start of the file
173 
174 extern int stb_vorbis_decode_frame_pushdata(
175          stb_vorbis *f,
176          const unsigned char *datablock, int datablock_length_in_bytes,
177          int *channels,             // place to write number of float * buffers
178          float ***output,           // place to write float ** array of float * buffers
179          int *samples               // place to write number of output samples
180      );
181 // decode a frame of audio sample data if possible from the passed-in data block
182 //
183 // return value: number of bytes we used from datablock
184 //
185 // possible cases:
186 //     0 bytes used, 0 samples output (need more data)
187 //     N bytes used, 0 samples output (resynching the stream, keep going)
188 //     N bytes used, M samples output (one frame of data)
189 // note that after opening a file, you will ALWAYS get one N-bytes,0-sample
190 // frame, because Vorbis always "discards" the first frame.
191 //
192 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
193 // instead only datablock_length_in_bytes-3 or less. This is because it wants
194 // to avoid missing parts of a page header if they cross a datablock boundary,
195 // without writing state-machiney code to record a partial detection.
196 //
197 // The number of channels returned are stored in *channels (which can be
198 // NULL--it is always the same as the number of channels reported by
199 // get_info). *output will contain an array of float* buffers, one per
200 // channel. In other words, (*output)[0][0] contains the first sample from
201 // the first channel, and (*output)[1][0] contains the first sample from
202 // the second channel.
203 
204 extern void stb_vorbis_flush_pushdata(stb_vorbis *f);
205 // inform stb_vorbis that your next datablock will not be contiguous with
206 // previous ones (e.g. you've seeked in the data); future attempts to decode
207 // frames will cause stb_vorbis to resynchronize (as noted above), and
208 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
209 // will begin decoding the _next_ frame.
210 //
211 // if you want to seek using pushdata, you need to seek in your file, then
212 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
213 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
214 // if you don't like the result, seek your file again and repeat.
215 #endif
216 
217 
218 //////////   PULLING INPUT API
219 
220 #ifndef STB_VORBIS_NO_PULLDATA_API
221 // This API assumes stb_vorbis is allowed to pull data from a source--
222 // either a block of memory containing the _entire_ vorbis stream, or a
223 // FILE * that you or it create, or possibly some other reading mechanism
224 // if you go modify the source to replace the FILE * case with some kind
225 // of callback to your code. (But if you don't support seeking, you may
226 // just want to go ahead and use pushdata.)
227 
228 #if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
229 extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
230 #endif
231 #if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
232 extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
233 #endif
234 // decode an entire file and output the data interleaved into a malloc()ed
235 // buffer stored in *output. The return value is the number of samples
236 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
237 // When you're done with it, just free() the pointer returned in *output.
238 
239 extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
240                                   int *error, const stb_vorbis_alloc *alloc_buffer);
241 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
242 // this must be the entire stream!). on failure, returns NULL and sets *error
243 
244 #ifndef STB_VORBIS_NO_STDIO
245 extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
246                                   int *error, const stb_vorbis_alloc *alloc_buffer);
247 // create an ogg vorbis decoder from a filename via fopen(). on failure,
248 // returns NULL and sets *error (possibly to VORBIS_file_open_failure).
249 
250 extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
251                                   int *error, const stb_vorbis_alloc *alloc_buffer);
252 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
253 // the _current_ seek point (ftell). on failure, returns NULL and sets *error.
254 // note that stb_vorbis must "own" this stream; if you seek it in between
255 // calls to stb_vorbis, it will become confused. Morever, if you attempt to
256 // perform stb_vorbis_seek_*() operations on this file, it will assume it
257 // owns the _entire_ rest of the file after the start point. Use the next
258 // function, stb_vorbis_open_file_section(), to limit it.
259 
260 extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
261                 int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
262 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
263 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
264 // on failure, returns NULL and sets *error. note that stb_vorbis must "own"
265 // this stream; if you seek it in between calls to stb_vorbis, it will become
266 // confused.
267 #endif
268 
269 extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
270 extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
271 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
272 // after calling seek_frame(), the next call to get_frame_*() will include
273 // the specified sample. after calling stb_vorbis_seek(), the next call to
274 // stb_vorbis_get_samples_* will start with the specified sample. If you
275 // do not need to seek to EXACTLY the target sample when using get_samples_*,
276 // you can also use seek_frame().
277 
278 extern void stb_vorbis_seek_start(stb_vorbis *f);
279 // this function is equivalent to stb_vorbis_seek(f,0)
280 
281 extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
282 extern float        stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
283 // these functions return the total length of the vorbis stream
284 
285 extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
286 // decode the next frame and return the number of samples. the number of
287 // channels returned are stored in *channels (which can be NULL--it is always
288 // the same as the number of channels reported by get_info). *output will
289 // contain an array of float* buffers, one per channel. These outputs will
290 // be overwritten on the next call to stb_vorbis_get_frame_*.
291 //
292 // You generally should not intermix calls to stb_vorbis_get_frame_*()
293 // and stb_vorbis_get_samples_*(), since the latter calls the former.
294 
295 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
296 extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
297 extern int stb_vorbis_get_frame_short            (stb_vorbis *f, int num_c, short **buffer, int num_samples);
298 #endif
299 // decode the next frame and return the number of *samples* per channel.
300 // Note that for interleaved data, you pass in the number of shorts (the
301 // size of your array), but the return value is the number of samples per
302 // channel, not the total number of samples.
303 //
304 // The data is coerced to the number of channels you request according to the
305 // channel coercion rules (see below). You must pass in the size of your
306 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
307 // The maximum buffer size needed can be gotten from get_info(); however,
308 // the Vorbis I specification implies an absolute maximum of 4096 samples
309 // per channel.
310 
311 // Channel coercion rules:
312 //    Let M be the number of channels requested, and N the number of channels present,
313 //    and Cn be the nth channel; let stereo L be the sum of all L and center channels,
314 //    and stereo R be the sum of all R and center channels (channel assignment from the
315 //    vorbis spec).
316 //        M    N       output
317 //        1    k      sum(Ck) for all k
318 //        2    *      stereo L, stereo R
319 //        k    l      k > l, the first l channels, then 0s
320 //        k    l      k <= l, the first k channels
321 //    Note that this is not _good_ surround etc. mixing at all! It's just so
322 //    you get something useful.
323 
324 extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
325 extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
326 // gets num_samples samples, not necessarily on a frame boundary--this requires
327 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
328 // Returns the number of samples stored per channel; it may be less than requested
329 // at the end of the file. If there are no more samples in the file, returns 0.
330 
331 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
332 extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
333 extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
334 #endif
335 // gets num_samples samples, not necessarily on a frame boundary--this requires
336 // buffering so you have to supply the buffers. Applies the coercion rules above
337 // to produce 'channels' channels. Returns the number of samples stored per channel;
338 // it may be less than requested at the end of the file. If there are no more
339 // samples in the file, returns 0.
340 
341 #endif
342 
343 ////////   ERROR CODES
344 
345 enum STBVorbisError
346 {
347    VORBIS__no_error,
348 
349    VORBIS_need_more_data=1,             // not a real error
350 
351    VORBIS_invalid_api_mixing,           // can't mix API modes
352    VORBIS_outofmem,                     // not enough memory
353    VORBIS_feature_not_supported,        // uses floor 0
354    VORBIS_too_many_channels,            // STB_VORBIS_MAX_CHANNELS is too small
355    VORBIS_file_open_failure,            // fopen() failed
356    VORBIS_seek_without_length,          // can't seek in unknown-length file
357 
358    VORBIS_unexpected_eof=10,            // file is truncated?
359    VORBIS_seek_invalid,                 // seek past EOF
360 
361    // decoding errors (corrupt/invalid stream) -- you probably
362    // don't care about the exact details of these
363 
364    // vorbis errors:
365    VORBIS_invalid_setup=20,
366    VORBIS_invalid_stream,
367 
368    // ogg errors:
369    VORBIS_missing_capture_pattern=30,
370    VORBIS_invalid_stream_structure_version,
371    VORBIS_continued_packet_flag_invalid,
372    VORBIS_incorrect_stream_serial_number,
373    VORBIS_invalid_first_page,
374    VORBIS_bad_packet_type,
375    VORBIS_cant_find_last_page,
376    VORBIS_seek_failed
377 };
378 
379 
380 #ifdef __cplusplus
381 }
382 #endif
383 
384 #endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
385 //
386 //  HEADER ENDS HERE
387 //
388 //////////////////////////////////////////////////////////////////////////////
389 
390 #ifndef STB_VORBIS_HEADER_ONLY
391 
392 // global configuration settings (e.g. set these in the project/makefile),
393 // or just set them in this file at the top (although ideally the first few
394 // should be visible when the header file is compiled too, although it's not
395 // crucial)
396 
397 // STB_VORBIS_NO_PUSHDATA_API
398 //     does not compile the code for the various stb_vorbis_*_pushdata()
399 //     functions
400 // #define STB_VORBIS_NO_PUSHDATA_API
401 
402 // STB_VORBIS_NO_PULLDATA_API
403 //     does not compile the code for the non-pushdata APIs
404 // #define STB_VORBIS_NO_PULLDATA_API
405 
406 // STB_VORBIS_NO_STDIO
407 //     does not compile the code for the APIs that use FILE *s internally
408 //     or externally (implied by STB_VORBIS_NO_PULLDATA_API)
409 // #define STB_VORBIS_NO_STDIO
410 
411 // STB_VORBIS_NO_INTEGER_CONVERSION
412 //     does not compile the code for converting audio sample data from
413 //     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
414 // #define STB_VORBIS_NO_INTEGER_CONVERSION
415 
416 // STB_VORBIS_NO_FAST_SCALED_FLOAT
417 //      does not use a fast float-to-int trick to accelerate float-to-int on
418 //      most platforms which requires endianness be defined correctly.
419 //#define STB_VORBIS_NO_FAST_SCALED_FLOAT
420 
421 
422 // STB_VORBIS_MAX_CHANNELS [number]
423 //     globally define this to the maximum number of channels you need.
424 //     The spec does not put a restriction on channels except that
425 //     the count is stored in a byte, so 255 is the hard limit.
426 //     Reducing this saves about 16 bytes per value, so using 16 saves
427 //     (255-16)*16 or around 4KB. Plus anything other memory usage
428 //     I forgot to account for. Can probably go as low as 8 (7.1 audio),
429 //     6 (5.1 audio), or 2 (stereo only).
430 #ifndef STB_VORBIS_MAX_CHANNELS
431 #define STB_VORBIS_MAX_CHANNELS    16  // enough for anyone?
432 #endif
433 
434 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
435 //     after a flush_pushdata(), stb_vorbis begins scanning for the
436 //     next valid page, without backtracking. when it finds something
437 //     that looks like a page, it streams through it and verifies its
438 //     CRC32. Should that validation fail, it keeps scanning. But it's
439 //     possible that _while_ streaming through to check the CRC32 of
440 //     one candidate page, it sees another candidate page. This #define
441 //     determines how many "overlapping" candidate pages it can search
442 //     at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
443 //     garbage pages could be as big as 64KB, but probably average ~16KB.
444 //     So don't hose ourselves by scanning an apparent 64KB page and
445 //     missing a ton of real ones in the interim; so minimum of 2
446 #ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
447 #define STB_VORBIS_PUSHDATA_CRC_COUNT  4
448 #endif
449 
450 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
451 //     sets the log size of the huffman-acceleration table.  Maximum
452 //     supported value is 24. with larger numbers, more decodings are O(1),
453 //     but the table size is larger so worse cache missing, so you'll have
454 //     to probe (and try multiple ogg vorbis files) to find the sweet spot.
455 #ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
456 #define STB_VORBIS_FAST_HUFFMAN_LENGTH   10
457 #endif
458 
459 // STB_VORBIS_FAST_BINARY_LENGTH [number]
460 //     sets the log size of the binary-search acceleration table. this
461 //     is used in similar fashion to the fast-huffman size to set initial
462 //     parameters for the binary search
463 
464 // STB_VORBIS_FAST_HUFFMAN_INT
465 //     The fast huffman tables are much more efficient if they can be
466 //     stored as 16-bit results instead of 32-bit results. This restricts
467 //     the codebooks to having only 65535 possible outcomes, though.
468 //     (At least, accelerated by the huffman table.)
469 #ifndef STB_VORBIS_FAST_HUFFMAN_INT
470 #define STB_VORBIS_FAST_HUFFMAN_SHORT
471 #endif
472 
473 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
474 //     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
475 //     back on binary searching for the correct one. This requires storing
476 //     extra tables with the huffman codes in sorted order. Defining this
477 //     symbol trades off space for speed by forcing a linear search in the
478 //     non-fast case, except for "sparse" codebooks.
479 // #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
480 
481 // STB_VORBIS_DIVIDES_IN_RESIDUE
482 //     stb_vorbis precomputes the result of the scalar residue decoding
483 //     that would otherwise require a divide per chunk. you can trade off
484 //     space for time by defining this symbol.
485 // #define STB_VORBIS_DIVIDES_IN_RESIDUE
486 
487 // STB_VORBIS_DIVIDES_IN_CODEBOOK
488 //     vorbis VQ codebooks can be encoded two ways: with every case explicitly
489 //     stored, or with all elements being chosen from a small range of values,
490 //     and all values possible in all elements. By default, stb_vorbis expands
491 //     this latter kind out to look like the former kind for ease of decoding,
492 //     because otherwise an integer divide-per-vector-element is required to
493 //     unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
494 //     trade off storage for speed.
495 //#define STB_VORBIS_DIVIDES_IN_CODEBOOK
496 
497 #ifdef STB_VORBIS_CODEBOOK_SHORTS
498 #error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
499 #endif
500 
501 // STB_VORBIS_DIVIDE_TABLE
502 //     this replaces small integer divides in the floor decode loop with
503 //     table lookups. made less than 1% difference, so disabled by default.
504 
505 // STB_VORBIS_NO_INLINE_DECODE
506 //     disables the inlining of the scalar codebook fast-huffman decode.
507 //     might save a little codespace; useful for debugging
508 // #define STB_VORBIS_NO_INLINE_DECODE
509 
510 // STB_VORBIS_NO_DEFER_FLOOR
511 //     Normally we only decode the floor without synthesizing the actual
512 //     full curve. We can instead synthesize the curve immediately. This
513 //     requires more memory and is very likely slower, so I don't think
514 //     you'd ever want to do it except for debugging.
515 // #define STB_VORBIS_NO_DEFER_FLOOR
516 
517 
518 
519 
520 //////////////////////////////////////////////////////////////////////////////
521 
522 #ifdef STB_VORBIS_NO_PULLDATA_API
523    #define STB_VORBIS_NO_INTEGER_CONVERSION
524    #define STB_VORBIS_NO_STDIO
525 #endif
526 
527 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
528    #define STB_VORBIS_NO_STDIO 1
529 #endif
530 
531 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
532 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
533 
534    // only need endianness for fast-float-to-int, which we don't
535    // use for pushdata
536 
537    #ifndef STB_VORBIS_BIG_ENDIAN
538      #define STB_VORBIS_ENDIAN  0
539    #else
540      #define STB_VORBIS_ENDIAN  1
541    #endif
542 
543 #endif
544 #endif
545 
546 
547 #ifndef STB_VORBIS_NO_STDIO
548 #include <stdio.h>
549 #endif
550 
551 #ifndef STB_VORBIS_NO_CRT
552 #include <stdlib.h>
553 #include <string.h>
554 #include <assert.h>
555 #include <math.h>
556 #if !(defined(__APPLE__) || defined(MACOSX) || defined(macintosh) || defined(Macintosh))
557 #if defined(__linux__) || defined(__linux) || defined(__EMSCRIPTEN__)
558 #include <alloca.h>
559 #endif
560 #endif
561 #else // STB_VORBIS_NO_CRT
562 #define NULL 0
563 #define malloc(s)   0
564 #define free(s)     ((void) 0)
565 #define realloc(s)  0
566 #endif // STB_VORBIS_NO_CRT
567 
568 #include <limits.h>
569 
570 #ifdef __MINGW32__
571    // eff you mingw:
572    //     "fixed":
573    //         http://sourceforge.net/p/mingw-w64/mailman/message/32882927/
574    //     "no that broke the build, reverted, who cares about C":
575    //         http://sourceforge.net/p/mingw-w64/mailman/message/32890381/
576    #ifdef __forceinline
577    #undef __forceinline
578    #endif
579    #define __forceinline
580 #elif !defined(_MSC_VER)
581    #if __GNUC__
582       #define __forceinline inline
583    #else
584       #define __forceinline
585    #endif
586 #endif
587 
588 #if STB_VORBIS_MAX_CHANNELS > 256
589 #error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
590 #endif
591 
592 #if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
593 #error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
594 #endif
595 
596 
597 #if 0
598 #include <crtdbg.h>
599 #define CHECK(f)   _CrtIsValidHeapPointer(f->channel_buffers[1])
600 #else
601 #define CHECK(f)   ((void) 0)
602 #endif
603 
604 #define MAX_BLOCKSIZE_LOG  13   // from specification
605 #define MAX_BLOCKSIZE      (1 << MAX_BLOCKSIZE_LOG)
606 
607 
608 typedef unsigned char  uint8;
609 typedef   signed char   int8;
610 typedef unsigned short uint16;
611 typedef   signed short  int16;
612 typedef unsigned int   uint32;
613 typedef   signed int    int32;
614 
615 #ifndef TRUE
616 #define TRUE 1
617 #define FALSE 0
618 #endif
619 
620 typedef float codetype;
621 
622 // @NOTE
623 //
624 // Some arrays below are tagged "//varies", which means it's actually
625 // a variable-sized piece of data, but rather than malloc I assume it's
626 // small enough it's better to just allocate it all together with the
627 // main thing
628 //
629 // Most of the variables are specified with the smallest size I could pack
630 // them into. It might give better performance to make them all full-sized
631 // integers. It should be safe to freely rearrange the structures or change
632 // the sizes larger--nothing relies on silently truncating etc., nor the
633 // order of variables.
634 
635 #define FAST_HUFFMAN_TABLE_SIZE   (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
636 #define FAST_HUFFMAN_TABLE_MASK   (FAST_HUFFMAN_TABLE_SIZE - 1)
637 
638 typedef struct
639 {
640    int dimensions, entries;
641    uint8 *codeword_lengths;
642    float  minimum_value;
643    float  delta_value;
644    uint8  value_bits;
645    uint8  lookup_type;
646    uint8  sequence_p;
647    uint8  sparse;
648    uint32 lookup_values;
649    codetype *multiplicands;
650    uint32 *codewords;
651    #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
652     int16  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
653    #else
654     int32  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
655    #endif
656    uint32 *sorted_codewords;
657    int    *sorted_values;
658    int     sorted_entries;
659 } Codebook;
660 
661 typedef struct
662 {
663    uint8 order;
664    uint16 rate;
665    uint16 bark_map_size;
666    uint8 amplitude_bits;
667    uint8 amplitude_offset;
668    uint8 number_of_books;
669    uint8 book_list[16]; // varies
670 } Floor0;
671 
672 typedef struct
673 {
674    uint8 partitions;
675    uint8 partition_class_list[32]; // varies
676    uint8 class_dimensions[16]; // varies
677    uint8 class_subclasses[16]; // varies
678    uint8 class_masterbooks[16]; // varies
679    int16 subclass_books[16][8]; // varies
680    uint16 Xlist[31*8+2]; // varies
681    uint8 sorted_order[31*8+2];
682    uint8 neighbors[31*8+2][2];
683    uint8 floor1_multiplier;
684    uint8 rangebits;
685    int values;
686 } Floor1;
687 
688 typedef union
689 {
690    Floor0 floor0;
691    Floor1 floor1;
692 } Floor;
693 
694 typedef struct
695 {
696    uint32 begin, end;
697    uint32 part_size;
698    uint8 classifications;
699    uint8 classbook;
700    uint8 **classdata;
701    int16 (*residue_books)[8];
702 } Residue;
703 
704 typedef struct
705 {
706    uint8 magnitude;
707    uint8 angle;
708    uint8 mux;
709 } MappingChannel;
710 
711 typedef struct
712 {
713    uint16 coupling_steps;
714    MappingChannel *chan;
715    uint8  submaps;
716    uint8  submap_floor[15]; // varies
717    uint8  submap_residue[15]; // varies
718 } Mapping;
719 
720 typedef struct
721 {
722    uint8 blockflag;
723    uint8 mapping;
724    uint16 windowtype;
725    uint16 transformtype;
726 } Mode;
727 
728 typedef struct
729 {
730    uint32  goal_crc;    // expected crc if match
731    int     bytes_left;  // bytes left in packet
732    uint32  crc_so_far;  // running crc
733    int     bytes_done;  // bytes processed in _current_ chunk
734    uint32  sample_loc;  // granule pos encoded in page
735 } CRCscan;
736 
737 typedef struct
738 {
739    uint32 page_start, page_end;
740    uint32 last_decoded_sample;
741 } ProbedPage;
742 
743 struct stb_vorbis
744 {
745   // user-accessible info
746    unsigned int sample_rate;
747    int channels;
748 
749    unsigned int setup_memory_required;
750    unsigned int temp_memory_required;
751    unsigned int setup_temp_memory_required;
752 
753   // input config
754 #ifndef STB_VORBIS_NO_STDIO
755    FILE *f;
756    uint32 f_start;
757    int close_on_free;
758 #endif
759 
760    uint8 *stream;
761    uint8 *stream_start;
762    uint8 *stream_end;
763 
764    uint32 stream_len;
765 
766    uint8  push_mode;
767 
768    uint32 first_audio_page_offset;
769 
770    ProbedPage p_first, p_last;
771 
772   // memory management
773    stb_vorbis_alloc alloc;
774    int setup_offset;
775    int temp_offset;
776 
777   // run-time results
778    int eof;
779    enum STBVorbisError error;
780 
781   // user-useful data
782 
783   // header info
784    int blocksize[2];
785    int blocksize_0, blocksize_1;
786    int codebook_count;
787    Codebook *codebooks;
788    int floor_count;
789    uint16 floor_types[64]; // varies
790    Floor *floor_config;
791    int residue_count;
792    uint16 residue_types[64]; // varies
793    Residue *residue_config;
794    int mapping_count;
795    Mapping *mapping;
796    int mode_count;
797    Mode mode_config[64];  // varies
798 
799    uint32 total_samples;
800 
801   // decode buffer
802    float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
803    float *outputs        [STB_VORBIS_MAX_CHANNELS];
804 
805    float *previous_window[STB_VORBIS_MAX_CHANNELS];
806    int previous_length;
807 
808    #ifndef STB_VORBIS_NO_DEFER_FLOOR
809    int16 *finalY[STB_VORBIS_MAX_CHANNELS];
810    #else
811    float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
812    #endif
813 
814    uint32 current_loc; // sample location of next frame to decode
815    int    current_loc_valid;
816 
817   // per-blocksize precomputed data
818 
819    // twiddle factors
820    float *A[2],*B[2],*C[2];
821    float *window[2];
822    uint16 *bit_reverse[2];
823 
824   // current page/packet/segment streaming info
825    uint32 serial; // stream serial number for verification
826    int last_page;
827    int segment_count;
828    uint8 segments[255];
829    uint8 page_flag;
830    uint8 bytes_in_seg;
831    uint8 first_decode;
832    int next_seg;
833    int last_seg;  // flag that we're on the last segment
834    int last_seg_which; // what was the segment number of the last seg?
835    uint32 acc;
836    int valid_bits;
837    int packet_bytes;
838    int end_seg_with_known_loc;
839    uint32 known_loc_for_packet;
840    int discard_samples_deferred;
841    uint32 samples_output;
842 
843   // push mode scanning
844    int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
845 #ifndef STB_VORBIS_NO_PUSHDATA_API
846    CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
847 #endif
848 
849   // sample-access
850    int channel_buffer_start;
851    int channel_buffer_end;
852 };
853 
854 #if defined(STB_VORBIS_NO_PUSHDATA_API)
855    #define IS_PUSH_MODE(f)   FALSE
856 #elif defined(STB_VORBIS_NO_PULLDATA_API)
857    #define IS_PUSH_MODE(f)   TRUE
858 #else
859    #define IS_PUSH_MODE(f)   ((f)->push_mode)
860 #endif
861 
862 typedef struct stb_vorbis vorb;
863 
error(vorb * f,enum STBVorbisError e)864 static int error(vorb *f, enum STBVorbisError e)
865 {
866    f->error = e;
867    if (!f->eof && e != VORBIS_need_more_data) {
868       f->error=e; // breakpoint for debugging
869    }
870    return 0;
871 }
872 
873 
874 // these functions are used for allocating temporary memory
875 // while decoding. if you can afford the stack space, use
876 // alloca(); otherwise, provide a temp buffer and it will
877 // allocate out of those.
878 
879 #define array_size_required(count,size)  (count*(sizeof(void *)+(size)))
880 
881 #define temp_alloc(f,size)              (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
882 #ifdef dealloca
883 #define temp_free(f,p)                  (f->alloc.alloc_buffer ? 0 : dealloca(size))
884 #else
885 #define temp_free(f,p)                  0
886 #endif
887 #define temp_alloc_save(f)              ((f)->temp_offset)
888 #define temp_alloc_restore(f,p)         ((f)->temp_offset = (p))
889 
890 #define temp_block_array(f,count,size)  make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
891 
892 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
make_block_array(void * mem,int count,int size)893 static void *make_block_array(void *mem, int count, int size)
894 {
895    int i;
896    void ** p = (void **) mem;
897    char *q = (char *) (p + count);
898    for (i=0; i < count; ++i) {
899       p[i] = q;
900       q += size;
901    }
902    return p;
903 }
904 
setup_malloc(vorb * f,int sz)905 static void *setup_malloc(vorb *f, int sz)
906 {
907    sz = (sz+3) & ~3;
908    f->setup_memory_required += sz;
909    if (f->alloc.alloc_buffer) {
910       void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
911       if (f->setup_offset + sz > f->temp_offset) return NULL;
912       f->setup_offset += sz;
913       return p;
914    }
915    return sz ? malloc(sz) : NULL;
916 }
917 
setup_free(vorb * f,void * p)918 static void setup_free(vorb *f, void *p)
919 {
920    if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack
921    free(p);
922 }
923 
setup_temp_malloc(vorb * f,int sz)924 static void *setup_temp_malloc(vorb *f, int sz)
925 {
926    sz = (sz+3) & ~3;
927    if (f->alloc.alloc_buffer) {
928       if (f->temp_offset - sz < f->setup_offset) return NULL;
929       f->temp_offset -= sz;
930       return (char *) f->alloc.alloc_buffer + f->temp_offset;
931    }
932    return malloc(sz);
933 }
934 
setup_temp_free(vorb * f,void * p,int sz)935 static void setup_temp_free(vorb *f, void *p, int sz)
936 {
937    if (f->alloc.alloc_buffer) {
938       f->temp_offset += (sz+3)&~3;
939       return;
940    }
941    free(p);
942 }
943 
944 #define CRC32_POLY    0x04c11db7   // from spec
945 
946 static uint32 crc_table[256];
crc32_init(void)947 static void crc32_init(void)
948 {
949    int i,j;
950    uint32 s;
951    for(i=0; i < 256; i++) {
952       for (s=(uint32) i << 24, j=0; j < 8; ++j)
953          s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
954       crc_table[i] = s;
955    }
956 }
957 
crc32_update(uint32 crc,uint8 byte)958 static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
959 {
960    return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
961 }
962 
963 
964 // used in setup, and for huffman that doesn't go fast path
bit_reverse(unsigned int n)965 static unsigned int bit_reverse(unsigned int n)
966 {
967   n = ((n & 0xAAAAAAAA) >>  1) | ((n & 0x55555555) << 1);
968   n = ((n & 0xCCCCCCCC) >>  2) | ((n & 0x33333333) << 2);
969   n = ((n & 0xF0F0F0F0) >>  4) | ((n & 0x0F0F0F0F) << 4);
970   n = ((n & 0xFF00FF00) >>  8) | ((n & 0x00FF00FF) << 8);
971   return (n >> 16) | (n << 16);
972 }
973 
square(float x)974 static float square(float x)
975 {
976    return x*x;
977 }
978 
979 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
980 // as required by the specification. fast(?) implementation from stb.h
981 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
ilog(int32 n)982 static int ilog(int32 n)
983 {
984    static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
985 
986    // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
987    if (n < (1 << 14))
988         if (n < (1 <<  4))        return     0 + log2_4[n      ];
989         else if (n < (1 <<  9))      return  5 + log2_4[n >>  5];
990              else                     return 10 + log2_4[n >> 10];
991    else if (n < (1 << 24))
992              if (n < (1 << 19))      return 15 + log2_4[n >> 15];
993              else                     return 20 + log2_4[n >> 20];
994         else if (n < (1 << 29))      return 25 + log2_4[n >> 25];
995              else if (n < (1 << 31)) return 30 + log2_4[n >> 30];
996                   else                return 0; // signed n returns 0
997 }
998 
999 #ifndef M_PI
1000   #define M_PI  3.14159265358979323846264f  // from CRC
1001 #endif
1002 
1003 // code length assigned to a value with no huffman encoding
1004 #define NO_CODE   255
1005 
1006 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
1007 //
1008 // these functions are only called at setup, and only a few times
1009 // per file
1010 
float32_unpack(uint32 x)1011 static float float32_unpack(uint32 x)
1012 {
1013    // from the specification
1014    uint32 mantissa = x & 0x1fffff;
1015    uint32 sign = x & 0x80000000;
1016    uint32 exp = (x & 0x7fe00000) >> 21;
1017    double res = sign ? -(double)mantissa : (double)mantissa;
1018    return (float) ldexp((float)res, exp-788);
1019 }
1020 
1021 
1022 // zlib & jpeg huffman tables assume that the output symbols
1023 // can either be arbitrarily arranged, or have monotonically
1024 // increasing frequencies--they rely on the lengths being sorted;
1025 // this makes for a very simple generation algorithm.
1026 // vorbis allows a huffman table with non-sorted lengths. This
1027 // requires a more sophisticated construction, since symbols in
1028 // order do not map to huffman codes "in order".
add_entry(Codebook * c,uint32 huff_code,int symbol,int count,int len,uint32 * values)1029 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
1030 {
1031    if (!c->sparse) {
1032       c->codewords      [symbol] = huff_code;
1033    } else {
1034       c->codewords       [count] = huff_code;
1035       c->codeword_lengths[count] = len;
1036       values             [count] = symbol;
1037    }
1038 }
1039 
compute_codewords(Codebook * c,uint8 * len,int n,uint32 * values)1040 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
1041 {
1042    int i,k,m=0;
1043    uint32 available[32];
1044 
1045    memset(available, 0, sizeof(available));
1046    // find the first entry
1047    for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
1048    if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
1049    // add to the list
1050    add_entry(c, 0, k, m++, len[k], values);
1051    // add all available leaves
1052    for (i=1; i <= len[k]; ++i)
1053       available[i] = 1U << (32-i);
1054    // note that the above code treats the first case specially,
1055    // but it's really the same as the following code, so they
1056    // could probably be combined (except the initial code is 0,
1057    // and I use 0 in available[] to mean 'empty')
1058    for (i=k+1; i < n; ++i) {
1059       uint32 res;
1060       int z = len[i], y;
1061       if (z == NO_CODE) continue;
1062       // find lowest available leaf (should always be earliest,
1063       // which is what the specification calls for)
1064       // note that this property, and the fact we can never have
1065       // more than one free leaf at a given level, isn't totally
1066       // trivial to prove, but it seems true and the assert never
1067       // fires, so!
1068       while (z > 0 && !available[z]) --z;
1069       if (z == 0) { return FALSE; }
1070       res = available[z];
1071       assert(z >= 0 && z < 32);
1072       available[z] = 0;
1073       add_entry(c, bit_reverse(res), i, m++, len[i], values);
1074       // propogate availability up the tree
1075       if (z != len[i]) {
1076          assert(len[i] >= 0 && len[i] < 32);
1077          for (y=len[i]; y > z; --y) {
1078             assert(available[y] == 0);
1079             available[y] = res + (1 << (32-y));
1080          }
1081       }
1082    }
1083    return TRUE;
1084 }
1085 
1086 // accelerated huffman table allows fast O(1) match of all symbols
1087 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
compute_accelerated_huffman(Codebook * c)1088 static void compute_accelerated_huffman(Codebook *c)
1089 {
1090    int i, len;
1091    for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
1092       c->fast_huffman[i] = -1;
1093 
1094    len = c->sparse ? c->sorted_entries : c->entries;
1095    #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
1096    if (len > 32767) len = 32767; // largest possible value we can encode!
1097    #endif
1098    for (i=0; i < len; ++i) {
1099       if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
1100          uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
1101          // set table entries for all bit combinations in the higher bits
1102          while (z < FAST_HUFFMAN_TABLE_SIZE) {
1103              c->fast_huffman[z] = i;
1104              z += 1 << c->codeword_lengths[i];
1105          }
1106       }
1107    }
1108 }
1109 
1110 #ifdef _MSC_VER
1111 #define STBV_CDECL __cdecl
1112 #else
1113 #define STBV_CDECL
1114 #endif
1115 
uint32_compare(const void * p,const void * q)1116 static int STBV_CDECL uint32_compare(const void *p, const void *q)
1117 {
1118    uint32 x = * (uint32 *) p;
1119    uint32 y = * (uint32 *) q;
1120    return x < y ? -1 : x > y;
1121 }
1122 
include_in_sort(Codebook * c,uint8 len)1123 static int include_in_sort(Codebook *c, uint8 len)
1124 {
1125    if (c->sparse) { assert(len != NO_CODE); return TRUE; }
1126    if (len == NO_CODE) return FALSE;
1127    if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
1128    return FALSE;
1129 }
1130 
1131 // if the fast table above doesn't work, we want to binary
1132 // search them... need to reverse the bits
compute_sorted_huffman(Codebook * c,uint8 * lengths,uint32 * values)1133 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
1134 {
1135    int i, len;
1136    // build a list of all the entries
1137    // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
1138    // this is kind of a frivolous optimization--I don't see any performance improvement,
1139    // but it's like 4 extra lines of code, so.
1140    if (!c->sparse) {
1141       int k = 0;
1142       for (i=0; i < c->entries; ++i)
1143          if (include_in_sort(c, lengths[i]))
1144             c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
1145       assert(k == c->sorted_entries);
1146    } else {
1147       for (i=0; i < c->sorted_entries; ++i)
1148          c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
1149    }
1150 
1151    qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
1152    c->sorted_codewords[c->sorted_entries] = 0xffffffff;
1153 
1154    len = c->sparse ? c->sorted_entries : c->entries;
1155    // now we need to indicate how they correspond; we could either
1156    //   #1: sort a different data structure that says who they correspond to
1157    //   #2: for each sorted entry, search the original list to find who corresponds
1158    //   #3: for each original entry, find the sorted entry
1159    // #1 requires extra storage, #2 is slow, #3 can use binary search!
1160    for (i=0; i < len; ++i) {
1161       int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
1162       if (include_in_sort(c,huff_len)) {
1163          uint32 code = bit_reverse(c->codewords[i]);
1164          int x=0, n=c->sorted_entries;
1165          while (n > 1) {
1166             // invariant: sc[x] <= code < sc[x+n]
1167             int m = x + (n >> 1);
1168             if (c->sorted_codewords[m] <= code) {
1169                x = m;
1170                n -= (n>>1);
1171             } else {
1172                n >>= 1;
1173             }
1174          }
1175          assert(c->sorted_codewords[x] == code);
1176          if (c->sparse) {
1177             c->sorted_values[x] = values[i];
1178             c->codeword_lengths[x] = huff_len;
1179          } else {
1180             c->sorted_values[x] = i;
1181          }
1182       }
1183    }
1184 }
1185 
1186 // only run while parsing the header (3 times)
vorbis_validate(uint8 * data)1187 static int vorbis_validate(uint8 *data)
1188 {
1189    static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
1190    return memcmp(data, vorbis, 6) == 0;
1191 }
1192 
1193 // called from setup only, once per code book
1194 // (formula implied by specification)
lookup1_values(int entries,int dim)1195 static int lookup1_values(int entries, int dim)
1196 {
1197    int r = (int) floor(exp((float) log((float) entries) / dim));
1198    if ((int) floor(pow((float) r+1, dim)) <= entries)   // (int) cast for MinGW warning;
1199       ++r;                                              // floor() to avoid _ftol() when non-CRT
1200    assert(pow((float) r+1, dim) > entries);
1201    assert((int) floor(pow((float) r, dim)) <= entries); // (int),floor() as above
1202    return r;
1203 }
1204 
1205 // called twice per file
compute_twiddle_factors(int n,float * A,float * B,float * C)1206 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
1207 {
1208    int n4 = n >> 2, n8 = n >> 3;
1209    int k,k2;
1210 
1211    for (k=k2=0; k < n4; ++k,k2+=2) {
1212       A[k2  ] = (float)  cos(4*k*M_PI/n);
1213       A[k2+1] = (float) -sin(4*k*M_PI/n);
1214       B[k2  ] = (float)  cos((k2+1)*M_PI/n/2) * 0.5f;
1215       B[k2+1] = (float)  sin((k2+1)*M_PI/n/2) * 0.5f;
1216    }
1217    for (k=k2=0; k < n8; ++k,k2+=2) {
1218       C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
1219       C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
1220    }
1221 }
1222 
compute_window(int n,float * window)1223 static void compute_window(int n, float *window)
1224 {
1225    int n2 = n >> 1, i;
1226    for (i=0; i < n2; ++i)
1227       window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
1228 }
1229 
compute_bitreverse(int n,uint16 * rev)1230 static void compute_bitreverse(int n, uint16 *rev)
1231 {
1232    int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
1233    int i, n8 = n >> 3;
1234    for (i=0; i < n8; ++i)
1235       rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
1236 }
1237 
init_blocksize(vorb * f,int b,int n)1238 static int init_blocksize(vorb *f, int b, int n)
1239 {
1240    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
1241    f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1242    f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1243    f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
1244    if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
1245    compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
1246    f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1247    if (!f->window[b]) return error(f, VORBIS_outofmem);
1248    compute_window(n, f->window[b]);
1249    f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
1250    if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
1251    compute_bitreverse(n, f->bit_reverse[b]);
1252    return TRUE;
1253 }
1254 
neighbors(uint16 * x,int n,int * plow,int * phigh)1255 static void neighbors(uint16 *x, int n, int *plow, int *phigh)
1256 {
1257    int low = -1;
1258    int high = 65536;
1259    int i;
1260    for (i=0; i < n; ++i) {
1261       if (x[i] > low  && x[i] < x[n]) { *plow  = i; low = x[i]; }
1262       if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
1263    }
1264 }
1265 
1266 // this has been repurposed so y is now the original index instead of y
1267 typedef struct
1268 {
1269    uint16 x,y;
1270 } Point;
1271 
point_compare(const void * p,const void * q)1272 static int STBV_CDECL point_compare(const void *p, const void *q)
1273 {
1274    Point *a = (Point *) p;
1275    Point *b = (Point *) q;
1276    return a->x < b->x ? -1 : a->x > b->x;
1277 }
1278 
1279 //
1280 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
1281 
1282 
1283 #if defined(STB_VORBIS_NO_STDIO)
1284    #define USE_MEMORY(z)    TRUE
1285 #else
1286    #define USE_MEMORY(z)    ((z)->stream)
1287 #endif
1288 
get8(vorb * z)1289 static uint8 get8(vorb *z)
1290 {
1291    if (USE_MEMORY(z)) {
1292       if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
1293       return *z->stream++;
1294    }
1295 
1296    #ifndef STB_VORBIS_NO_STDIO
1297    {
1298    int c = fgetc(z->f);
1299    if (c == EOF) { z->eof = TRUE; return 0; }
1300    return c;
1301    }
1302    #endif
1303 }
1304 
get32(vorb * f)1305 static uint32 get32(vorb *f)
1306 {
1307    uint32 x;
1308    x = get8(f);
1309    x += get8(f) << 8;
1310    x += get8(f) << 16;
1311    x += (uint32) get8(f) << 24;
1312    return x;
1313 }
1314 
getn(vorb * z,uint8 * data,int n)1315 static int getn(vorb *z, uint8 *data, int n)
1316 {
1317    if (USE_MEMORY(z)) {
1318       if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
1319       memcpy(data, z->stream, n);
1320       z->stream += n;
1321       return 1;
1322    }
1323 
1324    #ifndef STB_VORBIS_NO_STDIO
1325    if (fread(data, n, 1, z->f) == 1)
1326       return 1;
1327    else {
1328       z->eof = 1;
1329       return 0;
1330    }
1331    #endif
1332 }
1333 
skip(vorb * z,int n)1334 static void skip(vorb *z, int n)
1335 {
1336    if (USE_MEMORY(z)) {
1337       z->stream += n;
1338       if (z->stream >= z->stream_end) z->eof = 1;
1339       return;
1340    }
1341    #ifndef STB_VORBIS_NO_STDIO
1342    {
1343       long x = ftell(z->f);
1344       fseek(z->f, x+n, SEEK_SET);
1345    }
1346    #endif
1347 }
1348 
set_file_offset(stb_vorbis * f,unsigned int loc)1349 static int set_file_offset(stb_vorbis *f, unsigned int loc)
1350 {
1351    #ifndef STB_VORBIS_NO_PUSHDATA_API
1352    if (f->push_mode) return 0;
1353    #endif
1354    f->eof = 0;
1355    if (USE_MEMORY(f)) {
1356       if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
1357          f->stream = f->stream_end;
1358          f->eof = 1;
1359          return 0;
1360       } else {
1361          f->stream = f->stream_start + loc;
1362          return 1;
1363       }
1364    }
1365    #ifndef STB_VORBIS_NO_STDIO
1366    if (loc + f->f_start < loc || loc >= 0x80000000) {
1367       loc = 0x7fffffff;
1368       f->eof = 1;
1369    } else {
1370       loc += f->f_start;
1371    }
1372    if (!fseek(f->f, loc, SEEK_SET))
1373       return 1;
1374    f->eof = 1;
1375    fseek(f->f, f->f_start, SEEK_END);
1376    return 0;
1377    #endif
1378 }
1379 
1380 
1381 static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
1382 
capture_pattern(vorb * f)1383 static int capture_pattern(vorb *f)
1384 {
1385    if (0x4f != get8(f)) return FALSE;
1386    if (0x67 != get8(f)) return FALSE;
1387    if (0x67 != get8(f)) return FALSE;
1388    if (0x53 != get8(f)) return FALSE;
1389    return TRUE;
1390 }
1391 
1392 #define PAGEFLAG_continued_packet   1
1393 #define PAGEFLAG_first_page         2
1394 #define PAGEFLAG_last_page          4
1395 
start_page_no_capturepattern(vorb * f)1396 static int start_page_no_capturepattern(vorb *f)
1397 {
1398    uint32 loc0,loc1,n;
1399    // stream structure version
1400    if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
1401    // header flag
1402    f->page_flag = get8(f);
1403    // absolute granule position
1404    loc0 = get32(f);
1405    loc1 = get32(f);
1406    // @TODO: validate loc0,loc1 as valid positions?
1407    // stream serial number -- vorbis doesn't interleave, so discard
1408    get32(f);
1409    //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
1410    // page sequence number
1411    n = get32(f);
1412    f->last_page = n;
1413    // CRC32
1414    get32(f);
1415    // page_segments
1416    f->segment_count = get8(f);
1417    if (!getn(f, f->segments, f->segment_count))
1418       return error(f, VORBIS_unexpected_eof);
1419    // assume we _don't_ know any the sample position of any segments
1420    f->end_seg_with_known_loc = -2;
1421    if (loc0 != ~0U || loc1 != ~0U) {
1422       int i;
1423       // determine which packet is the last one that will complete
1424       for (i=f->segment_count-1; i >= 0; --i)
1425          if (f->segments[i] < 255)
1426             break;
1427       // 'i' is now the index of the _last_ segment of a packet that ends
1428       if (i >= 0) {
1429          f->end_seg_with_known_loc = i;
1430          f->known_loc_for_packet   = loc0;
1431       }
1432    }
1433    if (f->first_decode) {
1434       int i,len;
1435       ProbedPage p;
1436       len = 0;
1437       for (i=0; i < f->segment_count; ++i)
1438          len += f->segments[i];
1439       len += 27 + f->segment_count;
1440       p.page_start = f->first_audio_page_offset;
1441       p.page_end = p.page_start + len;
1442       p.last_decoded_sample = loc0;
1443       f->p_first = p;
1444    }
1445    f->next_seg = 0;
1446    return TRUE;
1447 }
1448 
start_page(vorb * f)1449 static int start_page(vorb *f)
1450 {
1451    if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
1452    return start_page_no_capturepattern(f);
1453 }
1454 
start_packet(vorb * f)1455 static int start_packet(vorb *f)
1456 {
1457    while (f->next_seg == -1) {
1458       if (!start_page(f)) return FALSE;
1459       if (f->page_flag & PAGEFLAG_continued_packet)
1460          return error(f, VORBIS_continued_packet_flag_invalid);
1461    }
1462    f->last_seg = FALSE;
1463    f->valid_bits = 0;
1464    f->packet_bytes = 0;
1465    f->bytes_in_seg = 0;
1466    // f->next_seg is now valid
1467    return TRUE;
1468 }
1469 
maybe_start_packet(vorb * f)1470 static int maybe_start_packet(vorb *f)
1471 {
1472    if (f->next_seg == -1) {
1473       int x = get8(f);
1474       if (f->eof) return FALSE; // EOF at page boundary is not an error!
1475       if (0x4f != x      ) return error(f, VORBIS_missing_capture_pattern);
1476       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1477       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1478       if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1479       if (!start_page_no_capturepattern(f)) return FALSE;
1480       if (f->page_flag & PAGEFLAG_continued_packet) {
1481          // set up enough state that we can read this packet if we want,
1482          // e.g. during recovery
1483          f->last_seg = FALSE;
1484          f->bytes_in_seg = 0;
1485          return error(f, VORBIS_continued_packet_flag_invalid);
1486       }
1487    }
1488    return start_packet(f);
1489 }
1490 
next_segment(vorb * f)1491 static int next_segment(vorb *f)
1492 {
1493    int len;
1494    if (f->last_seg) return 0;
1495    if (f->next_seg == -1) {
1496       f->last_seg_which = f->segment_count-1; // in case start_page fails
1497       if (!start_page(f)) { f->last_seg = 1; return 0; }
1498       if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1499    }
1500    len = f->segments[f->next_seg++];
1501    if (len < 255) {
1502       f->last_seg = TRUE;
1503       f->last_seg_which = f->next_seg-1;
1504    }
1505    if (f->next_seg >= f->segment_count)
1506       f->next_seg = -1;
1507    assert(f->bytes_in_seg == 0);
1508    f->bytes_in_seg = len;
1509    return len;
1510 }
1511 
1512 #define EOP    (-1)
1513 #define INVALID_BITS  (-1)
1514 
get8_packet_raw(vorb * f)1515 static int get8_packet_raw(vorb *f)
1516 {
1517    if (!f->bytes_in_seg) {  // CLANG!
1518       if (f->last_seg) return EOP;
1519       else if (!next_segment(f)) return EOP;
1520    }
1521    assert(f->bytes_in_seg > 0);
1522    --f->bytes_in_seg;
1523    ++f->packet_bytes;
1524    return get8(f);
1525 }
1526 
get8_packet(vorb * f)1527 static int get8_packet(vorb *f)
1528 {
1529    int x = get8_packet_raw(f);
1530    f->valid_bits = 0;
1531    return x;
1532 }
1533 
flush_packet(vorb * f)1534 static void flush_packet(vorb *f)
1535 {
1536    while (get8_packet_raw(f) != EOP);
1537 }
1538 
1539 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1540 // as the huffman decoder?
get_bits(vorb * f,int n)1541 static uint32 get_bits(vorb *f, int n)
1542 {
1543    uint32 z;
1544 
1545    if (f->valid_bits < 0) return 0;
1546    if (f->valid_bits < n) {
1547       if (n > 24) {
1548          // the accumulator technique below would not work correctly in this case
1549          z = get_bits(f, 24);
1550          z += get_bits(f, n-24) << 24;
1551          return z;
1552       }
1553       if (f->valid_bits == 0) f->acc = 0;
1554       while (f->valid_bits < n) {
1555          int z = get8_packet_raw(f);
1556          if (z == EOP) {
1557             f->valid_bits = INVALID_BITS;
1558             return 0;
1559          }
1560          f->acc += z << f->valid_bits;
1561          f->valid_bits += 8;
1562       }
1563    }
1564    if (f->valid_bits < 0) return 0;
1565    z = f->acc & ((1 << n)-1);
1566    f->acc >>= n;
1567    f->valid_bits -= n;
1568    return z;
1569 }
1570 
1571 // @OPTIMIZE: primary accumulator for huffman
1572 // expand the buffer to as many bits as possible without reading off end of packet
1573 // it might be nice to allow f->valid_bits and f->acc to be stored in registers,
1574 // e.g. cache them locally and decode locally
prep_huffman(vorb * f)1575 static __forceinline void prep_huffman(vorb *f)
1576 {
1577    if (f->valid_bits <= 24) {
1578       if (f->valid_bits == 0) f->acc = 0;
1579       do {
1580          int z;
1581          if (f->last_seg && !f->bytes_in_seg) return;
1582          z = get8_packet_raw(f);
1583          if (z == EOP) return;
1584          f->acc += (unsigned) z << f->valid_bits;
1585          f->valid_bits += 8;
1586       } while (f->valid_bits <= 24);
1587    }
1588 }
1589 
1590 enum
1591 {
1592    VORBIS_packet_id = 1,
1593    VORBIS_packet_comment = 3,
1594    VORBIS_packet_setup = 5
1595 };
1596 
codebook_decode_scalar_raw(vorb * f,Codebook * c)1597 static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
1598 {
1599    int i;
1600    prep_huffman(f);
1601 
1602    if (c->codewords == NULL && c->sorted_codewords == NULL)
1603       return -1;
1604 
1605    // cases to use binary search: sorted_codewords && !c->codewords
1606    //                             sorted_codewords && c->entries > 8
1607    if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
1608       // binary search
1609       uint32 code = bit_reverse(f->acc);
1610       int x=0, n=c->sorted_entries, len;
1611 
1612       while (n > 1) {
1613          // invariant: sc[x] <= code < sc[x+n]
1614          int m = x + (n >> 1);
1615          if (c->sorted_codewords[m] <= code) {
1616             x = m;
1617             n -= (n>>1);
1618          } else {
1619             n >>= 1;
1620          }
1621       }
1622       // x is now the sorted index
1623       if (!c->sparse) x = c->sorted_values[x];
1624       // x is now sorted index if sparse, or symbol otherwise
1625       len = c->codeword_lengths[x];
1626       if (f->valid_bits >= len) {
1627          f->acc >>= len;
1628          f->valid_bits -= len;
1629          return x;
1630       }
1631 
1632       f->valid_bits = 0;
1633       return -1;
1634    }
1635 
1636    // if small, linear search
1637    assert(!c->sparse);
1638    for (i=0; i < c->entries; ++i) {
1639       if (c->codeword_lengths[i] == NO_CODE) continue;
1640       if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
1641          if (f->valid_bits >= c->codeword_lengths[i]) {
1642             f->acc >>= c->codeword_lengths[i];
1643             f->valid_bits -= c->codeword_lengths[i];
1644             return i;
1645          }
1646          f->valid_bits = 0;
1647          return -1;
1648       }
1649    }
1650 
1651    error(f, VORBIS_invalid_stream);
1652    f->valid_bits = 0;
1653    return -1;
1654 }
1655 
1656 #ifndef STB_VORBIS_NO_INLINE_DECODE
1657 
1658 #define DECODE_RAW(var, f,c)                                  \
1659    if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)        \
1660       prep_huffman(f);                                        \
1661    var = f->acc & FAST_HUFFMAN_TABLE_MASK;                    \
1662    var = c->fast_huffman[var];                                \
1663    if (var >= 0) {                                            \
1664       int n = c->codeword_lengths[var];                       \
1665       f->acc >>= n;                                           \
1666       f->valid_bits -= n;                                     \
1667       if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
1668    } else {                                                   \
1669       var = codebook_decode_scalar_raw(f,c);                  \
1670    }
1671 
1672 #else
1673 
codebook_decode_scalar(vorb * f,Codebook * c)1674 static int codebook_decode_scalar(vorb *f, Codebook *c)
1675 {
1676    int i;
1677    if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
1678       prep_huffman(f);
1679    // fast huffman table lookup
1680    i = f->acc & FAST_HUFFMAN_TABLE_MASK;
1681    i = c->fast_huffman[i];
1682    if (i >= 0) {
1683       f->acc >>= c->codeword_lengths[i];
1684       f->valid_bits -= c->codeword_lengths[i];
1685       if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
1686       return i;
1687    }
1688    return codebook_decode_scalar_raw(f,c);
1689 }
1690 
1691 #define DECODE_RAW(var,f,c)    var = codebook_decode_scalar(f,c);
1692 
1693 #endif
1694 
1695 #define DECODE(var,f,c)                                       \
1696    DECODE_RAW(var,f,c)                                        \
1697    if (c->sparse) var = c->sorted_values[var];
1698 
1699 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1700   #define DECODE_VQ(var,f,c)   DECODE_RAW(var,f,c)
1701 #else
1702   #define DECODE_VQ(var,f,c)   DECODE(var,f,c)
1703 #endif
1704 
1705 
1706 
1707 
1708 
1709 
1710 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
1711 // where we avoid one addition
1712 #define CODEBOOK_ELEMENT(c,off)          (c->multiplicands[off])
1713 #define CODEBOOK_ELEMENT_FAST(c,off)     (c->multiplicands[off])
1714 #define CODEBOOK_ELEMENT_BASE(c)         (0)
1715 
codebook_decode_start(vorb * f,Codebook * c)1716 static int codebook_decode_start(vorb *f, Codebook *c)
1717 {
1718    int z = -1;
1719 
1720    // type 0 is only legal in a scalar context
1721    if (c->lookup_type == 0)
1722       error(f, VORBIS_invalid_stream);
1723    else {
1724       DECODE_VQ(z,f,c);
1725       if (c->sparse) assert(z < c->sorted_entries);
1726       if (z < 0) {  // check for EOP
1727          if (!f->bytes_in_seg)
1728             if (f->last_seg)
1729                return z;
1730          error(f, VORBIS_invalid_stream);
1731       }
1732    }
1733    return z;
1734 }
1735 
codebook_decode(vorb * f,Codebook * c,float * output,int len)1736 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1737 {
1738    int i,z = codebook_decode_start(f,c);
1739    if (z < 0) return FALSE;
1740    if (len > c->dimensions) len = c->dimensions;
1741 
1742 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1743    if (c->lookup_type == 1) {
1744       float last = CODEBOOK_ELEMENT_BASE(c);
1745       int div = 1;
1746       for (i=0; i < len; ++i) {
1747          int off = (z / div) % c->lookup_values;
1748          float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1749          output[i] += val;
1750          if (c->sequence_p) last = val + c->minimum_value;
1751          div *= c->lookup_values;
1752       }
1753       return TRUE;
1754    }
1755 #endif
1756 
1757    z *= c->dimensions;
1758    if (c->sequence_p) {
1759       float last = CODEBOOK_ELEMENT_BASE(c);
1760       for (i=0; i < len; ++i) {
1761          float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1762          output[i] += val;
1763          last = val + c->minimum_value;
1764       }
1765    } else {
1766       float last = CODEBOOK_ELEMENT_BASE(c);
1767       for (i=0; i < len; ++i) {
1768          output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1769       }
1770    }
1771 
1772    return TRUE;
1773 }
1774 
codebook_decode_step(vorb * f,Codebook * c,float * output,int len,int step)1775 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1776 {
1777    int i,z = codebook_decode_start(f,c);
1778    float last = CODEBOOK_ELEMENT_BASE(c);
1779    if (z < 0) return FALSE;
1780    if (len > c->dimensions) len = c->dimensions;
1781 
1782 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1783    if (c->lookup_type == 1) {
1784       int div = 1;
1785       for (i=0; i < len; ++i) {
1786          int off = (z / div) % c->lookup_values;
1787          float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1788          output[i*step] += val;
1789          if (c->sequence_p) last = val;
1790          div *= c->lookup_values;
1791       }
1792       return TRUE;
1793    }
1794 #endif
1795 
1796    z *= c->dimensions;
1797    for (i=0; i < len; ++i) {
1798       float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1799       output[i*step] += val;
1800       if (c->sequence_p) last = val;
1801    }
1802 
1803    return TRUE;
1804 }
1805 
codebook_decode_deinterleave_repeat(vorb * f,Codebook * c,float ** outputs,int ch,int * c_inter_p,int * p_inter_p,int len,int total_decode)1806 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1807 {
1808    int c_inter = *c_inter_p;
1809    int p_inter = *p_inter_p;
1810    int i,z, effective = c->dimensions;
1811 
1812    // type 0 is only legal in a scalar context
1813    if (c->lookup_type == 0)   return error(f, VORBIS_invalid_stream);
1814 
1815    while (total_decode > 0) {
1816       float last = CODEBOOK_ELEMENT_BASE(c);
1817       DECODE_VQ(z,f,c);
1818       #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1819       assert(!c->sparse || z < c->sorted_entries);
1820       #endif
1821       if (z < 0) {
1822          if (!f->bytes_in_seg)
1823             if (f->last_seg) return FALSE;
1824          return error(f, VORBIS_invalid_stream);
1825       }
1826 
1827       // if this will take us off the end of the buffers, stop short!
1828       // we check by computing the length of the virtual interleaved
1829       // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1830       // and the length we'll be using (effective)
1831       if (c_inter + p_inter*ch + effective > len * ch) {
1832          effective = len*ch - (p_inter*ch - c_inter);
1833       }
1834 
1835    #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1836       if (c->lookup_type == 1) {
1837          int div = 1;
1838          for (i=0; i < effective; ++i) {
1839             int off = (z / div) % c->lookup_values;
1840             float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1841             if (outputs[c_inter])
1842                outputs[c_inter][p_inter] += val;
1843             if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1844             if (c->sequence_p) last = val;
1845             div *= c->lookup_values;
1846          }
1847       } else
1848    #endif
1849       {
1850          z *= c->dimensions;
1851          if (c->sequence_p) {
1852             for (i=0; i < effective; ++i) {
1853                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1854                if (outputs[c_inter])
1855                   outputs[c_inter][p_inter] += val;
1856                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1857                last = val;
1858             }
1859          } else {
1860             for (i=0; i < effective; ++i) {
1861                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1862                if (outputs[c_inter])
1863                   outputs[c_inter][p_inter] += val;
1864                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1865             }
1866          }
1867       }
1868 
1869       total_decode -= effective;
1870    }
1871    *c_inter_p = c_inter;
1872    *p_inter_p = p_inter;
1873    return TRUE;
1874 }
1875 
predict_point(int x,int x0,int x1,int y0,int y1)1876 static int predict_point(int x, int x0, int x1, int y0, int y1)
1877 {
1878    int dy = y1 - y0;
1879    int adx = x1 - x0;
1880    // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
1881    int err = abs(dy) * (x - x0);
1882    int off = err / adx;
1883    return dy < 0 ? y0 - off : y0 + off;
1884 }
1885 
1886 // the following table is block-copied from the specification
1887 static float inverse_db_table[256] =
1888 {
1889   1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
1890   1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
1891   1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
1892   2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
1893   2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
1894   3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
1895   4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
1896   6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
1897   7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
1898   1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
1899   1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
1900   1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
1901   2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
1902   2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
1903   3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
1904   4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
1905   5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
1906   7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
1907   9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
1908   1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
1909   1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
1910   2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
1911   2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
1912   3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
1913   4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
1914   5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
1915   7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
1916   9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
1917   0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
1918   0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
1919   0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
1920   0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
1921   0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
1922   0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
1923   0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
1924   0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
1925   0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f,
1926   0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f,
1927   0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f,
1928   0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f,
1929   0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f,
1930   0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f,
1931   0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f,
1932   0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f,
1933   0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f,
1934   0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f,
1935   0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f,
1936   0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f,
1937   0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f,
1938   0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f,
1939   0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f,
1940   0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f,
1941   0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f,
1942   0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f,
1943   0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f,
1944   0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f,
1945   0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f,
1946   0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f,
1947   0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f,
1948   0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f,
1949   0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f,
1950   0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f,
1951   0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f,
1952   0.82788260f,    0.88168307f,    0.9389798f,     1.0f
1953 };
1954 
1955 
1956 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1957 // note that you must produce bit-identical output to decode correctly;
1958 // this specific sequence of operations is specified in the spec (it's
1959 // drawing integer-quantized frequency-space lines that the encoder
1960 // expects to be exactly the same)
1961 //     ... also, isn't the whole point of Bresenham's algorithm to NOT
1962 // have to divide in the setup? sigh.
1963 #ifndef STB_VORBIS_NO_DEFER_FLOOR
1964 #define LINE_OP(a,b)   a *= b
1965 #else
1966 #define LINE_OP(a,b)   a = b
1967 #endif
1968 
1969 #ifdef STB_VORBIS_DIVIDE_TABLE
1970 #define DIVTAB_NUMER   32
1971 #define DIVTAB_DENOM   64
1972 int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
1973 #endif
1974 
draw_line(float * output,int x0,int y0,int x1,int y1,int n)1975 static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
1976 {
1977    int dy = y1 - y0;
1978    int adx = x1 - x0;
1979    int ady = abs(dy);
1980    int base;
1981    int x=x0,y=y0;
1982    int err = 0;
1983    int sy;
1984 
1985 #ifdef STB_VORBIS_DIVIDE_TABLE
1986    if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
1987       if (dy < 0) {
1988          base = -integer_divide_table[ady][adx];
1989          sy = base-1;
1990       } else {
1991          base =  integer_divide_table[ady][adx];
1992          sy = base+1;
1993       }
1994    } else {
1995       base = dy / adx;
1996       if (dy < 0)
1997          sy = base - 1;
1998       else
1999          sy = base+1;
2000    }
2001 #else
2002    base = dy / adx;
2003    if (dy < 0)
2004       sy = base - 1;
2005    else
2006       sy = base+1;
2007 #endif
2008    ady -= abs(base) * adx;
2009    if (x1 > n) x1 = n;
2010    if (x < x1) {
2011       LINE_OP(output[x], inverse_db_table[y]);
2012       for (++x; x < x1; ++x) {
2013          err += ady;
2014          if (err >= adx) {
2015             err -= adx;
2016             y += sy;
2017          } else
2018             y += base;
2019          LINE_OP(output[x], inverse_db_table[y]);
2020       }
2021    }
2022 }
2023 
residue_decode(vorb * f,Codebook * book,float * target,int offset,int n,int rtype)2024 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
2025 {
2026    int k;
2027    if (rtype == 0) {
2028       int step = n / book->dimensions;
2029       for (k=0; k < step; ++k)
2030          if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
2031             return FALSE;
2032    } else {
2033       for (k=0; k < n; ) {
2034          if (!codebook_decode(f, book, target+offset, n-k))
2035             return FALSE;
2036          k += book->dimensions;
2037          offset += book->dimensions;
2038       }
2039    }
2040    return TRUE;
2041 }
2042 
decode_residue(vorb * f,float * residue_buffers[],int ch,int n,int rn,uint8 * do_not_decode)2043 static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
2044 {
2045    int i,j,pass;
2046    Residue *r = f->residue_config + rn;
2047    int rtype = f->residue_types[rn];
2048    int c = r->classbook;
2049    int classwords = f->codebooks[c].dimensions;
2050    int n_read = r->end - r->begin;
2051    int part_read = n_read / r->part_size;
2052    int temp_alloc_point = temp_alloc_save(f);
2053    #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2054    uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
2055    #else
2056    int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
2057    #endif
2058 
2059    CHECK(f);
2060 
2061    for (i=0; i < ch; ++i)
2062       if (!do_not_decode[i])
2063          memset(residue_buffers[i], 0, sizeof(float) * n);
2064 
2065    if (rtype == 2 && ch != 1) {
2066       for (j=0; j < ch; ++j)
2067          if (!do_not_decode[j])
2068             break;
2069       if (j == ch)
2070          goto done;
2071 
2072       for (pass=0; pass < 8; ++pass) {
2073          int pcount = 0, class_set = 0;
2074          if (ch == 2) {
2075             while (pcount < part_read) {
2076                int z = r->begin + pcount*r->part_size;
2077                int c_inter = (z & 1), p_inter = z>>1;
2078                if (pass == 0) {
2079                   Codebook *c = f->codebooks+r->classbook;
2080                   int q;
2081                   DECODE(q,f,c);
2082                   if (q == EOP) goto done;
2083                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2084                   part_classdata[0][class_set] = r->classdata[q];
2085                   #else
2086                   for (i=classwords-1; i >= 0; --i) {
2087                      classifications[0][i+pcount] = q % r->classifications;
2088                      q /= r->classifications;
2089                   }
2090                   #endif
2091                }
2092                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2093                   int z = r->begin + pcount*r->part_size;
2094                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2095                   int c = part_classdata[0][class_set][i];
2096                   #else
2097                   int c = classifications[0][pcount];
2098                   #endif
2099                   int b = r->residue_books[c][pass];
2100                   if (b >= 0) {
2101                      Codebook *book = f->codebooks + b;
2102                      #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
2103                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2104                         goto done;
2105                      #else
2106                      // saves 1%
2107                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2108                         goto done;
2109                      #endif
2110                   } else {
2111                      z += r->part_size;
2112                      c_inter = z & 1;
2113                      p_inter = z >> 1;
2114                   }
2115                }
2116                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2117                ++class_set;
2118                #endif
2119             }
2120          } else if (ch == 1) {
2121             while (pcount < part_read) {
2122                int z = r->begin + pcount*r->part_size;
2123                int c_inter = 0, p_inter = z;
2124                if (pass == 0) {
2125                   Codebook *c = f->codebooks+r->classbook;
2126                   int q;
2127                   DECODE(q,f,c);
2128                   if (q == EOP) goto done;
2129                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2130                   part_classdata[0][class_set] = r->classdata[q];
2131                   #else
2132                   for (i=classwords-1; i >= 0; --i) {
2133                      classifications[0][i+pcount] = q % r->classifications;
2134                      q /= r->classifications;
2135                   }
2136                   #endif
2137                }
2138                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2139                   int z = r->begin + pcount*r->part_size;
2140                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2141                   int c = part_classdata[0][class_set][i];
2142                   #else
2143                   int c = classifications[0][pcount];
2144                   #endif
2145                   int b = r->residue_books[c][pass];
2146                   if (b >= 0) {
2147                      Codebook *book = f->codebooks + b;
2148                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2149                         goto done;
2150                   } else {
2151                      z += r->part_size;
2152                      c_inter = 0;
2153                      p_inter = z;
2154                   }
2155                }
2156                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2157                ++class_set;
2158                #endif
2159             }
2160          } else {
2161             while (pcount < part_read) {
2162                int z = r->begin + pcount*r->part_size;
2163                int c_inter = z % ch, p_inter = z/ch;
2164                if (pass == 0) {
2165                   Codebook *c = f->codebooks+r->classbook;
2166                   int q;
2167                   DECODE(q,f,c);
2168                   if (q == EOP) goto done;
2169                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2170                   part_classdata[0][class_set] = r->classdata[q];
2171                   #else
2172                   for (i=classwords-1; i >= 0; --i) {
2173                      classifications[0][i+pcount] = q % r->classifications;
2174                      q /= r->classifications;
2175                   }
2176                   #endif
2177                }
2178                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2179                   int z = r->begin + pcount*r->part_size;
2180                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2181                   int c = part_classdata[0][class_set][i];
2182                   #else
2183                   int c = classifications[0][pcount];
2184                   #endif
2185                   int b = r->residue_books[c][pass];
2186                   if (b >= 0) {
2187                      Codebook *book = f->codebooks + b;
2188                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2189                         goto done;
2190                   } else {
2191                      z += r->part_size;
2192                      c_inter = z % ch;
2193                      p_inter = z / ch;
2194                   }
2195                }
2196                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2197                ++class_set;
2198                #endif
2199             }
2200          }
2201       }
2202       goto done;
2203    }
2204    CHECK(f);
2205 
2206    for (pass=0; pass < 8; ++pass) {
2207       int pcount = 0, class_set=0;
2208       while (pcount < part_read) {
2209          if (pass == 0) {
2210             for (j=0; j < ch; ++j) {
2211                if (!do_not_decode[j]) {
2212                   Codebook *c = f->codebooks+r->classbook;
2213                   int temp;
2214                   DECODE(temp,f,c);
2215                   if (temp == EOP) goto done;
2216                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2217                   part_classdata[j][class_set] = r->classdata[temp];
2218                   #else
2219                   for (i=classwords-1; i >= 0; --i) {
2220                      classifications[j][i+pcount] = temp % r->classifications;
2221                      temp /= r->classifications;
2222                   }
2223                   #endif
2224                }
2225             }
2226          }
2227          for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2228             for (j=0; j < ch; ++j) {
2229                if (!do_not_decode[j]) {
2230                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2231                   int c = part_classdata[j][class_set][i];
2232                   #else
2233                   int c = classifications[j][pcount];
2234                   #endif
2235                   int b = r->residue_books[c][pass];
2236                   if (b >= 0) {
2237                      float *target = residue_buffers[j];
2238                      int offset = r->begin + pcount * r->part_size;
2239                      int n = r->part_size;
2240                      Codebook *book = f->codebooks + b;
2241                      if (!residue_decode(f, book, target, offset, n, rtype))
2242                         goto done;
2243                   }
2244                }
2245             }
2246          }
2247          #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2248          ++class_set;
2249          #endif
2250       }
2251    }
2252   done:
2253    CHECK(f);
2254    #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2255    temp_free(f,part_classdata);
2256    #else
2257    temp_free(f,classifications);
2258    #endif
2259    temp_alloc_restore(f,temp_alloc_point);
2260 }
2261 
2262 
2263 #if 0
2264 // slow way for debugging
2265 void inverse_mdct_slow(float *buffer, int n)
2266 {
2267    int i,j;
2268    int n2 = n >> 1;
2269    float *x = (float *) malloc(sizeof(*x) * n2);
2270    memcpy(x, buffer, sizeof(*x) * n2);
2271    for (i=0; i < n; ++i) {
2272       float acc = 0;
2273       for (j=0; j < n2; ++j)
2274          // formula from paper:
2275          //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2276          // formula from wikipedia
2277          //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2278          // these are equivalent, except the formula from the paper inverts the multiplier!
2279          // however, what actually works is NO MULTIPLIER!?!
2280          //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2281          acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2282       buffer[i] = acc;
2283    }
2284    free(x);
2285 }
2286 #elif 0
2287 // same as above, but just barely able to run in real time on modern machines
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2288 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2289 {
2290    float mcos[16384];
2291    int i,j;
2292    int n2 = n >> 1, nmask = (n << 2) -1;
2293    float *x = (float *) malloc(sizeof(*x) * n2);
2294    memcpy(x, buffer, sizeof(*x) * n2);
2295    for (i=0; i < 4*n; ++i)
2296       mcos[i] = (float) cos(M_PI / 2 * i / n);
2297 
2298    for (i=0; i < n; ++i) {
2299       float acc = 0;
2300       for (j=0; j < n2; ++j)
2301          acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
2302       buffer[i] = acc;
2303    }
2304    free(x);
2305 }
2306 #elif 0
2307 // transform to use a slow dct-iv; this is STILL basically trivial,
2308 // but only requires half as many ops
dct_iv_slow(float * buffer,int n)2309 void dct_iv_slow(float *buffer, int n)
2310 {
2311    float mcos[16384];
2312    float x[2048];
2313    int i,j;
2314    int n2 = n >> 1, nmask = (n << 3) - 1;
2315    memcpy(x, buffer, sizeof(*x) * n);
2316    for (i=0; i < 8*n; ++i)
2317       mcos[i] = (float) cos(M_PI / 4 * i / n);
2318    for (i=0; i < n; ++i) {
2319       float acc = 0;
2320       for (j=0; j < n; ++j)
2321          acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
2322       buffer[i] = acc;
2323    }
2324 }
2325 
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2326 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2327 {
2328    int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
2329    float temp[4096];
2330 
2331    memcpy(temp, buffer, n2 * sizeof(float));
2332    dct_iv_slow(temp, n2);  // returns -c'-d, a-b'
2333 
2334    for (i=0; i < n4  ; ++i) buffer[i] = temp[i+n4];            // a-b'
2335    for (   ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1];   // b-a', c+d'
2336    for (   ; i < n   ; ++i) buffer[i] = -temp[i - n3_4];       // c'+d
2337 }
2338 #endif
2339 
2340 #ifndef LIBVORBIS_MDCT
2341 #define LIBVORBIS_MDCT 0
2342 #endif
2343 
2344 #if LIBVORBIS_MDCT
2345 // directly call the vorbis MDCT using an interface documented
2346 // by Jeff Roberts... useful for performance comparison
2347 typedef struct
2348 {
2349   int n;
2350   int log2n;
2351 
2352   float *trig;
2353   int   *bitrev;
2354 
2355   float scale;
2356 } mdct_lookup;
2357 
2358 extern void mdct_init(mdct_lookup *lookup, int n);
2359 extern void mdct_clear(mdct_lookup *l);
2360 extern void mdct_backward(mdct_lookup *init, float *in, float *out);
2361 
2362 mdct_lookup M1,M2;
2363 
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2364 void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2365 {
2366    mdct_lookup *M;
2367    if (M1.n == n) M = &M1;
2368    else if (M2.n == n) M = &M2;
2369    else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
2370    else {
2371       if (M2.n) __asm int 3;
2372       mdct_init(&M2, n);
2373       M = &M2;
2374    }
2375 
2376    mdct_backward(M, buffer, buffer);
2377 }
2378 #endif
2379 
2380 
2381 // the following were split out into separate functions while optimizing;
2382 // they could be pushed back up but eh. __forceinline showed no change;
2383 // they're probably already being inlined.
imdct_step3_iter0_loop(int n,float * e,int i_off,int k_off,float * A)2384 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
2385 {
2386    float *ee0 = e + i_off;
2387    float *ee2 = ee0 + k_off;
2388    int i;
2389 
2390    assert((n & 3) == 0);
2391    for (i=(n>>2); i > 0; --i) {
2392       float k00_20, k01_21;
2393       k00_20  = ee0[ 0] - ee2[ 0];
2394       k01_21  = ee0[-1] - ee2[-1];
2395       ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
2396       ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
2397       ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
2398       ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
2399       A += 8;
2400 
2401       k00_20  = ee0[-2] - ee2[-2];
2402       k01_21  = ee0[-3] - ee2[-3];
2403       ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
2404       ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
2405       ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
2406       ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
2407       A += 8;
2408 
2409       k00_20  = ee0[-4] - ee2[-4];
2410       k01_21  = ee0[-5] - ee2[-5];
2411       ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
2412       ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
2413       ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
2414       ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
2415       A += 8;
2416 
2417       k00_20  = ee0[-6] - ee2[-6];
2418       k01_21  = ee0[-7] - ee2[-7];
2419       ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
2420       ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
2421       ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
2422       ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
2423       A += 8;
2424       ee0 -= 8;
2425       ee2 -= 8;
2426    }
2427 }
2428 
imdct_step3_inner_r_loop(int lim,float * e,int d0,int k_off,float * A,int k1)2429 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
2430 {
2431    int i;
2432    float k00_20, k01_21;
2433 
2434    float *e0 = e + d0;
2435    float *e2 = e0 + k_off;
2436 
2437    for (i=lim >> 2; i > 0; --i) {
2438       k00_20 = e0[-0] - e2[-0];
2439       k01_21 = e0[-1] - e2[-1];
2440       e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
2441       e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
2442       e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
2443       e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
2444 
2445       A += k1;
2446 
2447       k00_20 = e0[-2] - e2[-2];
2448       k01_21 = e0[-3] - e2[-3];
2449       e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
2450       e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
2451       e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
2452       e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
2453 
2454       A += k1;
2455 
2456       k00_20 = e0[-4] - e2[-4];
2457       k01_21 = e0[-5] - e2[-5];
2458       e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
2459       e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
2460       e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
2461       e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
2462 
2463       A += k1;
2464 
2465       k00_20 = e0[-6] - e2[-6];
2466       k01_21 = e0[-7] - e2[-7];
2467       e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
2468       e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
2469       e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
2470       e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
2471 
2472       e0 -= 8;
2473       e2 -= 8;
2474 
2475       A += k1;
2476    }
2477 }
2478 
imdct_step3_inner_s_loop(int n,float * e,int i_off,int k_off,float * A,int a_off,int k0)2479 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
2480 {
2481    int i;
2482    float A0 = A[0];
2483    float A1 = A[0+1];
2484    float A2 = A[0+a_off];
2485    float A3 = A[0+a_off+1];
2486    float A4 = A[0+a_off*2+0];
2487    float A5 = A[0+a_off*2+1];
2488    float A6 = A[0+a_off*3+0];
2489    float A7 = A[0+a_off*3+1];
2490 
2491    float k00,k11;
2492 
2493    float *ee0 = e  +i_off;
2494    float *ee2 = ee0+k_off;
2495 
2496    for (i=n; i > 0; --i) {
2497       k00     = ee0[ 0] - ee2[ 0];
2498       k11     = ee0[-1] - ee2[-1];
2499       ee0[ 0] =  ee0[ 0] + ee2[ 0];
2500       ee0[-1] =  ee0[-1] + ee2[-1];
2501       ee2[ 0] = (k00) * A0 - (k11) * A1;
2502       ee2[-1] = (k11) * A0 + (k00) * A1;
2503 
2504       k00     = ee0[-2] - ee2[-2];
2505       k11     = ee0[-3] - ee2[-3];
2506       ee0[-2] =  ee0[-2] + ee2[-2];
2507       ee0[-3] =  ee0[-3] + ee2[-3];
2508       ee2[-2] = (k00) * A2 - (k11) * A3;
2509       ee2[-3] = (k11) * A2 + (k00) * A3;
2510 
2511       k00     = ee0[-4] - ee2[-4];
2512       k11     = ee0[-5] - ee2[-5];
2513       ee0[-4] =  ee0[-4] + ee2[-4];
2514       ee0[-5] =  ee0[-5] + ee2[-5];
2515       ee2[-4] = (k00) * A4 - (k11) * A5;
2516       ee2[-5] = (k11) * A4 + (k00) * A5;
2517 
2518       k00     = ee0[-6] - ee2[-6];
2519       k11     = ee0[-7] - ee2[-7];
2520       ee0[-6] =  ee0[-6] + ee2[-6];
2521       ee0[-7] =  ee0[-7] + ee2[-7];
2522       ee2[-6] = (k00) * A6 - (k11) * A7;
2523       ee2[-7] = (k11) * A6 + (k00) * A7;
2524 
2525       ee0 -= k0;
2526       ee2 -= k0;
2527    }
2528 }
2529 
iter_54(float * z)2530 static __forceinline void iter_54(float *z)
2531 {
2532    float k00,k11,k22,k33;
2533    float y0,y1,y2,y3;
2534 
2535    k00  = z[ 0] - z[-4];
2536    y0   = z[ 0] + z[-4];
2537    y2   = z[-2] + z[-6];
2538    k22  = z[-2] - z[-6];
2539 
2540    z[-0] = y0 + y2;      // z0 + z4 + z2 + z6
2541    z[-2] = y0 - y2;      // z0 + z4 - z2 - z6
2542 
2543    // done with y0,y2
2544 
2545    k33  = z[-3] - z[-7];
2546 
2547    z[-4] = k00 + k33;    // z0 - z4 + z3 - z7
2548    z[-6] = k00 - k33;    // z0 - z4 - z3 + z7
2549 
2550    // done with k33
2551 
2552    k11  = z[-1] - z[-5];
2553    y1   = z[-1] + z[-5];
2554    y3   = z[-3] + z[-7];
2555 
2556    z[-1] = y1 + y3;      // z1 + z5 + z3 + z7
2557    z[-3] = y1 - y3;      // z1 + z5 - z3 - z7
2558    z[-5] = k11 - k22;    // z1 - z5 + z2 - z6
2559    z[-7] = k11 + k22;    // z1 - z5 - z2 + z6
2560 }
2561 
imdct_step3_inner_s_loop_ld654(int n,float * e,int i_off,float * A,int base_n)2562 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
2563 {
2564    int a_off = base_n >> 3;
2565    float A2 = A[0+a_off];
2566    float *z = e + i_off;
2567    float *base = z - 16 * n;
2568 
2569    while (z > base) {
2570       float k00,k11;
2571 
2572       k00   = z[-0] - z[-8];
2573       k11   = z[-1] - z[-9];
2574       z[-0] = z[-0] + z[-8];
2575       z[-1] = z[-1] + z[-9];
2576       z[-8] =  k00;
2577       z[-9] =  k11 ;
2578 
2579       k00    = z[ -2] - z[-10];
2580       k11    = z[ -3] - z[-11];
2581       z[ -2] = z[ -2] + z[-10];
2582       z[ -3] = z[ -3] + z[-11];
2583       z[-10] = (k00+k11) * A2;
2584       z[-11] = (k11-k00) * A2;
2585 
2586       k00    = z[-12] - z[ -4];  // reverse to avoid a unary negation
2587       k11    = z[ -5] - z[-13];
2588       z[ -4] = z[ -4] + z[-12];
2589       z[ -5] = z[ -5] + z[-13];
2590       z[-12] = k11;
2591       z[-13] = k00;
2592 
2593       k00    = z[-14] - z[ -6];  // reverse to avoid a unary negation
2594       k11    = z[ -7] - z[-15];
2595       z[ -6] = z[ -6] + z[-14];
2596       z[ -7] = z[ -7] + z[-15];
2597       z[-14] = (k00+k11) * A2;
2598       z[-15] = (k00-k11) * A2;
2599 
2600       iter_54(z);
2601       iter_54(z-8);
2602       z -= 16;
2603    }
2604 }
2605 
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2606 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2607 {
2608    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2609    int ld;
2610    // @OPTIMIZE: reduce register pressure by using fewer variables?
2611    int save_point = temp_alloc_save(f);
2612    float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
2613    float *u=NULL,*v=NULL;
2614    // twiddle factors
2615    float *A = f->A[blocktype];
2616 
2617    // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2618    // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
2619 
2620    // kernel from paper
2621 
2622 
2623    // merged:
2624    //   copy and reflect spectral data
2625    //   step 0
2626 
2627    // note that it turns out that the items added together during
2628    // this step are, in fact, being added to themselves (as reflected
2629    // by step 0). inexplicable inefficiency! this became obvious
2630    // once I combined the passes.
2631 
2632    // so there's a missing 'times 2' here (for adding X to itself).
2633    // this propogates through linearly to the end, where the numbers
2634    // are 1/2 too small, and need to be compensated for.
2635 
2636    {
2637       float *d,*e, *AA, *e_stop;
2638       d = &buf2[n2-2];
2639       AA = A;
2640       e = &buffer[0];
2641       e_stop = &buffer[n2];
2642       while (e != e_stop) {
2643          d[1] = (e[0] * AA[0] - e[2]*AA[1]);
2644          d[0] = (e[0] * AA[1] + e[2]*AA[0]);
2645          d -= 2;
2646          AA += 2;
2647          e += 4;
2648       }
2649 
2650       e = &buffer[n2-3];
2651       while (d >= buf2) {
2652          d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
2653          d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
2654          d -= 2;
2655          AA += 2;
2656          e -= 4;
2657       }
2658    }
2659 
2660    // now we use symbolic names for these, so that we can
2661    // possibly swap their meaning as we change which operations
2662    // are in place
2663 
2664    u = buffer;
2665    v = buf2;
2666 
2667    // step 2    (paper output is w, now u)
2668    // this could be in place, but the data ends up in the wrong
2669    // place... _somebody_'s got to swap it, so this is nominated
2670    {
2671       float *AA = &A[n2-8];
2672       float *d0,*d1, *e0, *e1;
2673 
2674       e0 = &v[n4];
2675       e1 = &v[0];
2676 
2677       d0 = &u[n4];
2678       d1 = &u[0];
2679 
2680       while (AA >= A) {
2681          float v40_20, v41_21;
2682 
2683          v41_21 = e0[1] - e1[1];
2684          v40_20 = e0[0] - e1[0];
2685          d0[1]  = e0[1] + e1[1];
2686          d0[0]  = e0[0] + e1[0];
2687          d1[1]  = v41_21*AA[4] - v40_20*AA[5];
2688          d1[0]  = v40_20*AA[4] + v41_21*AA[5];
2689 
2690          v41_21 = e0[3] - e1[3];
2691          v40_20 = e0[2] - e1[2];
2692          d0[3]  = e0[3] + e1[3];
2693          d0[2]  = e0[2] + e1[2];
2694          d1[3]  = v41_21*AA[0] - v40_20*AA[1];
2695          d1[2]  = v40_20*AA[0] + v41_21*AA[1];
2696 
2697          AA -= 8;
2698 
2699          d0 += 4;
2700          d1 += 4;
2701          e0 += 4;
2702          e1 += 4;
2703       }
2704    }
2705 
2706    // step 3
2707    ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2708 
2709    // optimized step 3:
2710 
2711    // the original step3 loop can be nested r inside s or s inside r;
2712    // it's written originally as s inside r, but this is dumb when r
2713    // iterates many times, and s few. So I have two copies of it and
2714    // switch between them halfway.
2715 
2716    // this is iteration 0 of step 3
2717    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2718    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2719 
2720    // this is iteration 1 of step 3
2721    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2722    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2723    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2724    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2725 
2726    l=2;
2727    for (; l < (ld-3)>>1; ++l) {
2728       int k0 = n >> (l+2), k0_2 = k0>>1;
2729       int lim = 1 << (l+1);
2730       int i;
2731       for (i=0; i < lim; ++i)
2732          imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2733    }
2734 
2735    for (; l < ld-6; ++l) {
2736       int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2737       int rlim = n >> (l+6), r;
2738       int lim = 1 << (l+1);
2739       int i_off;
2740       float *A0 = A;
2741       i_off = n2-1;
2742       for (r=rlim; r > 0; --r) {
2743          imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2744          A0 += k1*4;
2745          i_off -= 8;
2746       }
2747    }
2748 
2749    // iterations with count:
2750    //   ld-6,-5,-4 all interleaved together
2751    //       the big win comes from getting rid of needless flops
2752    //         due to the constants on pass 5 & 4 being all 1 and 0;
2753    //       combining them to be simultaneous to improve cache made little difference
2754    imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2755 
2756    // output is u
2757 
2758    // step 4, 5, and 6
2759    // cannot be in-place because of step 5
2760    {
2761       uint16 *bitrev = f->bit_reverse[blocktype];
2762       // weirdly, I'd have thought reading sequentially and writing
2763       // erratically would have been better than vice-versa, but in
2764       // fact that's not what my testing showed. (That is, with
2765       // j = bitreverse(i), do you read i and write j, or read j and write i.)
2766 
2767       float *d0 = &v[n4-4];
2768       float *d1 = &v[n2-4];
2769       while (d0 >= v) {
2770          int k4;
2771 
2772          k4 = bitrev[0];
2773          d1[3] = u[k4+0];
2774          d1[2] = u[k4+1];
2775          d0[3] = u[k4+2];
2776          d0[2] = u[k4+3];
2777 
2778          k4 = bitrev[1];
2779          d1[1] = u[k4+0];
2780          d1[0] = u[k4+1];
2781          d0[1] = u[k4+2];
2782          d0[0] = u[k4+3];
2783 
2784          d0 -= 4;
2785          d1 -= 4;
2786          bitrev += 2;
2787       }
2788    }
2789    // (paper output is u, now v)
2790 
2791 
2792    // data must be in buf2
2793    assert(v == buf2);
2794 
2795    // step 7   (paper output is v, now v)
2796    // this is now in place
2797    {
2798       float *C = f->C[blocktype];
2799       float *d, *e;
2800 
2801       d = v;
2802       e = v + n2 - 4;
2803 
2804       while (d < e) {
2805          float a02,a11,b0,b1,b2,b3;
2806 
2807          a02 = d[0] - e[2];
2808          a11 = d[1] + e[3];
2809 
2810          b0 = C[1]*a02 + C[0]*a11;
2811          b1 = C[1]*a11 - C[0]*a02;
2812 
2813          b2 = d[0] + e[ 2];
2814          b3 = d[1] - e[ 3];
2815 
2816          d[0] = b2 + b0;
2817          d[1] = b3 + b1;
2818          e[2] = b2 - b0;
2819          e[3] = b1 - b3;
2820 
2821          a02 = d[2] - e[0];
2822          a11 = d[3] + e[1];
2823 
2824          b0 = C[3]*a02 + C[2]*a11;
2825          b1 = C[3]*a11 - C[2]*a02;
2826 
2827          b2 = d[2] + e[ 0];
2828          b3 = d[3] - e[ 1];
2829 
2830          d[2] = b2 + b0;
2831          d[3] = b3 + b1;
2832          e[0] = b2 - b0;
2833          e[1] = b1 - b3;
2834 
2835          C += 4;
2836          d += 4;
2837          e -= 4;
2838       }
2839    }
2840 
2841    // data must be in buf2
2842 
2843 
2844    // step 8+decode   (paper output is X, now buffer)
2845    // this generates pairs of data a la 8 and pushes them directly through
2846    // the decode kernel (pushing rather than pulling) to avoid having
2847    // to make another pass later
2848 
2849    // this cannot POSSIBLY be in place, so we refer to the buffers directly
2850 
2851    {
2852       float *d0,*d1,*d2,*d3;
2853 
2854       float *B = f->B[blocktype] + n2 - 8;
2855       float *e = buf2 + n2 - 8;
2856       d0 = &buffer[0];
2857       d1 = &buffer[n2-4];
2858       d2 = &buffer[n2];
2859       d3 = &buffer[n-4];
2860       while (e >= v) {
2861          float p0,p1,p2,p3;
2862 
2863          p3 =  e[6]*B[7] - e[7]*B[6];
2864          p2 = -e[6]*B[6] - e[7]*B[7];
2865 
2866          d0[0] =   p3;
2867          d1[3] = - p3;
2868          d2[0] =   p2;
2869          d3[3] =   p2;
2870 
2871          p1 =  e[4]*B[5] - e[5]*B[4];
2872          p0 = -e[4]*B[4] - e[5]*B[5];
2873 
2874          d0[1] =   p1;
2875          d1[2] = - p1;
2876          d2[1] =   p0;
2877          d3[2] =   p0;
2878 
2879          p3 =  e[2]*B[3] - e[3]*B[2];
2880          p2 = -e[2]*B[2] - e[3]*B[3];
2881 
2882          d0[2] =   p3;
2883          d1[1] = - p3;
2884          d2[2] =   p2;
2885          d3[1] =   p2;
2886 
2887          p1 =  e[0]*B[1] - e[1]*B[0];
2888          p0 = -e[0]*B[0] - e[1]*B[1];
2889 
2890          d0[3] =   p1;
2891          d1[0] = - p1;
2892          d2[3] =   p0;
2893          d3[0] =   p0;
2894 
2895          B -= 8;
2896          e -= 8;
2897          d0 += 4;
2898          d2 += 4;
2899          d1 -= 4;
2900          d3 -= 4;
2901       }
2902    }
2903 
2904    temp_free(f,buf2);
2905    temp_alloc_restore(f,save_point);
2906 }
2907 
2908 #if 0
2909 // this is the original version of the above code, if you want to optimize it from scratch
2910 void inverse_mdct_naive(float *buffer, int n)
2911 {
2912    float s;
2913    float A[1 << 12], B[1 << 12], C[1 << 11];
2914    int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2915    int n3_4 = n - n4, ld;
2916    // how can they claim this only uses N words?!
2917    // oh, because they're only used sparsely, whoops
2918    float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];
2919    // set up twiddle factors
2920 
2921    for (k=k2=0; k < n4; ++k,k2+=2) {
2922       A[k2  ] = (float)  cos(4*k*M_PI/n);
2923       A[k2+1] = (float) -sin(4*k*M_PI/n);
2924       B[k2  ] = (float)  cos((k2+1)*M_PI/n/2);
2925       B[k2+1] = (float)  sin((k2+1)*M_PI/n/2);
2926    }
2927    for (k=k2=0; k < n8; ++k,k2+=2) {
2928       C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
2929       C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
2930    }
2931 
2932    // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2933    // Note there are bugs in that pseudocode, presumably due to them attempting
2934    // to rename the arrays nicely rather than representing the way their actual
2935    // implementation bounces buffers back and forth. As a result, even in the
2936    // "some formulars corrected" version, a direct implementation fails. These
2937    // are noted below as "paper bug".
2938 
2939    // copy and reflect spectral data
2940    for (k=0; k < n2; ++k) u[k] = buffer[k];
2941    for (   ; k < n ; ++k) u[k] = -buffer[n - k - 1];
2942    // kernel from paper
2943    // step 1
2944    for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
2945       v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2]   - (u[k4+2] - u[n-k4-3])*A[k2+1];
2946       v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
2947    }
2948    // step 2
2949    for (k=k4=0; k < n8; k+=1, k4+=4) {
2950       w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
2951       w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
2952       w[k4+3]    = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
2953       w[k4+1]    = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
2954    }
2955    // step 3
2956    ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2957    for (l=0; l < ld-3; ++l) {
2958       int k0 = n >> (l+2), k1 = 1 << (l+3);
2959       int rlim = n >> (l+4), r4, r;
2960       int s2lim = 1 << (l+2), s2;
2961       for (r=r4=0; r < rlim; r4+=4,++r) {
2962          for (s2=0; s2 < s2lim; s2+=2) {
2963             u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
2964             u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
2965             u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
2966                                 - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
2967             u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
2968                                 + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
2969          }
2970       }
2971       if (l+1 < ld-3) {
2972          // paper bug: ping-ponging of u&w here is omitted
2973          memcpy(w, u, sizeof(u));
2974       }
2975    }
2976 
2977    // step 4
2978    for (i=0; i < n8; ++i) {
2979       int j = bit_reverse(i) >> (32-ld+3);
2980       assert(j < n8);
2981       if (i == j) {
2982          // paper bug: original code probably swapped in place; if copying,
2983          //            need to directly copy in this case
2984          int i8 = i << 3;
2985          v[i8+1] = u[i8+1];
2986          v[i8+3] = u[i8+3];
2987          v[i8+5] = u[i8+5];
2988          v[i8+7] = u[i8+7];
2989       } else if (i < j) {
2990          int i8 = i << 3, j8 = j << 3;
2991          v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
2992          v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
2993          v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
2994          v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
2995       }
2996    }
2997    // step 5
2998    for (k=0; k < n2; ++k) {
2999       w[k] = v[k*2+1];
3000    }
3001    // step 6
3002    for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
3003       u[n-1-k2] = w[k4];
3004       u[n-2-k2] = w[k4+1];
3005       u[n3_4 - 1 - k2] = w[k4+2];
3006       u[n3_4 - 2 - k2] = w[k4+3];
3007    }
3008    // step 7
3009    for (k=k2=0; k < n8; ++k, k2 += 2) {
3010       v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3011       v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3012       v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3013       v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3014    }
3015    // step 8
3016    for (k=k2=0; k < n4; ++k,k2 += 2) {
3017       X[k]      = v[k2+n2]*B[k2  ] + v[k2+1+n2]*B[k2+1];
3018       X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2  ];
3019    }
3020 
3021    // decode kernel to output
3022    // determined the following value experimentally
3023    // (by first figuring out what made inverse_mdct_slow work); then matching that here
3024    // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
3025    s = 0.5; // theoretically would be n4
3026 
3027    // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
3028    //     so it needs to use the "old" B values to behave correctly, or else
3029    //     set s to 1.0 ]]]
3030    for (i=0; i < n4  ; ++i) buffer[i] = s * X[i+n4];
3031    for (   ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
3032    for (   ; i < n   ; ++i) buffer[i] = -s * X[i - n3_4];
3033 }
3034 #endif
3035 
get_window(vorb * f,int len)3036 static float *get_window(vorb *f, int len)
3037 {
3038    len <<= 1;
3039    if (len == f->blocksize_0) return f->window[0];
3040    if (len == f->blocksize_1) return f->window[1];
3041    assert(0);
3042    return NULL;
3043 }
3044 
3045 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3046 typedef int16 YTYPE;
3047 #else
3048 typedef int YTYPE;
3049 #endif
do_floor(vorb * f,Mapping * map,int i,int n,float * target,YTYPE * finalY,uint8 * step2_flag)3050 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
3051 {
3052    int n2 = n >> 1;
3053    int s = map->chan[i].mux, floor;
3054    floor = map->submap_floor[s];
3055    if (f->floor_types[floor] == 0) {
3056       return error(f, VORBIS_invalid_stream);
3057    } else {
3058       Floor1 *g = &f->floor_config[floor].floor1;
3059       int j,q;
3060       int lx = 0, ly = finalY[0] * g->floor1_multiplier;
3061       for (q=1; q < g->values; ++q) {
3062          j = g->sorted_order[q];
3063          #ifndef STB_VORBIS_NO_DEFER_FLOOR
3064          if (finalY[j] >= 0)
3065          #else
3066          if (step2_flag[j])
3067          #endif
3068          {
3069             int hy = finalY[j] * g->floor1_multiplier;
3070             int hx = g->Xlist[j];
3071             if (lx != hx)
3072                draw_line(target, lx,ly, hx,hy, n2);
3073             CHECK(f);
3074             lx = hx, ly = hy;
3075          }
3076       }
3077       if (lx < n2) {
3078          // optimization of: draw_line(target, lx,ly, n,ly, n2);
3079          for (j=lx; j < n2; ++j)
3080             LINE_OP(target[j], inverse_db_table[ly]);
3081          CHECK(f);
3082       }
3083    }
3084    return TRUE;
3085 }
3086 
3087 // The meaning of "left" and "right"
3088 //
3089 // For a given frame:
3090 //     we compute samples from 0..n
3091 //     window_center is n/2
3092 //     we'll window and mix the samples from left_start to left_end with data from the previous frame
3093 //     all of the samples from left_end to right_start can be output without mixing; however,
3094 //        this interval is 0-length except when transitioning between short and long frames
3095 //     all of the samples from right_start to right_end need to be mixed with the next frame,
3096 //        which we don't have, so those get saved in a buffer
3097 //     frame N's right_end-right_start, the number of samples to mix with the next frame,
3098 //        has to be the same as frame N+1's left_end-left_start (which they are by
3099 //        construction)
3100 
vorbis_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)3101 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
3102 {
3103    Mode *m;
3104    int i, n, prev, next, window_center;
3105    f->channel_buffer_start = f->channel_buffer_end = 0;
3106 
3107   retry:
3108    if (f->eof) return FALSE;
3109    if (!maybe_start_packet(f))
3110       return FALSE;
3111    // check packet type
3112    if (get_bits(f,1) != 0) {
3113       if (IS_PUSH_MODE(f))
3114          return error(f,VORBIS_bad_packet_type);
3115       while (EOP != get8_packet(f));
3116       goto retry;
3117    }
3118 
3119    if (f->alloc.alloc_buffer)
3120       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3121 
3122    i = get_bits(f, ilog(f->mode_count-1));
3123    if (i == EOP) return FALSE;
3124    if (i >= f->mode_count) return FALSE;
3125    *mode = i;
3126    m = f->mode_config + i;
3127    if (m->blockflag) {
3128       n = f->blocksize_1;
3129       prev = get_bits(f,1);
3130       next = get_bits(f,1);
3131    } else {
3132       prev = next = 0;
3133       n = f->blocksize_0;
3134    }
3135 
3136 // WINDOWING
3137 
3138    window_center = n >> 1;
3139    if (m->blockflag && !prev) {
3140       *p_left_start = (n - f->blocksize_0) >> 2;
3141       *p_left_end   = (n + f->blocksize_0) >> 2;
3142    } else {
3143       *p_left_start = 0;
3144       *p_left_end   = window_center;
3145    }
3146    if (m->blockflag && !next) {
3147       *p_right_start = (n*3 - f->blocksize_0) >> 2;
3148       *p_right_end   = (n*3 + f->blocksize_0) >> 2;
3149    } else {
3150       *p_right_start = window_center;
3151       *p_right_end   = n;
3152    }
3153 
3154    return TRUE;
3155 }
3156 
vorbis_decode_packet_rest(vorb * f,int * len,Mode * m,int left_start,int left_end,int right_start,int right_end,int * p_left)3157 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
3158 {
3159    Mapping *map;
3160    int i,j,k,n,n2;
3161    int zero_channel[256];
3162    int really_zero_channel[256];
3163 
3164 // WINDOWING
3165 
3166    n = f->blocksize[m->blockflag];
3167    map = &f->mapping[m->mapping];
3168 
3169 // FLOORS
3170    n2 = n >> 1;
3171 
3172    CHECK(f);
3173 
3174    for (i=0; i < f->channels; ++i) {
3175       int s = map->chan[i].mux, floor;
3176       zero_channel[i] = FALSE;
3177       floor = map->submap_floor[s];
3178       if (f->floor_types[floor] == 0) {
3179          return error(f, VORBIS_invalid_stream);
3180       } else {
3181          Floor1 *g = &f->floor_config[floor].floor1;
3182          if (get_bits(f, 1)) {
3183             short *finalY;
3184             uint8 step2_flag[256];
3185             static int range_list[4] = { 256, 128, 86, 64 };
3186             int range = range_list[g->floor1_multiplier-1];
3187             int offset = 2;
3188             finalY = f->finalY[i];
3189             finalY[0] = get_bits(f, ilog(range)-1);
3190             finalY[1] = get_bits(f, ilog(range)-1);
3191             for (j=0; j < g->partitions; ++j) {
3192                int pclass = g->partition_class_list[j];
3193                int cdim = g->class_dimensions[pclass];
3194                int cbits = g->class_subclasses[pclass];
3195                int csub = (1 << cbits)-1;
3196                int cval = 0;
3197                if (cbits) {
3198                   Codebook *c = f->codebooks + g->class_masterbooks[pclass];
3199                   DECODE(cval,f,c);
3200                }
3201                for (k=0; k < cdim; ++k) {
3202                   int book = g->subclass_books[pclass][cval & csub];
3203                   cval = cval >> cbits;
3204                   if (book >= 0) {
3205                      int temp;
3206                      Codebook *c = f->codebooks + book;
3207                      DECODE(temp,f,c);
3208                      finalY[offset++] = temp;
3209                   } else
3210                      finalY[offset++] = 0;
3211                }
3212             }
3213             if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
3214             step2_flag[0] = step2_flag[1] = 1;
3215             for (j=2; j < g->values; ++j) {
3216                int low, high, pred, highroom, lowroom, room, val;
3217                low = g->neighbors[j][0];
3218                high = g->neighbors[j][1];
3219                //neighbors(g->Xlist, j, &low, &high);
3220                pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
3221                val = finalY[j];
3222                highroom = range - pred;
3223                lowroom = pred;
3224                if (highroom < lowroom)
3225                   room = highroom * 2;
3226                else
3227                   room = lowroom * 2;
3228                if (val) {
3229                   step2_flag[low] = step2_flag[high] = 1;
3230                   step2_flag[j] = 1;
3231                   if (val >= room)
3232                      if (highroom > lowroom)
3233                         finalY[j] = val - lowroom + pred;
3234                      else
3235                         finalY[j] = pred - val + highroom - 1;
3236                   else
3237                      if (val & 1)
3238                         finalY[j] = pred - ((val+1)>>1);
3239                      else
3240                         finalY[j] = pred + (val>>1);
3241                } else {
3242                   step2_flag[j] = 0;
3243                   finalY[j] = pred;
3244                }
3245             }
3246 
3247 #ifdef STB_VORBIS_NO_DEFER_FLOOR
3248             do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
3249 #else
3250             // defer final floor computation until _after_ residue
3251             for (j=0; j < g->values; ++j) {
3252                if (!step2_flag[j])
3253                   finalY[j] = -1;
3254             }
3255 #endif
3256          } else {
3257            error:
3258             zero_channel[i] = TRUE;
3259          }
3260          // So we just defer everything else to later
3261 
3262          // at this point we've decoded the floor into buffer
3263       }
3264    }
3265    CHECK(f);
3266    // at this point we've decoded all floors
3267 
3268    if (f->alloc.alloc_buffer)
3269       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3270 
3271    // re-enable coupled channels if necessary
3272    memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
3273    for (i=0; i < map->coupling_steps; ++i)
3274       if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
3275          zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
3276       }
3277 
3278    CHECK(f);
3279 // RESIDUE DECODE
3280    for (i=0; i < map->submaps; ++i) {
3281       float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
3282       int r;
3283       uint8 do_not_decode[256];
3284       int ch = 0;
3285       for (j=0; j < f->channels; ++j) {
3286          if (map->chan[j].mux == i) {
3287             if (zero_channel[j]) {
3288                do_not_decode[ch] = TRUE;
3289                residue_buffers[ch] = NULL;
3290             } else {
3291                do_not_decode[ch] = FALSE;
3292                residue_buffers[ch] = f->channel_buffers[j];
3293             }
3294             ++ch;
3295          }
3296       }
3297       r = map->submap_residue[i];
3298       decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
3299    }
3300 
3301    if (f->alloc.alloc_buffer)
3302       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3303    CHECK(f);
3304 
3305 // INVERSE COUPLING
3306    for (i = map->coupling_steps-1; i >= 0; --i) {
3307       int n2 = n >> 1;
3308       float *m = f->channel_buffers[map->chan[i].magnitude];
3309       float *a = f->channel_buffers[map->chan[i].angle    ];
3310       for (j=0; j < n2; ++j) {
3311          float a2,m2;
3312          if (m[j] > 0)
3313             if (a[j] > 0)
3314                m2 = m[j], a2 = m[j] - a[j];
3315             else
3316                a2 = m[j], m2 = m[j] + a[j];
3317          else
3318             if (a[j] > 0)
3319                m2 = m[j], a2 = m[j] + a[j];
3320             else
3321                a2 = m[j], m2 = m[j] - a[j];
3322          m[j] = m2;
3323          a[j] = a2;
3324       }
3325    }
3326    CHECK(f);
3327 
3328    // finish decoding the floors
3329 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3330    for (i=0; i < f->channels; ++i) {
3331       if (really_zero_channel[i]) {
3332          memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3333       } else {
3334          do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
3335       }
3336    }
3337 #else
3338    for (i=0; i < f->channels; ++i) {
3339       if (really_zero_channel[i]) {
3340          memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3341       } else {
3342          for (j=0; j < n2; ++j)
3343             f->channel_buffers[i][j] *= f->floor_buffers[i][j];
3344       }
3345    }
3346 #endif
3347 
3348 // INVERSE MDCT
3349    CHECK(f);
3350    for (i=0; i < f->channels; ++i)
3351       inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
3352    CHECK(f);
3353 
3354    // this shouldn't be necessary, unless we exited on an error
3355    // and want to flush to get to the next packet
3356    flush_packet(f);
3357 
3358    if (f->first_decode) {
3359       // assume we start so first non-discarded sample is sample 0
3360       // this isn't to spec, but spec would require us to read ahead
3361       // and decode the size of all current frames--could be done,
3362       // but presumably it's not a commonly used feature
3363       f->current_loc = -n2; // start of first frame is positioned for discard
3364       // we might have to discard samples "from" the next frame too,
3365       // if we're lapping a large block then a small at the start?
3366       f->discard_samples_deferred = n - right_end;
3367       f->current_loc_valid = TRUE;
3368       f->first_decode = FALSE;
3369    } else if (f->discard_samples_deferred) {
3370       if (f->discard_samples_deferred >= right_start - left_start) {
3371          f->discard_samples_deferred -= (right_start - left_start);
3372          left_start = right_start;
3373          *p_left = left_start;
3374       } else {
3375          left_start += f->discard_samples_deferred;
3376          *p_left = left_start;
3377          f->discard_samples_deferred = 0;
3378       }
3379    } else if (f->previous_length == 0 && f->current_loc_valid) {
3380       // we're recovering from a seek... that means we're going to discard
3381       // the samples from this packet even though we know our position from
3382       // the last page header, so we need to update the position based on
3383       // the discarded samples here
3384       // but wait, the code below is going to add this in itself even
3385       // on a discard, so we don't need to do it here...
3386    }
3387 
3388    // check if we have ogg information about the sample # for this packet
3389    if (f->last_seg_which == f->end_seg_with_known_loc) {
3390       // if we have a valid current loc, and this is final:
3391       if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
3392          uint32 current_end = f->known_loc_for_packet - (n-right_end);
3393          // then let's infer the size of the (probably) short final frame
3394          if (current_end < f->current_loc + (right_end-left_start)) {
3395             if (current_end < f->current_loc) {
3396                // negative truncation, that's impossible!
3397                *len = 0;
3398             } else {
3399                *len = current_end - f->current_loc;
3400             }
3401             *len += left_start;
3402             if (*len > right_end) *len = right_end; // this should never happen
3403             f->current_loc += *len;
3404             return TRUE;
3405          }
3406       }
3407       // otherwise, just set our sample loc
3408       // guess that the ogg granule pos refers to the _middle_ of the
3409       // last frame?
3410       // set f->current_loc to the position of left_start
3411       f->current_loc = f->known_loc_for_packet - (n2-left_start);
3412       f->current_loc_valid = TRUE;
3413    }
3414    if (f->current_loc_valid)
3415       f->current_loc += (right_start - left_start);
3416 
3417    if (f->alloc.alloc_buffer)
3418       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3419    *len = right_end;  // ignore samples after the window goes to 0
3420    CHECK(f);
3421 
3422    return TRUE;
3423 }
3424 
vorbis_decode_packet(vorb * f,int * len,int * p_left,int * p_right)3425 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
3426 {
3427    int mode, left_end, right_end;
3428    if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
3429    return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
3430 }
3431 
vorbis_finish_frame(stb_vorbis * f,int len,int left,int right)3432 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
3433 {
3434    int prev,i,j;
3435    // we use right&left (the start of the right- and left-window sin()-regions)
3436    // to determine how much to return, rather than inferring from the rules
3437    // (same result, clearer code); 'left' indicates where our sin() window
3438    // starts, therefore where the previous window's right edge starts, and
3439    // therefore where to start mixing from the previous buffer. 'right'
3440    // indicates where our sin() ending-window starts, therefore that's where
3441    // we start saving, and where our returned-data ends.
3442 
3443    // mixin from previous window
3444    if (f->previous_length) {
3445       int i,j, n = f->previous_length;
3446       float *w = get_window(f, n);
3447       for (i=0; i < f->channels; ++i) {
3448          for (j=0; j < n; ++j)
3449             f->channel_buffers[i][left+j] =
3450                f->channel_buffers[i][left+j]*w[    j] +
3451                f->previous_window[i][     j]*w[n-1-j];
3452       }
3453    }
3454 
3455    prev = f->previous_length;
3456 
3457    // last half of this data becomes previous window
3458    f->previous_length = len - right;
3459 
3460    // @OPTIMIZE: could avoid this copy by double-buffering the
3461    // output (flipping previous_window with channel_buffers), but
3462    // then previous_window would have to be 2x as large, and
3463    // channel_buffers couldn't be temp mem (although they're NOT
3464    // currently temp mem, they could be (unless we want to level
3465    // performance by spreading out the computation))
3466    for (i=0; i < f->channels; ++i)
3467       for (j=0; right+j < len; ++j)
3468          f->previous_window[i][j] = f->channel_buffers[i][right+j];
3469 
3470    if (!prev)
3471       // there was no previous packet, so this data isn't valid...
3472       // this isn't entirely true, only the would-have-overlapped data
3473       // isn't valid, but this seems to be what the spec requires
3474       return 0;
3475 
3476    // truncate a short frame
3477    if (len < right) right = len;
3478 
3479    f->samples_output += right-left;
3480 
3481    return right - left;
3482 }
3483 
vorbis_pump_first_frame(stb_vorbis * f)3484 static void vorbis_pump_first_frame(stb_vorbis *f)
3485 {
3486    int len, right, left;
3487    if (vorbis_decode_packet(f, &len, &left, &right))
3488       vorbis_finish_frame(f, len, left, right);
3489 }
3490 
3491 #ifndef STB_VORBIS_NO_PUSHDATA_API
is_whole_packet_present(stb_vorbis * f,int end_page)3492 static int is_whole_packet_present(stb_vorbis *f, int end_page)
3493 {
3494    // make sure that we have the packet available before continuing...
3495    // this requires a full ogg parse, but we know we can fetch from f->stream
3496 
3497    // instead of coding this out explicitly, we could save the current read state,
3498    // read the next packet with get8() until end-of-packet, check f->eof, then
3499    // reset the state? but that would be slower, esp. since we'd have over 256 bytes
3500    // of state to restore (primarily the page segment table)
3501 
3502    int s = f->next_seg, first = TRUE;
3503    uint8 *p = f->stream;
3504 
3505    if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
3506       for (; s < f->segment_count; ++s) {
3507          p += f->segments[s];
3508          if (f->segments[s] < 255)               // stop at first short segment
3509             break;
3510       }
3511       // either this continues, or it ends it...
3512       if (end_page)
3513          if (s < f->segment_count-1)             return error(f, VORBIS_invalid_stream);
3514       if (s == f->segment_count)
3515          s = -1; // set 'crosses page' flag
3516       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
3517       first = FALSE;
3518    }
3519    for (; s == -1;) {
3520       uint8 *q;
3521       int n;
3522 
3523       // check that we have the page header ready
3524       if (p + 26 >= f->stream_end)               return error(f, VORBIS_need_more_data);
3525       // validate the page
3526       if (memcmp(p, ogg_page_header, 4))         return error(f, VORBIS_invalid_stream);
3527       if (p[4] != 0)                             return error(f, VORBIS_invalid_stream);
3528       if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
3529          if (f->previous_length)
3530             if ((p[5] & PAGEFLAG_continued_packet))  return error(f, VORBIS_invalid_stream);
3531          // if no previous length, we're resynching, so we can come in on a continued-packet,
3532          // which we'll just drop
3533       } else {
3534          if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3535       }
3536       n = p[26]; // segment counts
3537       q = p+27;  // q points to segment table
3538       p = q + n; // advance past header
3539       // make sure we've read the segment table
3540       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
3541       for (s=0; s < n; ++s) {
3542          p += q[s];
3543          if (q[s] < 255)
3544             break;
3545       }
3546       if (end_page)
3547          if (s < n-1)                            return error(f, VORBIS_invalid_stream);
3548       if (s == n)
3549          s = -1; // set 'crosses page' flag
3550       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
3551       first = FALSE;
3552    }
3553    return TRUE;
3554 }
3555 #endif // !STB_VORBIS_NO_PUSHDATA_API
3556 
start_decoder(vorb * f)3557 static int start_decoder(vorb *f)
3558 {
3559    uint8 header[6], x,y;
3560    int len,i,j,k, max_submaps = 0;
3561    int longest_floorlist=0;
3562 
3563    // first page, first packet
3564 
3565    if (!start_page(f))                              return FALSE;
3566    // validate page flag
3567    if (!(f->page_flag & PAGEFLAG_first_page))       return error(f, VORBIS_invalid_first_page);
3568    if (f->page_flag & PAGEFLAG_last_page)           return error(f, VORBIS_invalid_first_page);
3569    if (f->page_flag & PAGEFLAG_continued_packet)    return error(f, VORBIS_invalid_first_page);
3570    // check for expected packet length
3571    if (f->segment_count != 1)                       return error(f, VORBIS_invalid_first_page);
3572    if (f->segments[0] != 30)                        return error(f, VORBIS_invalid_first_page);
3573    // read packet
3574    // check packet header
3575    if (get8(f) != VORBIS_packet_id)                 return error(f, VORBIS_invalid_first_page);
3576    if (!getn(f, header, 6))                         return error(f, VORBIS_unexpected_eof);
3577    if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_first_page);
3578    // vorbis_version
3579    if (get32(f) != 0)                               return error(f, VORBIS_invalid_first_page);
3580    f->channels = get8(f); if (!f->channels)         return error(f, VORBIS_invalid_first_page);
3581    if (f->channels > STB_VORBIS_MAX_CHANNELS)       return error(f, VORBIS_too_many_channels);
3582    f->sample_rate = get32(f); if (!f->sample_rate)  return error(f, VORBIS_invalid_first_page);
3583    get32(f); // bitrate_maximum
3584    get32(f); // bitrate_nominal
3585    get32(f); // bitrate_minimum
3586    x = get8(f);
3587    {
3588       int log0,log1;
3589       log0 = x & 15;
3590       log1 = x >> 4;
3591       f->blocksize_0 = 1 << log0;
3592       f->blocksize_1 = 1 << log1;
3593       if (log0 < 6 || log0 > 13)                       return error(f, VORBIS_invalid_setup);
3594       if (log1 < 6 || log1 > 13)                       return error(f, VORBIS_invalid_setup);
3595       if (log0 > log1)                                 return error(f, VORBIS_invalid_setup);
3596    }
3597 
3598    // framing_flag
3599    x = get8(f);
3600    if (!(x & 1))                                    return error(f, VORBIS_invalid_first_page);
3601 
3602    // second packet!
3603    if (!start_page(f))                              return FALSE;
3604 
3605    if (!start_packet(f))                            return FALSE;
3606    do {
3607       len = next_segment(f);
3608       skip(f, len);
3609       f->bytes_in_seg = 0;
3610    } while (len);
3611 
3612    // third packet!
3613    if (!start_packet(f))                            return FALSE;
3614 
3615    #ifndef STB_VORBIS_NO_PUSHDATA_API
3616    if (IS_PUSH_MODE(f)) {
3617       if (!is_whole_packet_present(f, TRUE)) {
3618          // convert error in ogg header to write type
3619          if (f->error == VORBIS_invalid_stream)
3620             f->error = VORBIS_invalid_setup;
3621          return FALSE;
3622       }
3623    }
3624    #endif
3625 
3626    crc32_init(); // always init it, to avoid multithread race conditions
3627 
3628    if (get8_packet(f) != VORBIS_packet_setup)       return error(f, VORBIS_invalid_setup);
3629    for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3630    if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_setup);
3631 
3632    // codebooks
3633 
3634    f->codebook_count = get_bits(f,8) + 1;
3635    f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
3636    if (f->codebooks == NULL)                        return error(f, VORBIS_outofmem);
3637    memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
3638    for (i=0; i < f->codebook_count; ++i) {
3639       uint32 *values;
3640       int ordered, sorted_count;
3641       int total=0;
3642       uint8 *lengths;
3643       Codebook *c = f->codebooks+i;
3644       CHECK(f);
3645       x = get_bits(f, 8); if (x != 0x42)            return error(f, VORBIS_invalid_setup);
3646       x = get_bits(f, 8); if (x != 0x43)            return error(f, VORBIS_invalid_setup);
3647       x = get_bits(f, 8); if (x != 0x56)            return error(f, VORBIS_invalid_setup);
3648       x = get_bits(f, 8);
3649       c->dimensions = (get_bits(f, 8)<<8) + x;
3650       x = get_bits(f, 8);
3651       y = get_bits(f, 8);
3652       c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
3653       ordered = get_bits(f,1);
3654       c->sparse = ordered ? 0 : get_bits(f,1);
3655 
3656       if (c->dimensions == 0 && c->entries != 0)    return error(f, VORBIS_invalid_setup);
3657 
3658       if (c->sparse)
3659          lengths = (uint8 *) setup_temp_malloc(f, c->entries);
3660       else
3661          lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3662 
3663       if (!lengths) return error(f, VORBIS_outofmem);
3664 
3665       if (ordered) {
3666          int current_entry = 0;
3667          int current_length = get_bits(f,5) + 1;
3668          while (current_entry < c->entries) {
3669             int limit = c->entries - current_entry;
3670             int n = get_bits(f, ilog(limit));
3671             if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
3672             memset(lengths + current_entry, current_length, n);
3673             current_entry += n;
3674             ++current_length;
3675          }
3676       } else {
3677          for (j=0; j < c->entries; ++j) {
3678             int present = c->sparse ? get_bits(f,1) : 1;
3679             if (present) {
3680                lengths[j] = get_bits(f, 5) + 1;
3681                ++total;
3682                if (lengths[j] == 32)
3683                   return error(f, VORBIS_invalid_setup);
3684             } else {
3685                lengths[j] = NO_CODE;
3686             }
3687          }
3688       }
3689 
3690       if (c->sparse && total >= c->entries >> 2) {
3691          // convert sparse items to non-sparse!
3692          if (c->entries > (int) f->setup_temp_memory_required)
3693             f->setup_temp_memory_required = c->entries;
3694 
3695          c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3696          if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem);
3697          memcpy(c->codeword_lengths, lengths, c->entries);
3698          setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
3699          lengths = c->codeword_lengths;
3700          c->sparse = 0;
3701       }
3702 
3703       // compute the size of the sorted tables
3704       if (c->sparse) {
3705          sorted_count = total;
3706       } else {
3707          sorted_count = 0;
3708          #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
3709          for (j=0; j < c->entries; ++j)
3710             if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
3711                ++sorted_count;
3712          #endif
3713       }
3714 
3715       c->sorted_entries = sorted_count;
3716       values = NULL;
3717 
3718       CHECK(f);
3719       if (!c->sparse) {
3720          c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
3721          if (!c->codewords)                  return error(f, VORBIS_outofmem);
3722       } else {
3723          unsigned int size;
3724          if (c->sorted_entries) {
3725             c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries);
3726             if (!c->codeword_lengths)           return error(f, VORBIS_outofmem);
3727             c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
3728             if (!c->codewords)                  return error(f, VORBIS_outofmem);
3729             values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
3730             if (!values)                        return error(f, VORBIS_outofmem);
3731          }
3732          size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
3733          if (size > f->setup_temp_memory_required)
3734             f->setup_temp_memory_required = size;
3735       }
3736 
3737       if (!compute_codewords(c, lengths, c->entries, values)) {
3738          if (c->sparse) setup_temp_free(f, values, 0);
3739          return error(f, VORBIS_invalid_setup);
3740       }
3741 
3742       if (c->sorted_entries) {
3743          // allocate an extra slot for sentinels
3744          c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
3745          if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem);
3746          // allocate an extra slot at the front so that c->sorted_values[-1] is defined
3747          // so that we can catch that case without an extra if
3748          c->sorted_values    = ( int   *) setup_malloc(f, sizeof(*c->sorted_values   ) * (c->sorted_entries+1));
3749          if (c->sorted_values == NULL) return error(f, VORBIS_outofmem);
3750          ++c->sorted_values;
3751          c->sorted_values[-1] = -1;
3752          compute_sorted_huffman(c, lengths, values);
3753       }
3754 
3755       if (c->sparse) {
3756          setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
3757          setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
3758          setup_temp_free(f, lengths, c->entries);
3759          c->codewords = NULL;
3760       }
3761 
3762       compute_accelerated_huffman(c);
3763 
3764       CHECK(f);
3765       c->lookup_type = get_bits(f, 4);
3766       if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
3767       if (c->lookup_type > 0) {
3768          uint16 *mults;
3769          c->minimum_value = float32_unpack(get_bits(f, 32));
3770          c->delta_value = float32_unpack(get_bits(f, 32));
3771          c->value_bits = get_bits(f, 4)+1;
3772          c->sequence_p = get_bits(f,1);
3773          if (c->lookup_type == 1) {
3774             c->lookup_values = lookup1_values(c->entries, c->dimensions);
3775          } else {
3776             c->lookup_values = c->entries * c->dimensions;
3777          }
3778          if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup);
3779          mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
3780          if (mults == NULL) return error(f, VORBIS_outofmem);
3781          for (j=0; j < (int) c->lookup_values; ++j) {
3782             int q = get_bits(f, c->value_bits);
3783             if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
3784             mults[j] = q;
3785          }
3786 
3787 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3788          if (c->lookup_type == 1) {
3789             int len, sparse = c->sparse;
3790             float last=0;
3791             // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3792             if (sparse) {
3793                if (c->sorted_entries == 0) goto skip;
3794                c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
3795             } else
3796                c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries        * c->dimensions);
3797             if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3798             len = sparse ? c->sorted_entries : c->entries;
3799             for (j=0; j < len; ++j) {
3800                unsigned int z = sparse ? c->sorted_values[j] : j;
3801                unsigned int div=1;
3802                for (k=0; k < c->dimensions; ++k) {
3803                   int off = (z / div) % c->lookup_values;
3804                   float val = mults[off];
3805                   val = mults[off]*c->delta_value + c->minimum_value + last;
3806                   c->multiplicands[j*c->dimensions + k] = val;
3807                   if (c->sequence_p)
3808                      last = val;
3809                   if (k+1 < c->dimensions) {
3810                      if (div > UINT_MAX / (unsigned int) c->lookup_values) {
3811                         setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
3812                         return error(f, VORBIS_invalid_setup);
3813                      }
3814                      div *= c->lookup_values;
3815                   }
3816                }
3817             }
3818             c->lookup_type = 2;
3819          }
3820          else
3821 #endif
3822          {
3823             float last=0;
3824             CHECK(f);
3825             c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
3826             if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3827             for (j=0; j < (int) c->lookup_values; ++j) {
3828                float val = mults[j] * c->delta_value + c->minimum_value + last;
3829                c->multiplicands[j] = val;
3830                if (c->sequence_p)
3831                   last = val;
3832             }
3833          }
3834 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3835         skip:;
3836 #endif
3837          setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values);
3838 
3839          CHECK(f);
3840       }
3841       CHECK(f);
3842    }
3843 
3844    // time domain transfers (notused)
3845 
3846    x = get_bits(f, 6) + 1;
3847    for (i=0; i < x; ++i) {
3848       uint32 z = get_bits(f, 16);
3849       if (z != 0) return error(f, VORBIS_invalid_setup);
3850    }
3851 
3852    // Floors
3853    f->floor_count = get_bits(f, 6)+1;
3854    f->floor_config = (Floor *)  setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
3855    if (f->floor_config == NULL) return error(f, VORBIS_outofmem);
3856    for (i=0; i < f->floor_count; ++i) {
3857       f->floor_types[i] = get_bits(f, 16);
3858       if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
3859       if (f->floor_types[i] == 0) {
3860          Floor0 *g = &f->floor_config[i].floor0;
3861          g->order = get_bits(f,8);
3862          g->rate = get_bits(f,16);
3863          g->bark_map_size = get_bits(f,16);
3864          g->amplitude_bits = get_bits(f,6);
3865          g->amplitude_offset = get_bits(f,8);
3866          g->number_of_books = get_bits(f,4) + 1;
3867          for (j=0; j < g->number_of_books; ++j)
3868             g->book_list[j] = get_bits(f,8);
3869          return error(f, VORBIS_feature_not_supported);
3870       } else {
3871          Point p[31*8+2];
3872          Floor1 *g = &f->floor_config[i].floor1;
3873          int max_class = -1;
3874          g->partitions = get_bits(f, 5);
3875          for (j=0; j < g->partitions; ++j) {
3876             g->partition_class_list[j] = get_bits(f, 4);
3877             if (g->partition_class_list[j] > max_class)
3878                max_class = g->partition_class_list[j];
3879          }
3880          for (j=0; j <= max_class; ++j) {
3881             g->class_dimensions[j] = get_bits(f, 3)+1;
3882             g->class_subclasses[j] = get_bits(f, 2);
3883             if (g->class_subclasses[j]) {
3884                g->class_masterbooks[j] = get_bits(f, 8);
3885                if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3886             }
3887             for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
3888                g->subclass_books[j][k] = get_bits(f,8)-1;
3889                if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3890             }
3891          }
3892          g->floor1_multiplier = get_bits(f,2)+1;
3893          g->rangebits = get_bits(f,4);
3894          g->Xlist[0] = 0;
3895          g->Xlist[1] = 1 << g->rangebits;
3896          g->values = 2;
3897          for (j=0; j < g->partitions; ++j) {
3898             int c = g->partition_class_list[j];
3899             for (k=0; k < g->class_dimensions[c]; ++k) {
3900                g->Xlist[g->values] = get_bits(f, g->rangebits);
3901                ++g->values;
3902             }
3903          }
3904          // precompute the sorting
3905          for (j=0; j < g->values; ++j) {
3906             p[j].x = g->Xlist[j];
3907             p[j].y = j;
3908          }
3909          qsort(p, g->values, sizeof(p[0]), point_compare);
3910          for (j=0; j < g->values; ++j)
3911             g->sorted_order[j] = (uint8) p[j].y;
3912          // precompute the neighbors
3913          for (j=2; j < g->values; ++j) {
3914             int low,hi;
3915             neighbors(g->Xlist, j, &low,&hi);
3916             g->neighbors[j][0] = low;
3917             g->neighbors[j][1] = hi;
3918          }
3919 
3920          if (g->values > longest_floorlist)
3921             longest_floorlist = g->values;
3922       }
3923    }
3924 
3925    // Residue
3926    f->residue_count = get_bits(f, 6)+1;
3927    f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0]));
3928    if (f->residue_config == NULL) return error(f, VORBIS_outofmem);
3929    memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0]));
3930    for (i=0; i < f->residue_count; ++i) {
3931       uint8 residue_cascade[64];
3932       Residue *r = f->residue_config+i;
3933       f->residue_types[i] = get_bits(f, 16);
3934       if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
3935       r->begin = get_bits(f, 24);
3936       r->end = get_bits(f, 24);
3937       if (r->end < r->begin) return error(f, VORBIS_invalid_setup);
3938       r->part_size = get_bits(f,24)+1;
3939       r->classifications = get_bits(f,6)+1;
3940       r->classbook = get_bits(f,8);
3941       if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3942       for (j=0; j < r->classifications; ++j) {
3943          uint8 high_bits=0;
3944          uint8 low_bits=get_bits(f,3);
3945          if (get_bits(f,1))
3946             high_bits = get_bits(f,5);
3947          residue_cascade[j] = high_bits*8 + low_bits;
3948       }
3949       r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
3950       if (r->residue_books == NULL) return error(f, VORBIS_outofmem);
3951       for (j=0; j < r->classifications; ++j) {
3952          for (k=0; k < 8; ++k) {
3953             if (residue_cascade[j] & (1 << k)) {
3954                r->residue_books[j][k] = get_bits(f, 8);
3955                if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3956             } else {
3957                r->residue_books[j][k] = -1;
3958             }
3959          }
3960       }
3961       // precompute the classifications[] array to avoid inner-loop mod/divide
3962       // call it 'classdata' since we already have r->classifications
3963       r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3964       if (!r->classdata) return error(f, VORBIS_outofmem);
3965       memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3966       for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
3967          int classwords = f->codebooks[r->classbook].dimensions;
3968          int temp = j;
3969          r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
3970          if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem);
3971          for (k=classwords-1; k >= 0; --k) {
3972             r->classdata[j][k] = temp % r->classifications;
3973             temp /= r->classifications;
3974          }
3975       }
3976    }
3977 
3978    f->mapping_count = get_bits(f,6)+1;
3979    f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
3980    if (f->mapping == NULL) return error(f, VORBIS_outofmem);
3981    memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping));
3982    for (i=0; i < f->mapping_count; ++i) {
3983       Mapping *m = f->mapping + i;
3984       int mapping_type = get_bits(f,16);
3985       if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
3986       m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
3987       if (m->chan == NULL) return error(f, VORBIS_outofmem);
3988       if (get_bits(f,1))
3989          m->submaps = get_bits(f,4)+1;
3990       else
3991          m->submaps = 1;
3992       if (m->submaps > max_submaps)
3993          max_submaps = m->submaps;
3994       if (get_bits(f,1)) {
3995          m->coupling_steps = get_bits(f,8)+1;
3996          for (k=0; k < m->coupling_steps; ++k) {
3997             m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
3998             m->chan[k].angle = get_bits(f, ilog(f->channels-1));
3999             if (m->chan[k].magnitude >= f->channels)        return error(f, VORBIS_invalid_setup);
4000             if (m->chan[k].angle     >= f->channels)        return error(f, VORBIS_invalid_setup);
4001             if (m->chan[k].magnitude == m->chan[k].angle)   return error(f, VORBIS_invalid_setup);
4002          }
4003       } else
4004          m->coupling_steps = 0;
4005 
4006       // reserved field
4007       if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
4008       if (m->submaps > 1) {
4009          for (j=0; j < f->channels; ++j) {
4010             m->chan[j].mux = get_bits(f, 4);
4011             if (m->chan[j].mux >= m->submaps)                return error(f, VORBIS_invalid_setup);
4012          }
4013       } else
4014          // @SPECIFICATION: this case is missing from the spec
4015          for (j=0; j < f->channels; ++j)
4016             m->chan[j].mux = 0;
4017 
4018       for (j=0; j < m->submaps; ++j) {
4019          get_bits(f,8); // discard
4020          m->submap_floor[j] = get_bits(f,8);
4021          m->submap_residue[j] = get_bits(f,8);
4022          if (m->submap_floor[j] >= f->floor_count)      return error(f, VORBIS_invalid_setup);
4023          if (m->submap_residue[j] >= f->residue_count)  return error(f, VORBIS_invalid_setup);
4024       }
4025    }
4026 
4027    // Modes
4028    f->mode_count = get_bits(f, 6)+1;
4029    for (i=0; i < f->mode_count; ++i) {
4030       Mode *m = f->mode_config+i;
4031       m->blockflag = get_bits(f,1);
4032       m->windowtype = get_bits(f,16);
4033       m->transformtype = get_bits(f,16);
4034       m->mapping = get_bits(f,8);
4035       if (m->windowtype != 0)                 return error(f, VORBIS_invalid_setup);
4036       if (m->transformtype != 0)              return error(f, VORBIS_invalid_setup);
4037       if (m->mapping >= f->mapping_count)     return error(f, VORBIS_invalid_setup);
4038    }
4039 
4040    flush_packet(f);
4041 
4042    f->previous_length = 0;
4043 
4044    for (i=0; i < f->channels; ++i) {
4045       f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
4046       f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4047       f->finalY[i]          = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
4048       if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem);
4049       #ifdef STB_VORBIS_NO_DEFER_FLOOR
4050       f->floor_buffers[i]   = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4051       if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem);
4052       #endif
4053    }
4054 
4055    if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
4056    if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
4057    f->blocksize[0] = f->blocksize_0;
4058    f->blocksize[1] = f->blocksize_1;
4059 
4060 #ifdef STB_VORBIS_DIVIDE_TABLE
4061    if (integer_divide_table[1][1]==0)
4062       for (i=0; i < DIVTAB_NUMER; ++i)
4063          for (j=1; j < DIVTAB_DENOM; ++j)
4064             integer_divide_table[i][j] = i / j;
4065 #endif
4066 
4067    // compute how much temporary memory is needed
4068 
4069    // 1.
4070    {
4071       uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
4072       uint32 classify_mem;
4073       int i,max_part_read=0;
4074       for (i=0; i < f->residue_count; ++i) {
4075          Residue *r = f->residue_config + i;
4076          int n_read = r->end - r->begin;
4077          int part_read = n_read / r->part_size;
4078          if (part_read > max_part_read)
4079             max_part_read = part_read;
4080       }
4081       #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
4082       classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
4083       #else
4084       classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
4085       #endif
4086 
4087       f->temp_memory_required = classify_mem;
4088       if (imdct_mem > f->temp_memory_required)
4089          f->temp_memory_required = imdct_mem;
4090    }
4091 
4092    f->first_decode = TRUE;
4093 
4094    if (f->alloc.alloc_buffer) {
4095       assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
4096       // check if there's enough temp memory so we don't error later
4097       if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
4098          return error(f, VORBIS_outofmem);
4099    }
4100 
4101    f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
4102 
4103    return TRUE;
4104 }
4105 
vorbis_deinit(stb_vorbis * p)4106 static void vorbis_deinit(stb_vorbis *p)
4107 {
4108    int i,j;
4109    if (p->residue_config) {
4110       for (i=0; i < p->residue_count; ++i) {
4111          Residue *r = p->residue_config+i;
4112          if (r->classdata) {
4113             for (j=0; j < p->codebooks[r->classbook].entries; ++j)
4114                setup_free(p, r->classdata[j]);
4115             setup_free(p, r->classdata);
4116          }
4117          setup_free(p, r->residue_books);
4118       }
4119    }
4120 
4121    if (p->codebooks) {
4122       CHECK(p);
4123       for (i=0; i < p->codebook_count; ++i) {
4124          Codebook *c = p->codebooks + i;
4125          setup_free(p, c->codeword_lengths);
4126          setup_free(p, c->multiplicands);
4127          setup_free(p, c->codewords);
4128          setup_free(p, c->sorted_codewords);
4129          // c->sorted_values[-1] is the first entry in the array
4130          setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
4131       }
4132       setup_free(p, p->codebooks);
4133    }
4134    setup_free(p, p->floor_config);
4135    setup_free(p, p->residue_config);
4136    if (p->mapping) {
4137       for (i=0; i < p->mapping_count; ++i)
4138          setup_free(p, p->mapping[i].chan);
4139       setup_free(p, p->mapping);
4140    }
4141    CHECK(p);
4142    for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
4143       setup_free(p, p->channel_buffers[i]);
4144       setup_free(p, p->previous_window[i]);
4145       #ifdef STB_VORBIS_NO_DEFER_FLOOR
4146       setup_free(p, p->floor_buffers[i]);
4147       #endif
4148       setup_free(p, p->finalY[i]);
4149    }
4150    for (i=0; i < 2; ++i) {
4151       setup_free(p, p->A[i]);
4152       setup_free(p, p->B[i]);
4153       setup_free(p, p->C[i]);
4154       setup_free(p, p->window[i]);
4155       setup_free(p, p->bit_reverse[i]);
4156    }
4157    #ifndef STB_VORBIS_NO_STDIO
4158    if (p->close_on_free) fclose(p->f);
4159    #endif
4160 }
4161 
stb_vorbis_close(stb_vorbis * p)4162 void stb_vorbis_close(stb_vorbis *p)
4163 {
4164    if (p == NULL) return;
4165    vorbis_deinit(p);
4166    setup_free(p,p);
4167 }
4168 
vorbis_init(stb_vorbis * p,const stb_vorbis_alloc * z)4169 static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
4170 {
4171    memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start
4172    if (z) {
4173       p->alloc = *z;
4174       p->alloc.alloc_buffer_length_in_bytes = (p->alloc.alloc_buffer_length_in_bytes+3) & ~3;
4175       p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
4176    }
4177    p->eof = 0;
4178    p->error = VORBIS__no_error;
4179    p->stream = NULL;
4180    p->codebooks = NULL;
4181    p->page_crc_tests = -1;
4182    #ifndef STB_VORBIS_NO_STDIO
4183    p->close_on_free = FALSE;
4184    p->f = NULL;
4185    #endif
4186 }
4187 
stb_vorbis_get_sample_offset(stb_vorbis * f)4188 int stb_vorbis_get_sample_offset(stb_vorbis *f)
4189 {
4190    if (f->current_loc_valid)
4191       return f->current_loc;
4192    else
4193       return -1;
4194 }
4195 
stb_vorbis_get_info(stb_vorbis * f)4196 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
4197 {
4198    stb_vorbis_info d;
4199    d.channels = f->channels;
4200    d.sample_rate = f->sample_rate;
4201    d.setup_memory_required = f->setup_memory_required;
4202    d.setup_temp_memory_required = f->setup_temp_memory_required;
4203    d.temp_memory_required = f->temp_memory_required;
4204    d.max_frame_size = f->blocksize_1 >> 1;
4205    return d;
4206 }
4207 
stb_vorbis_get_error(stb_vorbis * f)4208 int stb_vorbis_get_error(stb_vorbis *f)
4209 {
4210    int e = f->error;
4211    f->error = VORBIS__no_error;
4212    return e;
4213 }
4214 
vorbis_alloc(stb_vorbis * f)4215 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
4216 {
4217    stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
4218    return p;
4219 }
4220 
4221 #ifndef STB_VORBIS_NO_PUSHDATA_API
4222 
stb_vorbis_flush_pushdata(stb_vorbis * f)4223 void stb_vorbis_flush_pushdata(stb_vorbis *f)
4224 {
4225    f->previous_length = 0;
4226    f->page_crc_tests  = 0;
4227    f->discard_samples_deferred = 0;
4228    f->current_loc_valid = FALSE;
4229    f->first_decode = FALSE;
4230    f->samples_output = 0;
4231    f->channel_buffer_start = 0;
4232    f->channel_buffer_end = 0;
4233 }
4234 
vorbis_search_for_page_pushdata(vorb * f,uint8 * data,int data_len)4235 static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
4236 {
4237    int i,n;
4238    for (i=0; i < f->page_crc_tests; ++i)
4239       f->scan[i].bytes_done = 0;
4240 
4241    // if we have room for more scans, search for them first, because
4242    // they may cause us to stop early if their header is incomplete
4243    if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
4244       if (data_len < 4) return 0;
4245       data_len -= 3; // need to look for 4-byte sequence, so don't miss
4246                      // one that straddles a boundary
4247       for (i=0; i < data_len; ++i) {
4248          if (data[i] == 0x4f) {
4249             if (0==memcmp(data+i, ogg_page_header, 4)) {
4250                int j,len;
4251                uint32 crc;
4252                // make sure we have the whole page header
4253                if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
4254                   // only read up to this page start, so hopefully we'll
4255                   // have the whole page header start next time
4256                   data_len = i;
4257                   break;
4258                }
4259                // ok, we have it all; compute the length of the page
4260                len = 27 + data[i+26];
4261                for (j=0; j < data[i+26]; ++j)
4262                   len += data[i+27+j];
4263                // scan everything up to the embedded crc (which we must 0)
4264                crc = 0;
4265                for (j=0; j < 22; ++j)
4266                   crc = crc32_update(crc, data[i+j]);
4267                // now process 4 0-bytes
4268                for (   ; j < 26; ++j)
4269                   crc = crc32_update(crc, 0);
4270                // len is the total number of bytes we need to scan
4271                n = f->page_crc_tests++;
4272                f->scan[n].bytes_left = len-j;
4273                f->scan[n].crc_so_far = crc;
4274                f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24);
4275                // if the last frame on a page is continued to the next, then
4276                // we can't recover the sample_loc immediately
4277                if (data[i+27+data[i+26]-1] == 255)
4278                   f->scan[n].sample_loc = ~0;
4279                else
4280                   f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24);
4281                f->scan[n].bytes_done = i+j;
4282                if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT)
4283                   break;
4284                // keep going if we still have room for more
4285             }
4286          }
4287       }
4288    }
4289 
4290    for (i=0; i < f->page_crc_tests;) {
4291       uint32 crc;
4292       int j;
4293       int n = f->scan[i].bytes_done;
4294       int m = f->scan[i].bytes_left;
4295       if (m > data_len - n) m = data_len - n;
4296       // m is the bytes to scan in the current chunk
4297       crc = f->scan[i].crc_so_far;
4298       for (j=0; j < m; ++j)
4299          crc = crc32_update(crc, data[n+j]);
4300       f->scan[i].bytes_left -= m;
4301       f->scan[i].crc_so_far = crc;
4302       if (f->scan[i].bytes_left == 0) {
4303          // does it match?
4304          if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
4305             // Houston, we have page
4306             data_len = n+m; // consumption amount is wherever that scan ended
4307             f->page_crc_tests = -1; // drop out of page scan mode
4308             f->previous_length = 0; // decode-but-don't-output one frame
4309             f->next_seg = -1;       // start a new page
4310             f->current_loc = f->scan[i].sample_loc; // set the current sample location
4311                                     // to the amount we'd have decoded had we decoded this page
4312             f->current_loc_valid = f->current_loc != ~0U;
4313             return data_len;
4314          }
4315          // delete entry
4316          f->scan[i] = f->scan[--f->page_crc_tests];
4317       } else {
4318          ++i;
4319       }
4320    }
4321 
4322    return data_len;
4323 }
4324 
4325 // return value: number of bytes we used
stb_vorbis_decode_frame_pushdata(stb_vorbis * f,const uint8 * data,int data_len,int * channels,float *** output,int * samples)4326 int stb_vorbis_decode_frame_pushdata(
4327          stb_vorbis *f,                   // the file we're decoding
4328          const uint8 *data, int data_len, // the memory available for decoding
4329          int *channels,                   // place to write number of float * buffers
4330          float ***output,                 // place to write float ** array of float * buffers
4331          int *samples                     // place to write number of output samples
4332      )
4333 {
4334    int i;
4335    int len,right,left;
4336 
4337    if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4338 
4339    if (f->page_crc_tests >= 0) {
4340       *samples = 0;
4341       return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len);
4342    }
4343 
4344    f->stream     = (uint8 *) data;
4345    f->stream_end = (uint8 *) data + data_len;
4346    f->error      = VORBIS__no_error;
4347 
4348    // check that we have the entire packet in memory
4349    if (!is_whole_packet_present(f, FALSE)) {
4350       *samples = 0;
4351       return 0;
4352    }
4353 
4354    if (!vorbis_decode_packet(f, &len, &left, &right)) {
4355       // save the actual error we encountered
4356       enum STBVorbisError error = f->error;
4357       if (error == VORBIS_bad_packet_type) {
4358          // flush and resynch
4359          f->error = VORBIS__no_error;
4360          while (get8_packet(f) != EOP)
4361             if (f->eof) break;
4362          *samples = 0;
4363          return (int) (f->stream - data);
4364       }
4365       if (error == VORBIS_continued_packet_flag_invalid) {
4366          if (f->previous_length == 0) {
4367             // we may be resynching, in which case it's ok to hit one
4368             // of these; just discard the packet
4369             f->error = VORBIS__no_error;
4370             while (get8_packet(f) != EOP)
4371                if (f->eof) break;
4372             *samples = 0;
4373             return (int) (f->stream - data);
4374          }
4375       }
4376       // if we get an error while parsing, what to do?
4377       // well, it DEFINITELY won't work to continue from where we are!
4378       stb_vorbis_flush_pushdata(f);
4379       // restore the error that actually made us bail
4380       f->error = error;
4381       *samples = 0;
4382       return 1;
4383    }
4384 
4385    // success!
4386    len = vorbis_finish_frame(f, len, left, right);
4387    for (i=0; i < f->channels; ++i)
4388       f->outputs[i] = f->channel_buffers[i] + left;
4389 
4390    if (channels) *channels = f->channels;
4391    *samples = len;
4392    *output = f->outputs;
4393    return (int) (f->stream - data);
4394 }
4395 
stb_vorbis_open_pushdata(const unsigned char * data,int data_len,int * data_used,int * error,const stb_vorbis_alloc * alloc)4396 stb_vorbis *stb_vorbis_open_pushdata(
4397          const unsigned char *data, int data_len, // the memory available for decoding
4398          int *data_used,              // only defined if result is not NULL
4399          int *error, const stb_vorbis_alloc *alloc)
4400 {
4401    stb_vorbis *f, p;
4402    vorbis_init(&p, alloc);
4403    p.stream     = (uint8 *) data;
4404    p.stream_end = (uint8 *) data + data_len;
4405    p.push_mode  = TRUE;
4406    if (!start_decoder(&p)) {
4407       if (p.eof)
4408          *error = VORBIS_need_more_data;
4409       else
4410          *error = p.error;
4411       return NULL;
4412    }
4413    f = vorbis_alloc(&p);
4414    if (f) {
4415       *f = p;
4416       *data_used = (int) (f->stream - data);
4417       *error = 0;
4418       return f;
4419    } else {
4420       vorbis_deinit(&p);
4421       return NULL;
4422    }
4423 }
4424 #endif // STB_VORBIS_NO_PUSHDATA_API
4425 
stb_vorbis_get_file_offset(stb_vorbis * f)4426 unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
4427 {
4428    #ifndef STB_VORBIS_NO_PUSHDATA_API
4429    if (f->push_mode) return 0;
4430    #endif
4431    if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start);
4432    #ifndef STB_VORBIS_NO_STDIO
4433    return (unsigned int) (ftell(f->f) - f->f_start);
4434    #endif
4435 }
4436 
4437 #ifndef STB_VORBIS_NO_PULLDATA_API
4438 //
4439 // DATA-PULLING API
4440 //
4441 
vorbis_find_page(stb_vorbis * f,uint32 * end,uint32 * last)4442 static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
4443 {
4444    for(;;) {
4445       int n;
4446       if (f->eof) return 0;
4447       n = get8(f);
4448       if (n == 0x4f) { // page header candidate
4449          unsigned int retry_loc = stb_vorbis_get_file_offset(f);
4450          int i;
4451          // check if we're off the end of a file_section stream
4452          if (retry_loc - 25 > f->stream_len)
4453             return 0;
4454          // check the rest of the header
4455          for (i=1; i < 4; ++i)
4456             if (get8(f) != ogg_page_header[i])
4457                break;
4458          if (f->eof) return 0;
4459          if (i == 4) {
4460             uint8 header[27];
4461             uint32 i, crc, goal, len;
4462             for (i=0; i < 4; ++i)
4463                header[i] = ogg_page_header[i];
4464             for (; i < 27; ++i)
4465                header[i] = get8(f);
4466             if (f->eof) return 0;
4467             if (header[4] != 0) goto invalid;
4468             goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24);
4469             for (i=22; i < 26; ++i)
4470                header[i] = 0;
4471             crc = 0;
4472             for (i=0; i < 27; ++i)
4473                crc = crc32_update(crc, header[i]);
4474             len = 0;
4475             for (i=0; i < header[26]; ++i) {
4476                int s = get8(f);
4477                crc = crc32_update(crc, s);
4478                len += s;
4479             }
4480             if (len && f->eof) return 0;
4481             for (i=0; i < len; ++i)
4482                crc = crc32_update(crc, get8(f));
4483             // finished parsing probable page
4484             if (crc == goal) {
4485                // we could now check that it's either got the last
4486                // page flag set, OR it's followed by the capture
4487                // pattern, but I guess TECHNICALLY you could have
4488                // a file with garbage between each ogg page and recover
4489                // from it automatically? So even though that paranoia
4490                // might decrease the chance of an invalid decode by
4491                // another 2^32, not worth it since it would hose those
4492                // invalid-but-useful files?
4493                if (end)
4494                   *end = stb_vorbis_get_file_offset(f);
4495                if (last) {
4496                   if (header[5] & 0x04)
4497                      *last = 1;
4498                   else
4499                      *last = 0;
4500                }
4501                set_file_offset(f, retry_loc-1);
4502                return 1;
4503             }
4504          }
4505         invalid:
4506          // not a valid page, so rewind and look for next one
4507          set_file_offset(f, retry_loc);
4508       }
4509    }
4510 }
4511 
4512 
4513 #define SAMPLE_unknown  0xffffffff
4514 
4515 // seeking is implemented with a binary search, which narrows down the range to
4516 // 64K, before using a linear search (because finding the synchronization
4517 // pattern can be expensive, and the chance we'd find the end page again is
4518 // relatively high for small ranges)
4519 //
4520 // two initial interpolation-style probes are used at the start of the search
4521 // to try to bound either side of the binary search sensibly, while still
4522 // working in O(log n) time if they fail.
4523 
get_seek_page_info(stb_vorbis * f,ProbedPage * z)4524 static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
4525 {
4526    uint8 header[27], lacing[255];
4527    int i,len;
4528 
4529    // record where the page starts
4530    z->page_start = stb_vorbis_get_file_offset(f);
4531 
4532    // parse the header
4533    getn(f, header, 27);
4534    if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
4535       return 0;
4536    getn(f, lacing, header[26]);
4537 
4538    // determine the length of the payload
4539    len = 0;
4540    for (i=0; i < header[26]; ++i)
4541       len += lacing[i];
4542 
4543    // this implies where the page ends
4544    z->page_end = z->page_start + 27 + header[26] + len;
4545 
4546    // read the last-decoded sample out of the data
4547    z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
4548 
4549    // restore file state to where we were
4550    set_file_offset(f, z->page_start);
4551    return 1;
4552 }
4553 
4554 // rarely used function to seek back to the preceeding page while finding the
4555 // start of a packet
go_to_page_before(stb_vorbis * f,unsigned int limit_offset)4556 static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
4557 {
4558    unsigned int previous_safe, end;
4559 
4560    // now we want to seek back 64K from the limit
4561    if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset)
4562       previous_safe = limit_offset - 65536;
4563    else
4564       previous_safe = f->first_audio_page_offset;
4565 
4566    set_file_offset(f, previous_safe);
4567 
4568    while (vorbis_find_page(f, &end, NULL)) {
4569       if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
4570          return 1;
4571       set_file_offset(f, end);
4572    }
4573 
4574    return 0;
4575 }
4576 
4577 // implements the search logic for finding a page and starting decoding. if
4578 // the function succeeds, current_loc_valid will be true and current_loc will
4579 // be less than or equal to the provided sample number (the closer the
4580 // better).
seek_to_sample_coarse(stb_vorbis * f,uint32 sample_number)4581 static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
4582 {
4583    ProbedPage left, right, mid;
4584    int i, start_seg_with_known_loc, end_pos, page_start;
4585    uint32 delta, stream_length, padding;
4586    double offset, bytes_per_sample;
4587    int probe = 0;
4588 
4589    // find the last page and validate the target sample
4590    stream_length = stb_vorbis_stream_length_in_samples(f);
4591    if (stream_length == 0)            return error(f, VORBIS_seek_without_length);
4592    if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
4593 
4594    // this is the maximum difference between the window-center (which is the
4595    // actual granule position value), and the right-start (which the spec
4596    // indicates should be the granule position (give or take one)).
4597    padding = ((f->blocksize_1 - f->blocksize_0) >> 2);
4598    if (sample_number < padding)
4599       sample_number = 0;
4600    else
4601       sample_number -= padding;
4602 
4603    left = f->p_first;
4604    while (left.last_decoded_sample == ~0U) {
4605       // (untested) the first page does not have a 'last_decoded_sample'
4606       set_file_offset(f, left.page_end);
4607       if (!get_seek_page_info(f, &left)) goto error;
4608    }
4609 
4610    right = f->p_last;
4611    assert(right.last_decoded_sample != ~0U);
4612 
4613    // starting from the start is handled differently
4614    if (sample_number <= left.last_decoded_sample) {
4615       stb_vorbis_seek_start(f);
4616       return 1;
4617    }
4618 
4619    while (left.page_end != right.page_start) {
4620       assert(left.page_end < right.page_start);
4621       // search range in bytes
4622       delta = right.page_start - left.page_end;
4623       if (delta <= 65536) {
4624          // there's only 64K left to search - handle it linearly
4625          set_file_offset(f, left.page_end);
4626       } else {
4627          if (probe < 2) {
4628             if (probe == 0) {
4629                // first probe (interpolate)
4630                double data_bytes = right.page_end - left.page_start;
4631                bytes_per_sample = data_bytes / right.last_decoded_sample;
4632                offset = left.page_start + bytes_per_sample * (sample_number - left.last_decoded_sample);
4633             } else {
4634                // second probe (try to bound the other side)
4635                double error = ((double) sample_number - mid.last_decoded_sample) * bytes_per_sample;
4636                if (error >= 0 && error <  8000) error =  8000;
4637                if (error <  0 && error > -8000) error = -8000;
4638                offset += error * 2;
4639             }
4640 
4641             // ensure the offset is valid
4642             if (offset < left.page_end)
4643                offset = left.page_end;
4644             if (offset > right.page_start - 65536)
4645                offset = right.page_start - 65536;
4646 
4647             set_file_offset(f, (unsigned int) offset);
4648          } else {
4649             // binary search for large ranges (offset by 32K to ensure
4650             // we don't hit the right page)
4651             set_file_offset(f, left.page_end + (delta / 2) - 32768);
4652          }
4653 
4654          if (!vorbis_find_page(f, NULL, NULL)) goto error;
4655       }
4656 
4657       for (;;) {
4658          if (!get_seek_page_info(f, &mid)) goto error;
4659          if (mid.last_decoded_sample != ~0U) break;
4660          // (untested) no frames end on this page
4661          set_file_offset(f, mid.page_end);
4662          assert(mid.page_start < right.page_start);
4663       }
4664 
4665       // if we've just found the last page again then we're in a tricky file,
4666       // and we're close enough.
4667       if (mid.page_start == right.page_start)
4668          break;
4669 
4670       if (sample_number < mid.last_decoded_sample)
4671          right = mid;
4672       else
4673          left = mid;
4674 
4675       ++probe;
4676    }
4677 
4678    // seek back to start of the last packet
4679    page_start = left.page_start;
4680    set_file_offset(f, page_start);
4681    if (!start_page(f)) return error(f, VORBIS_seek_failed);
4682    end_pos = f->end_seg_with_known_loc;
4683    assert(end_pos >= 0);
4684 
4685    for (;;) {
4686       for (i = end_pos; i > 0; --i)
4687          if (f->segments[i-1] != 255)
4688             break;
4689 
4690       start_seg_with_known_loc = i;
4691 
4692       if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet))
4693          break;
4694 
4695       // (untested) the final packet begins on an earlier page
4696       if (!go_to_page_before(f, page_start))
4697          goto error;
4698 
4699       page_start = stb_vorbis_get_file_offset(f);
4700       if (!start_page(f)) goto error;
4701       end_pos = f->segment_count - 1;
4702    }
4703 
4704    // prepare to start decoding
4705    f->current_loc_valid = FALSE;
4706    f->last_seg = FALSE;
4707    f->valid_bits = 0;
4708    f->packet_bytes = 0;
4709    f->bytes_in_seg = 0;
4710    f->previous_length = 0;
4711    f->next_seg = start_seg_with_known_loc;
4712 
4713    for (i = 0; i < start_seg_with_known_loc; i++)
4714       skip(f, f->segments[i]);
4715 
4716    // start decoding (optimizable - this frame is generally discarded)
4717    vorbis_pump_first_frame(f);
4718    return 1;
4719 
4720 error:
4721    // try to restore the file to a valid state
4722    stb_vorbis_seek_start(f);
4723    return error(f, VORBIS_seek_failed);
4724 }
4725 
4726 // the same as vorbis_decode_initial, but without advancing
peek_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)4727 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
4728 {
4729    int bits_read, bytes_read;
4730 
4731    if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
4732       return 0;
4733 
4734    // either 1 or 2 bytes were read, figure out which so we can rewind
4735    bits_read = 1 + ilog(f->mode_count-1);
4736    if (f->mode_config[*mode].blockflag)
4737       bits_read += 2;
4738    bytes_read = (bits_read + 7) / 8;
4739 
4740    f->bytes_in_seg += bytes_read;
4741    f->packet_bytes -= bytes_read;
4742    skip(f, -bytes_read);
4743    if (f->next_seg == -1)
4744       f->next_seg = f->segment_count - 1;
4745    else
4746       f->next_seg--;
4747    f->valid_bits = 0;
4748 
4749    return 1;
4750 }
4751 
stb_vorbis_seek_frame(stb_vorbis * f,unsigned int sample_number)4752 int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
4753 {
4754    uint32 max_frame_samples;
4755 
4756    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4757 
4758    // fast page-level search
4759    if (!seek_to_sample_coarse(f, sample_number))
4760       return 0;
4761 
4762    assert(f->current_loc_valid);
4763    assert(f->current_loc <= sample_number);
4764 
4765    // linear search for the relevant packet
4766    max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2;
4767    while (f->current_loc < sample_number) {
4768       int left_start, left_end, right_start, right_end, mode, frame_samples;
4769       if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
4770          return error(f, VORBIS_seek_failed);
4771       // calculate the number of samples returned by the next frame
4772       frame_samples = right_start - left_start;
4773       if (f->current_loc + frame_samples > sample_number) {
4774          return 1; // the next frame will contain the sample
4775       } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) {
4776          // there's a chance the frame after this could contain the sample
4777          vorbis_pump_first_frame(f);
4778       } else {
4779          // this frame is too early to be relevant
4780          f->current_loc += frame_samples;
4781          f->previous_length = 0;
4782          maybe_start_packet(f);
4783          flush_packet(f);
4784       }
4785    }
4786    // the next frame will start with the sample
4787    assert(f->current_loc == sample_number);
4788    return 1;
4789 }
4790 
stb_vorbis_seek(stb_vorbis * f,unsigned int sample_number)4791 int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
4792 {
4793    if (!stb_vorbis_seek_frame(f, sample_number))
4794       return 0;
4795 
4796    if (sample_number != f->current_loc) {
4797       int n;
4798       uint32 frame_start = f->current_loc;
4799       stb_vorbis_get_frame_float(f, &n, NULL);
4800       assert(sample_number > frame_start);
4801       assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end);
4802       f->channel_buffer_start += (sample_number - frame_start);
4803    }
4804 
4805    return 1;
4806 }
4807 
stb_vorbis_seek_start(stb_vorbis * f)4808 void stb_vorbis_seek_start(stb_vorbis *f)
4809 {
4810    if (IS_PUSH_MODE(f)) { error(f, VORBIS_invalid_api_mixing); return; }
4811    set_file_offset(f, f->first_audio_page_offset);
4812    f->previous_length = 0;
4813    f->first_decode = TRUE;
4814    f->next_seg = -1;
4815    vorbis_pump_first_frame(f);
4816 }
4817 
stb_vorbis_stream_length_in_samples(stb_vorbis * f)4818 unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
4819 {
4820    unsigned int restore_offset, previous_safe;
4821    unsigned int end, last_page_loc;
4822 
4823    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4824    if (!f->total_samples) {
4825       unsigned int last;
4826       uint32 lo,hi;
4827       char header[6];
4828 
4829       // first, store the current decode position so we can restore it
4830       restore_offset = stb_vorbis_get_file_offset(f);
4831 
4832       // now we want to seek back 64K from the end (the last page must
4833       // be at most a little less than 64K, but let's allow a little slop)
4834       if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
4835          previous_safe = f->stream_len - 65536;
4836       else
4837          previous_safe = f->first_audio_page_offset;
4838 
4839       set_file_offset(f, previous_safe);
4840       // previous_safe is now our candidate 'earliest known place that seeking
4841       // to will lead to the final page'
4842 
4843       if (!vorbis_find_page(f, &end, &last)) {
4844          // if we can't find a page, we're hosed!
4845          f->error = VORBIS_cant_find_last_page;
4846          f->total_samples = 0xffffffff;
4847          goto done;
4848       }
4849 
4850       // check if there are more pages
4851       last_page_loc = stb_vorbis_get_file_offset(f);
4852 
4853       // stop when the last_page flag is set, not when we reach eof;
4854       // this allows us to stop short of a 'file_section' end without
4855       // explicitly checking the length of the section
4856       while (!last) {
4857          set_file_offset(f, end);
4858          if (!vorbis_find_page(f, &end, &last)) {
4859             // the last page we found didn't have the 'last page' flag
4860             // set. whoops!
4861             break;
4862          }
4863          previous_safe = last_page_loc+1;
4864          last_page_loc = stb_vorbis_get_file_offset(f);
4865       }
4866 
4867       set_file_offset(f, last_page_loc);
4868 
4869       // parse the header
4870       getn(f, (unsigned char *)header, 6);
4871       // extract the absolute granule position
4872       lo = get32(f);
4873       hi = get32(f);
4874       if (lo == 0xffffffff && hi == 0xffffffff) {
4875          f->error = VORBIS_cant_find_last_page;
4876          f->total_samples = SAMPLE_unknown;
4877          goto done;
4878       }
4879       if (hi)
4880          lo = 0xfffffffe; // saturate
4881       f->total_samples = lo;
4882 
4883       f->p_last.page_start = last_page_loc;
4884       f->p_last.page_end   = end;
4885       f->p_last.last_decoded_sample = lo;
4886 
4887      done:
4888       set_file_offset(f, restore_offset);
4889    }
4890    return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
4891 }
4892 
stb_vorbis_stream_length_in_seconds(stb_vorbis * f)4893 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
4894 {
4895    return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
4896 }
4897 
4898 
4899 
stb_vorbis_get_frame_float(stb_vorbis * f,int * channels,float *** output)4900 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
4901 {
4902    int len, right,left,i;
4903    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4904 
4905    if (!vorbis_decode_packet(f, &len, &left, &right)) {
4906       f->channel_buffer_start = f->channel_buffer_end = 0;
4907       return 0;
4908    }
4909 
4910    len = vorbis_finish_frame(f, len, left, right);
4911    for (i=0; i < f->channels; ++i)
4912       f->outputs[i] = f->channel_buffers[i] + left;
4913 
4914    f->channel_buffer_start = left;
4915    f->channel_buffer_end   = left+len;
4916 
4917    if (channels) *channels = f->channels;
4918    if (output)   *output = f->outputs;
4919    return len;
4920 }
4921 
4922 #ifndef STB_VORBIS_NO_STDIO
4923 
stb_vorbis_open_file_section(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc,unsigned int length)4924 stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length)
4925 {
4926    stb_vorbis *f, p;
4927    vorbis_init(&p, alloc);
4928    p.f = file;
4929    p.f_start = (uint32) ftell(file);
4930    p.stream_len   = length;
4931    p.close_on_free = close_on_free;
4932    if (start_decoder(&p)) {
4933       f = vorbis_alloc(&p);
4934       if (f) {
4935          *f = p;
4936          vorbis_pump_first_frame(f);
4937          return f;
4938       }
4939    }
4940    if (error) *error = p.error;
4941    vorbis_deinit(&p);
4942    return NULL;
4943 }
4944 
stb_vorbis_open_file(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc)4945 stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc)
4946 {
4947    unsigned int len, start;
4948    start = (unsigned int) ftell(file);
4949    fseek(file, 0, SEEK_END);
4950    len = (unsigned int) (ftell(file) - start);
4951    fseek(file, start, SEEK_SET);
4952    return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
4953 }
4954 
stb_vorbis_open_filename(const char * filename,int * error,const stb_vorbis_alloc * alloc)4955 stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
4956 {
4957    FILE *f = fopen(filename, "rb");
4958    if (f)
4959       return stb_vorbis_open_file(f, TRUE, error, alloc);
4960    if (error) *error = VORBIS_file_open_failure;
4961    return NULL;
4962 }
4963 #endif // STB_VORBIS_NO_STDIO
4964 
stb_vorbis_open_memory(const unsigned char * data,int len,int * error,const stb_vorbis_alloc * alloc)4965 stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc)
4966 {
4967    stb_vorbis *f, p;
4968    if (data == NULL) return NULL;
4969    vorbis_init(&p, alloc);
4970    p.stream = (uint8 *) data;
4971    p.stream_end = (uint8 *) data + len;
4972    p.stream_start = (uint8 *) p.stream;
4973    p.stream_len = len;
4974    p.push_mode = FALSE;
4975    if (start_decoder(&p)) {
4976       f = vorbis_alloc(&p);
4977       if (f) {
4978          *f = p;
4979          vorbis_pump_first_frame(f);
4980          return f;
4981       }
4982    }
4983    if (error) *error = p.error;
4984    vorbis_deinit(&p);
4985    return NULL;
4986 }
4987 
4988 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
4989 #define PLAYBACK_MONO     1
4990 #define PLAYBACK_LEFT     2
4991 #define PLAYBACK_RIGHT    4
4992 
4993 #define L  (PLAYBACK_LEFT  | PLAYBACK_MONO)
4994 #define C  (PLAYBACK_LEFT  | PLAYBACK_RIGHT | PLAYBACK_MONO)
4995 #define R  (PLAYBACK_RIGHT | PLAYBACK_MONO)
4996 
4997 static int8 channel_position[7][6] =
4998 {
4999    { 0 },
5000    { C },
5001    { L, R },
5002    { L, C, R },
5003    { L, R, L, R },
5004    { L, C, R, L, R },
5005    { L, C, R, L, R, C },
5006 };
5007 
5008 
5009 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
5010    typedef union {
5011       float f;
5012       int i;
5013    } float_conv;
5014    typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4];
5015    #define FASTDEF(x) float_conv x
5016    // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
5017    #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT))
5018    #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22))
5019    #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s))
5020    #define check_endianness()
5021 #else
5022    #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s))))
5023    #define check_endianness()
5024    #define FASTDEF(x)
5025 #endif
5026 
copy_samples(short * dest,float * src,int len)5027 static void copy_samples(short *dest, float *src, int len)
5028 {
5029    int i;
5030    check_endianness();
5031    for (i=0; i < len; ++i) {
5032       FASTDEF(temp);
5033       int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i],15);
5034       if ((unsigned int) (v + 32768) > 65535)
5035          v = v < 0 ? -32768 : 32767;
5036       dest[i] = v;
5037    }
5038 }
5039 
compute_samples(int mask,short * output,int num_c,float ** data,int d_offset,int len)5040 static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
5041 {
5042    #define BUFFER_SIZE  32
5043    float buffer[BUFFER_SIZE];
5044    int i,j,o,n = BUFFER_SIZE;
5045    check_endianness();
5046    for (o = 0; o < len; o += BUFFER_SIZE) {
5047       memset(buffer, 0, sizeof(buffer));
5048       if (o + n > len) n = len - o;
5049       for (j=0; j < num_c; ++j) {
5050          if (channel_position[num_c][j] & mask) {
5051             for (i=0; i < n; ++i)
5052                buffer[i] += data[j][d_offset+o+i];
5053          }
5054       }
5055       for (i=0; i < n; ++i) {
5056          FASTDEF(temp);
5057          int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5058          if ((unsigned int) (v + 32768) > 65535)
5059             v = v < 0 ? -32768 : 32767;
5060          output[o+i] = v;
5061       }
5062    }
5063 }
5064 
compute_stereo_samples(short * output,int num_c,float ** data,int d_offset,int len)5065 static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
5066 {
5067    #define BUFFER_SIZE  32
5068    float buffer[BUFFER_SIZE];
5069    int i,j,o,n = BUFFER_SIZE >> 1;
5070    // o is the offset in the source data
5071    check_endianness();
5072    for (o = 0; o < len; o += BUFFER_SIZE >> 1) {
5073       // o2 is the offset in the output data
5074       int o2 = o << 1;
5075       memset(buffer, 0, sizeof(buffer));
5076       if (o + n > len) n = len - o;
5077       for (j=0; j < num_c; ++j) {
5078          int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);
5079          if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) {
5080             for (i=0; i < n; ++i) {
5081                buffer[i*2+0] += data[j][d_offset+o+i];
5082                buffer[i*2+1] += data[j][d_offset+o+i];
5083             }
5084          } else if (m == PLAYBACK_LEFT) {
5085             for (i=0; i < n; ++i) {
5086                buffer[i*2+0] += data[j][d_offset+o+i];
5087             }
5088          } else if (m == PLAYBACK_RIGHT) {
5089             for (i=0; i < n; ++i) {
5090                buffer[i*2+1] += data[j][d_offset+o+i];
5091             }
5092          }
5093       }
5094       for (i=0; i < (n<<1); ++i) {
5095          FASTDEF(temp);
5096          int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5097          if ((unsigned int) (v + 32768) > 65535)
5098             v = v < 0 ? -32768 : 32767;
5099          output[o2+i] = v;
5100       }
5101    }
5102 }
5103 
convert_samples_short(int buf_c,short ** buffer,int b_offset,int data_c,float ** data,int d_offset,int samples)5104 static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
5105 {
5106    int i;
5107    if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5108       static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
5109       for (i=0; i < buf_c; ++i)
5110          compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples);
5111    } else {
5112       int limit = buf_c < data_c ? buf_c : data_c;
5113       for (i=0; i < limit; ++i)
5114          copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
5115       for (   ; i < buf_c; ++i)
5116          memset(buffer[i]+b_offset, 0, sizeof(short) * samples);
5117    }
5118 }
5119 
stb_vorbis_get_frame_short(stb_vorbis * f,int num_c,short ** buffer,int num_samples)5120 int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
5121 {
5122    float **output;
5123    int len = stb_vorbis_get_frame_float(f, NULL, &output);
5124    if (len > num_samples) len = num_samples;
5125    if (len)
5126       convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len);
5127    return len;
5128 }
5129 
convert_channels_short_interleaved(int buf_c,short * buffer,int data_c,float ** data,int d_offset,int len)5130 static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
5131 {
5132    int i;
5133    check_endianness();
5134    if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5135       assert(buf_c == 2);
5136       for (i=0; i < buf_c; ++i)
5137          compute_stereo_samples(buffer, data_c, data, d_offset, len);
5138    } else {
5139       int limit = buf_c < data_c ? buf_c : data_c;
5140       int j;
5141       for (j=0; j < len; ++j) {
5142          for (i=0; i < limit; ++i) {
5143             FASTDEF(temp);
5144             float f = data[i][d_offset+j];
5145             int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15);
5146             if ((unsigned int) (v + 32768) > 65535)
5147                v = v < 0 ? -32768 : 32767;
5148             *buffer++ = v;
5149          }
5150          for (   ; i < buf_c; ++i)
5151             *buffer++ = 0;
5152       }
5153    }
5154 }
5155 
stb_vorbis_get_frame_short_interleaved(stb_vorbis * f,int num_c,short * buffer,int num_shorts)5156 int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
5157 {
5158    float **output;
5159    int len;
5160    if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
5161    len = stb_vorbis_get_frame_float(f, NULL, &output);
5162    if (len) {
5163       if (len*num_c > num_shorts) len = num_shorts / num_c;
5164       convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len);
5165    }
5166    return len;
5167 }
5168 
stb_vorbis_get_samples_short_interleaved(stb_vorbis * f,int channels,short * buffer,int num_shorts)5169 int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
5170 {
5171    float **outputs;
5172    int len = num_shorts / channels;
5173    int n=0;
5174    int z = f->channels;
5175    if (z > channels) z = channels;
5176    while (n < len) {
5177       int k = f->channel_buffer_end - f->channel_buffer_start;
5178       if (n+k >= len) k = len - n;
5179       if (k)
5180          convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5181       buffer += k*channels;
5182       n += k;
5183       f->channel_buffer_start += k;
5184       if (n == len) break;
5185       if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5186    }
5187    return n;
5188 }
5189 
stb_vorbis_get_samples_short(stb_vorbis * f,int channels,short ** buffer,int len)5190 int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
5191 {
5192    float **outputs;
5193    int n=0;
5194    int z = f->channels;
5195    if (z > channels) z = channels;
5196    while (n < len) {
5197       int k = f->channel_buffer_end - f->channel_buffer_start;
5198       if (n+k >= len) k = len - n;
5199       if (k)
5200          convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5201       n += k;
5202       f->channel_buffer_start += k;
5203       if (n == len) break;
5204       if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5205    }
5206    return n;
5207 }
5208 
5209 #ifndef STB_VORBIS_NO_STDIO
stb_vorbis_decode_filename(const char * filename,int * channels,int * sample_rate,short ** output)5210 int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output)
5211 {
5212    int data_len, offset, total, limit, error;
5213    short *data;
5214    stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
5215    if (v == NULL) return -1;
5216    limit = v->channels * 4096;
5217    *channels = v->channels;
5218    if (sample_rate)
5219       *sample_rate = v->sample_rate;
5220    offset = data_len = 0;
5221    total = limit;
5222    data = (short *) malloc(total * sizeof(*data));
5223    if (data == NULL) {
5224       stb_vorbis_close(v);
5225       return -2;
5226    }
5227    for (;;) {
5228       int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5229       if (n == 0) break;
5230       data_len += n;
5231       offset += n * v->channels;
5232       if (offset + limit > total) {
5233          short *data2;
5234          total *= 2;
5235          data2 = (short *) realloc(data, total * sizeof(*data));
5236          if (data2 == NULL) {
5237             free(data);
5238             stb_vorbis_close(v);
5239             return -2;
5240          }
5241          data = data2;
5242       }
5243    }
5244    *output = data;
5245    stb_vorbis_close(v);
5246    return data_len;
5247 }
5248 #endif // NO_STDIO
5249 
stb_vorbis_decode_memory(const uint8 * mem,int len,int * channels,int * sample_rate,short ** output)5250 int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output)
5251 {
5252    int data_len, offset, total, limit, error;
5253    short *data;
5254    stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
5255    if (v == NULL) return -1;
5256    limit = v->channels * 4096;
5257    *channels = v->channels;
5258    if (sample_rate)
5259       *sample_rate = v->sample_rate;
5260    offset = data_len = 0;
5261    total = limit;
5262    data = (short *) malloc(total * sizeof(*data));
5263    if (data == NULL) {
5264       stb_vorbis_close(v);
5265       return -2;
5266    }
5267    for (;;) {
5268       int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5269       if (n == 0) break;
5270       data_len += n;
5271       offset += n * v->channels;
5272       if (offset + limit > total) {
5273          short *data2;
5274          total *= 2;
5275          data2 = (short *) realloc(data, total * sizeof(*data));
5276          if (data2 == NULL) {
5277             free(data);
5278             stb_vorbis_close(v);
5279             return -2;
5280          }
5281          data = data2;
5282       }
5283    }
5284    *output = data;
5285    stb_vorbis_close(v);
5286    return data_len;
5287 }
5288 #endif // STB_VORBIS_NO_INTEGER_CONVERSION
5289 
stb_vorbis_get_samples_float_interleaved(stb_vorbis * f,int channels,float * buffer,int num_floats)5290 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
5291 {
5292    float **outputs;
5293    int len = num_floats / channels;
5294    int n=0;
5295    int z = f->channels;
5296    if (z > channels) z = channels;
5297    while (n < len) {
5298       int i,j;
5299       int k = f->channel_buffer_end - f->channel_buffer_start;
5300       if (n+k >= len) k = len - n;
5301       for (j=0; j < k; ++j) {
5302          for (i=0; i < z; ++i)
5303             *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
5304          for (   ; i < channels; ++i)
5305             *buffer++ = 0;
5306       }
5307       n += k;
5308       f->channel_buffer_start += k;
5309       if (n == len)
5310          break;
5311       if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5312          break;
5313    }
5314    return n;
5315 }
5316 
stb_vorbis_get_samples_float(stb_vorbis * f,int channels,float ** buffer,int num_samples)5317 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
5318 {
5319    float **outputs;
5320    int n=0;
5321    int z = f->channels;
5322    if (z > channels) z = channels;
5323    while (n < num_samples) {
5324       int i;
5325       int k = f->channel_buffer_end - f->channel_buffer_start;
5326       if (n+k >= num_samples) k = num_samples - n;
5327       if (k) {
5328          for (i=0; i < z; ++i)
5329             memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k);
5330          for (   ; i < channels; ++i)
5331             memset(buffer[i]+n, 0, sizeof(float) * k);
5332       }
5333       n += k;
5334       f->channel_buffer_start += k;
5335       if (n == num_samples)
5336          break;
5337       if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5338          break;
5339    }
5340    return n;
5341 }
5342 #endif // STB_VORBIS_NO_PULLDATA_API
5343 
5344 /* Version history
5345     1.09    - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
5346     1.08    - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
5347                            avoid discarding last frame of audio data
5348     1.07    - 2015/01/16 - fixed some warnings, fix mingw, const-correct API
5349                            some more crash fixes when out of memory or with corrupt files
5350     1.06    - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
5351                            some crash fixes when out of memory or with corrupt files
5352     1.05    - 2015/04/19 - don't define __forceinline if it's redundant
5353     1.04    - 2014/08/27 - fix missing const-correct case in API
5354     1.03    - 2014/08/07 - Warning fixes
5355     1.02    - 2014/07/09 - Declare qsort compare function _cdecl on windows
5356     1.01    - 2014/06/18 - fix stb_vorbis_get_samples_float
5357     1.0     - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in multichannel
5358                            (API change) report sample rate for decode-full-file funcs
5359     0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5360     0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5361     0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5362     0.99993 - remove assert that fired on legal files with empty tables
5363     0.99992 - rewind-to-start
5364     0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5365     0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5366     0.9998 - add a full-decode function with a memory source
5367     0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5368     0.9996 - query length of vorbis stream in samples/seconds
5369     0.9995 - bugfix to another optimization that only happened in certain files
5370     0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5371     0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5372     0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5373     0.9991 - performance improvement of IMDCT
5374     0.999 - (should have been 0.9990) performance improvement of IMDCT
5375     0.998 - no-CRT support from Casey Muratori
5376     0.997 - bugfixes for bugs found by Terje Mathisen
5377     0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5378     0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5379     0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5380     0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5381     0.992 - fixes for MinGW warning
5382     0.991 - turn fast-float-conversion on by default
5383     0.990 - fix push-mode seek recovery if you seek into the headers
5384     0.98b - fix to bad release of 0.98
5385     0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5386     0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5387     0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5388     0.95 - clamping code for 16-bit functions
5389     0.94 - not publically released
5390     0.93 - fixed all-zero-floor case (was decoding garbage)
5391     0.92 - fixed a memory leak
5392     0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5393     0.90 - first public release
5394 */
5395 
5396 #endif // STB_VORBIS_HEADER_ONLY
5397