1 // Ogg Vorbis audio decoder - v1.11 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking sponsored
7 // by Phillip Bennefall, Marc Andersen, Aaron Baker, Elias Software,
8 // Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 // See end of file for license information.
13 //
14 // Limitations:
15 //
16 // - floor 0 not supported (used in old ogg vorbis files pre-2004)
17 // - lossless sample-truncation at beginning ignored
18 // - cannot concatenate multiple vorbis streams
19 // - sample positions are 32-bit, limiting seekable 192Khz
20 // files to around 6 hours (Ogg supports 64-bit)
21 //
22 // Feature contributors:
23 // Dougall Johnson (sample-exact seeking)
24 //
25 // Bugfix/warning contributors:
26 // Terje Mathisen Niklas Frykholm Andy Hill
27 // Casey Muratori John Bolton Gargaj
28 // Laurent Gomila Marc LeBlanc Ronny Chevalier
29 // Bernhard Wodo Evan Balster alxprd@github
30 // Tom Beaumont Ingo Leitgeb Nicolas Guillemot
31 // Phillip Bennefall Rohit Thiago Goulart
32 // manxorist@github saga musix github:infatum
33 //
34 // Partial history:
35 // 1.11 - 2017/07/23 - fix MinGW compilation
36 // 1.10 - 2017/03/03 - more robust seeking; fix negative ilog(); clear error in open_memory
37 // 1.09 - 2016/04/04 - back out 'truncation of last frame' fix from previous version
38 // 1.08 - 2016/04/02 - warnings; setup memory leaks; truncation of last frame
39 // 1.07 - 2015/01/16 - fixes for crashes on invalid files; warning fixes; const
40 // 1.06 - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
41 // some crash fixes when out of memory or with corrupt files
42 // fix some inappropriately signed shifts
43 // 1.05 - 2015/04/19 - don't define __forceinline if it's redundant
44 // 1.04 - 2014/08/27 - fix missing const-correct case in API
45 // 1.03 - 2014/08/07 - warning fixes
46 // 1.02 - 2014/07/09 - declare qsort comparison as explicitly _cdecl in Windows
47 // 1.01 - 2014/06/18 - fix stb_vorbis_get_samples_float (interleaved was correct)
48 // 1.0 - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
49 // (API change) report sample rate for decode-full-file funcs
50 //
51 // See end of file for full version history.
52
53
54 //////////////////////////////////////////////////////////////////////////////
55 //
56 // HEADER BEGINS HERE
57 //
58
59 #ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
60 #define STB_VORBIS_INCLUDE_STB_VORBIS_H
61
62 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
63 #define STB_VORBIS_NO_STDIO 1
64 #endif
65
66 #ifndef STB_VORBIS_NO_STDIO
67 #include <stdio.h>
68 #endif
69
70 #ifdef __cplusplus
71 extern "C" {
72 #endif
73
74 /////////// THREAD SAFETY
75
76 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
77 // them from multiple threads at the same time. However, you can have multiple
78 // stb_vorbis* handles and decode from them independently in multiple thrads.
79
80
81 /////////// MEMORY ALLOCATION
82
83 // normally stb_vorbis uses malloc() to allocate memory at startup,
84 // and alloca() to allocate temporary memory during a frame on the
85 // stack. (Memory consumption will depend on the amount of setup
86 // data in the file and how you set the compile flags for speed
87 // vs. size. In my test files the maximal-size usage is ~150KB.)
88 //
89 // You can modify the wrapper functions in the source (setup_malloc,
90 // setup_temp_malloc, temp_malloc) to change this behavior, or you
91 // can use a simpler allocation model: you pass in a buffer from
92 // which stb_vorbis will allocate _all_ its memory (including the
93 // temp memory). "open" may fail with a VORBIS_outofmem if you
94 // do not pass in enough data; there is no way to determine how
95 // much you do need except to succeed (at which point you can
96 // query get_info to find the exact amount required. yes I know
97 // this is lame).
98 //
99 // If you pass in a non-NULL buffer of the type below, allocation
100 // will occur from it as described above. Otherwise just pass NULL
101 // to use malloc()/alloca()
102
103 typedef struct
104 {
105 char *alloc_buffer;
106 int alloc_buffer_length_in_bytes;
107 } stb_vorbis_alloc;
108
109
110 /////////// FUNCTIONS USEABLE WITH ALL INPUT MODES
111
112 typedef struct stb_vorbis stb_vorbis;
113
114 typedef struct
115 {
116 unsigned int sample_rate;
117 int channels;
118
119 unsigned int setup_memory_required;
120 unsigned int setup_temp_memory_required;
121 unsigned int temp_memory_required;
122
123 int max_frame_size;
124 } stb_vorbis_info;
125
126 // get general information about the file
127 extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
128
129 // get the last error detected (clears it, too)
130 extern int stb_vorbis_get_error(stb_vorbis *f);
131
132 // close an ogg vorbis file and free all memory in use
133 extern void stb_vorbis_close(stb_vorbis *f);
134
135 // this function returns the offset (in samples) from the beginning of the
136 // file that will be returned by the next decode, if it is known, or -1
137 // otherwise. after a flush_pushdata() call, this may take a while before
138 // it becomes valid again.
139 // NOT WORKING YET after a seek with PULLDATA API
140 extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
141
142 // returns the current seek point within the file, or offset from the beginning
143 // of the memory buffer. In pushdata mode it returns 0.
144 extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
145
146 /////////// PUSHDATA API
147
148 #ifndef STB_VORBIS_NO_PUSHDATA_API
149
150 // this API allows you to get blocks of data from any source and hand
151 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
152 // you how much it used, and you have to give it the rest next time;
153 // and stb_vorbis may not have enough data to work with and you will
154 // need to give it the same data again PLUS more. Note that the Vorbis
155 // specification does not bound the size of an individual frame.
156
157 extern stb_vorbis *stb_vorbis_open_pushdata(
158 const unsigned char * datablock, int datablock_length_in_bytes,
159 int *datablock_memory_consumed_in_bytes,
160 int *error,
161 const stb_vorbis_alloc *alloc_buffer);
162 // create a vorbis decoder by passing in the initial data block containing
163 // the ogg&vorbis headers (you don't need to do parse them, just provide
164 // the first N bytes of the file--you're told if it's not enough, see below)
165 // on success, returns an stb_vorbis *, does not set error, returns the amount of
166 // data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
167 // on failure, returns NULL on error and sets *error, does not change *datablock_memory_consumed
168 // if returns NULL and *error is VORBIS_need_more_data, then the input block was
169 // incomplete and you need to pass in a larger block from the start of the file
170
171 extern int stb_vorbis_decode_frame_pushdata(
172 stb_vorbis *f,
173 const unsigned char *datablock, int datablock_length_in_bytes,
174 int *channels, // place to write number of float * buffers
175 float ***output, // place to write float ** array of float * buffers
176 int *samples // place to write number of output samples
177 );
178 // decode a frame of audio sample data if possible from the passed-in data block
179 //
180 // return value: number of bytes we used from datablock
181 //
182 // possible cases:
183 // 0 bytes used, 0 samples output (need more data)
184 // N bytes used, 0 samples output (resynching the stream, keep going)
185 // N bytes used, M samples output (one frame of data)
186 // note that after opening a file, you will ALWAYS get one N-bytes,0-sample
187 // frame, because Vorbis always "discards" the first frame.
188 //
189 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
190 // instead only datablock_length_in_bytes-3 or less. This is because it wants
191 // to avoid missing parts of a page header if they cross a datablock boundary,
192 // without writing state-machiney code to record a partial detection.
193 //
194 // The number of channels returned are stored in *channels (which can be
195 // NULL--it is always the same as the number of channels reported by
196 // get_info). *output will contain an array of float* buffers, one per
197 // channel. In other words, (*output)[0][0] contains the first sample from
198 // the first channel, and (*output)[1][0] contains the first sample from
199 // the second channel.
200
201 extern void stb_vorbis_flush_pushdata(stb_vorbis *f);
202 // inform stb_vorbis that your next datablock will not be contiguous with
203 // previous ones (e.g. you've seeked in the data); future attempts to decode
204 // frames will cause stb_vorbis to resynchronize (as noted above), and
205 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
206 // will begin decoding the _next_ frame.
207 //
208 // if you want to seek using pushdata, you need to seek in your file, then
209 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
210 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
211 // if you don't like the result, seek your file again and repeat.
212 #endif
213
214
215 ////////// PULLING INPUT API
216
217 #ifndef STB_VORBIS_NO_PULLDATA_API
218 // This API assumes stb_vorbis is allowed to pull data from a source--
219 // either a block of memory containing the _entire_ vorbis stream, or a
220 // FILE * that you or it create, or possibly some other reading mechanism
221 // if you go modify the source to replace the FILE * case with some kind
222 // of callback to your code. (But if you don't support seeking, you may
223 // just want to go ahead and use pushdata.)
224
225 #if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
226 extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
227 #endif
228 #if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
229 extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
230 #endif
231 // decode an entire file and output the data interleaved into a malloc()ed
232 // buffer stored in *output. The return value is the number of samples
233 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
234 // When you're done with it, just free() the pointer returned in *output.
235
236 extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
237 int *error, const stb_vorbis_alloc *alloc_buffer);
238 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
239 // this must be the entire stream!). on failure, returns NULL and sets *error
240
241 #ifndef STB_VORBIS_NO_STDIO
242 extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
243 int *error, const stb_vorbis_alloc *alloc_buffer);
244 // create an ogg vorbis decoder from a filename via fopen(). on failure,
245 // returns NULL and sets *error (possibly to VORBIS_file_open_failure).
246
247 extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
248 int *error, const stb_vorbis_alloc *alloc_buffer);
249 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
250 // the _current_ seek point (ftell). on failure, returns NULL and sets *error.
251 // note that stb_vorbis must "own" this stream; if you seek it in between
252 // calls to stb_vorbis, it will become confused. Morever, if you attempt to
253 // perform stb_vorbis_seek_*() operations on this file, it will assume it
254 // owns the _entire_ rest of the file after the start point. Use the next
255 // function, stb_vorbis_open_file_section(), to limit it.
256
257 extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
258 int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
259 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
260 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
261 // on failure, returns NULL and sets *error. note that stb_vorbis must "own"
262 // this stream; if you seek it in between calls to stb_vorbis, it will become
263 // confused.
264 #endif
265
266 extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
267 extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
268 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
269 // after calling seek_frame(), the next call to get_frame_*() will include
270 // the specified sample. after calling stb_vorbis_seek(), the next call to
271 // stb_vorbis_get_samples_* will start with the specified sample. If you
272 // do not need to seek to EXACTLY the target sample when using get_samples_*,
273 // you can also use seek_frame().
274
275 extern int stb_vorbis_seek_start(stb_vorbis *f);
276 // this function is equivalent to stb_vorbis_seek(f,0)
277
278 extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
279 extern float stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
280 // these functions return the total length of the vorbis stream
281
282 extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
283 // decode the next frame and return the number of samples. the number of
284 // channels returned are stored in *channels (which can be NULL--it is always
285 // the same as the number of channels reported by get_info). *output will
286 // contain an array of float* buffers, one per channel. These outputs will
287 // be overwritten on the next call to stb_vorbis_get_frame_*.
288 //
289 // You generally should not intermix calls to stb_vorbis_get_frame_*()
290 // and stb_vorbis_get_samples_*(), since the latter calls the former.
291
292 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
293 extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
294 extern int stb_vorbis_get_frame_short (stb_vorbis *f, int num_c, short **buffer, int num_samples);
295 #endif
296 // decode the next frame and return the number of *samples* per channel.
297 // Note that for interleaved data, you pass in the number of shorts (the
298 // size of your array), but the return value is the number of samples per
299 // channel, not the total number of samples.
300 //
301 // The data is coerced to the number of channels you request according to the
302 // channel coercion rules (see below). You must pass in the size of your
303 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
304 // The maximum buffer size needed can be gotten from get_info(); however,
305 // the Vorbis I specification implies an absolute maximum of 4096 samples
306 // per channel.
307
308 // Channel coercion rules:
309 // Let M be the number of channels requested, and N the number of channels present,
310 // and Cn be the nth channel; let stereo L be the sum of all L and center channels,
311 // and stereo R be the sum of all R and center channels (channel assignment from the
312 // vorbis spec).
313 // M N output
314 // 1 k sum(Ck) for all k
315 // 2 * stereo L, stereo R
316 // k l k > l, the first l channels, then 0s
317 // k l k <= l, the first k channels
318 // Note that this is not _good_ surround etc. mixing at all! It's just so
319 // you get something useful.
320
321 extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
322 extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
323 // gets num_samples samples, not necessarily on a frame boundary--this requires
324 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
325 // Returns the number of samples stored per channel; it may be less than requested
326 // at the end of the file. If there are no more samples in the file, returns 0.
327
328 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
329 extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
330 extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
331 #endif
332 // gets num_samples samples, not necessarily on a frame boundary--this requires
333 // buffering so you have to supply the buffers. Applies the coercion rules above
334 // to produce 'channels' channels. Returns the number of samples stored per channel;
335 // it may be less than requested at the end of the file. If there are no more
336 // samples in the file, returns 0.
337
338 #endif
339
340 //////// ERROR CODES
341
342 enum STBVorbisError
343 {
344 VORBIS__no_error,
345
346 VORBIS_need_more_data=1, // not a real error
347
348 VORBIS_invalid_api_mixing, // can't mix API modes
349 VORBIS_outofmem, // not enough memory
350 VORBIS_feature_not_supported, // uses floor 0
351 VORBIS_too_many_channels, // STB_VORBIS_MAX_CHANNELS is too small
352 VORBIS_file_open_failure, // fopen() failed
353 VORBIS_seek_without_length, // can't seek in unknown-length file
354
355 VORBIS_unexpected_eof=10, // file is truncated?
356 VORBIS_seek_invalid, // seek past EOF
357
358 // decoding errors (corrupt/invalid stream) -- you probably
359 // don't care about the exact details of these
360
361 // vorbis errors:
362 VORBIS_invalid_setup=20,
363 VORBIS_invalid_stream,
364
365 // ogg errors:
366 VORBIS_missing_capture_pattern=30,
367 VORBIS_invalid_stream_structure_version,
368 VORBIS_continued_packet_flag_invalid,
369 VORBIS_incorrect_stream_serial_number,
370 VORBIS_invalid_first_page,
371 VORBIS_bad_packet_type,
372 VORBIS_cant_find_last_page,
373 VORBIS_seek_failed
374 };
375
376
377 #ifdef __cplusplus
378 }
379 #endif
380
381 #endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
382 //
383 // HEADER ENDS HERE
384 //
385 //////////////////////////////////////////////////////////////////////////////
386
387 #ifndef STB_VORBIS_HEADER_ONLY
388
389 // global configuration settings (e.g. set these in the project/makefile),
390 // or just set them in this file at the top (although ideally the first few
391 // should be visible when the header file is compiled too, although it's not
392 // crucial)
393
394 // STB_VORBIS_NO_PUSHDATA_API
395 // does not compile the code for the various stb_vorbis_*_pushdata()
396 // functions
397 // #define STB_VORBIS_NO_PUSHDATA_API
398
399 // STB_VORBIS_NO_PULLDATA_API
400 // does not compile the code for the non-pushdata APIs
401 // #define STB_VORBIS_NO_PULLDATA_API
402
403 // STB_VORBIS_NO_STDIO
404 // does not compile the code for the APIs that use FILE *s internally
405 // or externally (implied by STB_VORBIS_NO_PULLDATA_API)
406 // #define STB_VORBIS_NO_STDIO
407
408 // STB_VORBIS_NO_INTEGER_CONVERSION
409 // does not compile the code for converting audio sample data from
410 // float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
411 // #define STB_VORBIS_NO_INTEGER_CONVERSION
412
413 // STB_VORBIS_NO_FAST_SCALED_FLOAT
414 // does not use a fast float-to-int trick to accelerate float-to-int on
415 // most platforms which requires endianness be defined correctly.
416 //#define STB_VORBIS_NO_FAST_SCALED_FLOAT
417
418
419 // STB_VORBIS_MAX_CHANNELS [number]
420 // globally define this to the maximum number of channels you need.
421 // The spec does not put a restriction on channels except that
422 // the count is stored in a byte, so 255 is the hard limit.
423 // Reducing this saves about 16 bytes per value, so using 16 saves
424 // (255-16)*16 or around 4KB. Plus anything other memory usage
425 // I forgot to account for. Can probably go as low as 8 (7.1 audio),
426 // 6 (5.1 audio), or 2 (stereo only).
427 #ifndef STB_VORBIS_MAX_CHANNELS
428 #define STB_VORBIS_MAX_CHANNELS 16 // enough for anyone?
429 #endif
430
431 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
432 // after a flush_pushdata(), stb_vorbis begins scanning for the
433 // next valid page, without backtracking. when it finds something
434 // that looks like a page, it streams through it and verifies its
435 // CRC32. Should that validation fail, it keeps scanning. But it's
436 // possible that _while_ streaming through to check the CRC32 of
437 // one candidate page, it sees another candidate page. This #define
438 // determines how many "overlapping" candidate pages it can search
439 // at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
440 // garbage pages could be as big as 64KB, but probably average ~16KB.
441 // So don't hose ourselves by scanning an apparent 64KB page and
442 // missing a ton of real ones in the interim; so minimum of 2
443 #ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
444 #define STB_VORBIS_PUSHDATA_CRC_COUNT 4
445 #endif
446
447 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
448 // sets the log size of the huffman-acceleration table. Maximum
449 // supported value is 24. with larger numbers, more decodings are O(1),
450 // but the table size is larger so worse cache missing, so you'll have
451 // to probe (and try multiple ogg vorbis files) to find the sweet spot.
452 #ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
453 #define STB_VORBIS_FAST_HUFFMAN_LENGTH 10
454 #endif
455
456 // STB_VORBIS_FAST_BINARY_LENGTH [number]
457 // sets the log size of the binary-search acceleration table. this
458 // is used in similar fashion to the fast-huffman size to set initial
459 // parameters for the binary search
460
461 // STB_VORBIS_FAST_HUFFMAN_INT
462 // The fast huffman tables are much more efficient if they can be
463 // stored as 16-bit results instead of 32-bit results. This restricts
464 // the codebooks to having only 65535 possible outcomes, though.
465 // (At least, accelerated by the huffman table.)
466 #ifndef STB_VORBIS_FAST_HUFFMAN_INT
467 #define STB_VORBIS_FAST_HUFFMAN_SHORT
468 #endif
469
470 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
471 // If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
472 // back on binary searching for the correct one. This requires storing
473 // extra tables with the huffman codes in sorted order. Defining this
474 // symbol trades off space for speed by forcing a linear search in the
475 // non-fast case, except for "sparse" codebooks.
476 // #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
477
478 // STB_VORBIS_DIVIDES_IN_RESIDUE
479 // stb_vorbis precomputes the result of the scalar residue decoding
480 // that would otherwise require a divide per chunk. you can trade off
481 // space for time by defining this symbol.
482 // #define STB_VORBIS_DIVIDES_IN_RESIDUE
483
484 // STB_VORBIS_DIVIDES_IN_CODEBOOK
485 // vorbis VQ codebooks can be encoded two ways: with every case explicitly
486 // stored, or with all elements being chosen from a small range of values,
487 // and all values possible in all elements. By default, stb_vorbis expands
488 // this latter kind out to look like the former kind for ease of decoding,
489 // because otherwise an integer divide-per-vector-element is required to
490 // unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
491 // trade off storage for speed.
492 //#define STB_VORBIS_DIVIDES_IN_CODEBOOK
493
494 #ifdef STB_VORBIS_CODEBOOK_SHORTS
495 #error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
496 #endif
497
498 // STB_VORBIS_DIVIDE_TABLE
499 // this replaces small integer divides in the floor decode loop with
500 // table lookups. made less than 1% difference, so disabled by default.
501
502 // STB_VORBIS_NO_INLINE_DECODE
503 // disables the inlining of the scalar codebook fast-huffman decode.
504 // might save a little codespace; useful for debugging
505 // #define STB_VORBIS_NO_INLINE_DECODE
506
507 // STB_VORBIS_NO_DEFER_FLOOR
508 // Normally we only decode the floor without synthesizing the actual
509 // full curve. We can instead synthesize the curve immediately. This
510 // requires more memory and is very likely slower, so I don't think
511 // you'd ever want to do it except for debugging.
512 // #define STB_VORBIS_NO_DEFER_FLOOR
513
514
515
516
517 //////////////////////////////////////////////////////////////////////////////
518
519 #ifdef STB_VORBIS_NO_PULLDATA_API
520 #define STB_VORBIS_NO_INTEGER_CONVERSION
521 #define STB_VORBIS_NO_STDIO
522 #endif
523
524 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
525 #define STB_VORBIS_NO_STDIO 1
526 #endif
527
528 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
529 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
530
531 // only need endianness for fast-float-to-int, which we don't
532 // use for pushdata
533
534 #ifndef STB_VORBIS_BIG_ENDIAN
535 #define STB_VORBIS_ENDIAN 0
536 #else
537 #define STB_VORBIS_ENDIAN 1
538 #endif
539
540 #endif
541 #endif
542
543
544 #ifndef STB_VORBIS_NO_STDIO
545 #include <stdio.h>
546 #endif
547
548 #ifndef STB_VORBIS_NO_CRT
549 #include <stdlib.h>
550 #include <string.h>
551 #include <assert.h>
552 #include <math.h>
553
554 // find definition of alloca if it's not in stdlib.h:
555 #if defined(_MSC_VER) || defined(__MINGW32__)
556 #include <malloc.h>
557 #endif
558 #if defined(__linux__) || defined(__linux) || defined(__EMSCRIPTEN__)
559 #include <alloca.h>
560 #endif
561 #else // STB_VORBIS_NO_CRT
562 #define NULL 0
563 #define malloc(s) 0
564 #define free(s) ((void) 0)
565 #define realloc(s) 0
566 #endif // STB_VORBIS_NO_CRT
567
568 #include <limits.h>
569
570 #ifdef __MINGW32__
571 // eff you mingw:
572 // "fixed":
573 // http://sourceforge.net/p/mingw-w64/mailman/message/32882927/
574 // "no that broke the build, reverted, who cares about C":
575 // http://sourceforge.net/p/mingw-w64/mailman/message/32890381/
576 #ifdef __forceinline
577 #undef __forceinline
578 #endif
579 #define __forceinline
580 #define alloca __builtin_alloca
581 #elif !defined(_MSC_VER)
582 #if __GNUC__
583 #define __forceinline inline
584 #else
585 #define __forceinline
586 #endif
587 #endif
588
589 #if STB_VORBIS_MAX_CHANNELS > 256
590 #error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
591 #endif
592
593 #if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
594 #error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
595 #endif
596
597
598 #if 0
599 #include <crtdbg.h>
600 #define CHECK(f) _CrtIsValidHeapPointer(f->channel_buffers[1])
601 #else
602 #define CHECK(f) ((void) 0)
603 #endif
604
605 #define MAX_BLOCKSIZE_LOG 13 // from specification
606 #define MAX_BLOCKSIZE (1 << MAX_BLOCKSIZE_LOG)
607
608
609 typedef unsigned char uint8;
610 typedef signed char int8;
611 typedef unsigned short uint16;
612 typedef signed short int16;
613 typedef unsigned int uint32;
614 typedef signed int int32;
615
616 #ifndef TRUE
617 #define TRUE 1
618 #define FALSE 0
619 #endif
620
621 typedef float codetype;
622
623 // @NOTE
624 //
625 // Some arrays below are tagged "//varies", which means it's actually
626 // a variable-sized piece of data, but rather than malloc I assume it's
627 // small enough it's better to just allocate it all together with the
628 // main thing
629 //
630 // Most of the variables are specified with the smallest size I could pack
631 // them into. It might give better performance to make them all full-sized
632 // integers. It should be safe to freely rearrange the structures or change
633 // the sizes larger--nothing relies on silently truncating etc., nor the
634 // order of variables.
635
636 #define FAST_HUFFMAN_TABLE_SIZE (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
637 #define FAST_HUFFMAN_TABLE_MASK (FAST_HUFFMAN_TABLE_SIZE - 1)
638
639 typedef struct
640 {
641 int dimensions, entries;
642 uint8 *codeword_lengths;
643 float minimum_value;
644 float delta_value;
645 uint8 value_bits;
646 uint8 lookup_type;
647 uint8 sequence_p;
648 uint8 sparse;
649 uint32 lookup_values;
650 codetype *multiplicands;
651 uint32 *codewords;
652 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
653 int16 fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
654 #else
655 int32 fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
656 #endif
657 uint32 *sorted_codewords;
658 int *sorted_values;
659 int sorted_entries;
660 } Codebook;
661
662 typedef struct
663 {
664 uint8 order;
665 uint16 rate;
666 uint16 bark_map_size;
667 uint8 amplitude_bits;
668 uint8 amplitude_offset;
669 uint8 number_of_books;
670 uint8 book_list[16]; // varies
671 } Floor0;
672
673 typedef struct
674 {
675 uint8 partitions;
676 uint8 partition_class_list[32]; // varies
677 uint8 class_dimensions[16]; // varies
678 uint8 class_subclasses[16]; // varies
679 uint8 class_masterbooks[16]; // varies
680 int16 subclass_books[16][8]; // varies
681 uint16 Xlist[31*8+2]; // varies
682 uint8 sorted_order[31*8+2];
683 uint8 neighbors[31*8+2][2];
684 uint8 floor1_multiplier;
685 uint8 rangebits;
686 int values;
687 } Floor1;
688
689 typedef union
690 {
691 Floor0 floor0;
692 Floor1 floor1;
693 } Floor;
694
695 typedef struct
696 {
697 uint32 begin, end;
698 uint32 part_size;
699 uint8 classifications;
700 uint8 classbook;
701 uint8 **classdata;
702 int16 (*residue_books)[8];
703 } Residue;
704
705 typedef struct
706 {
707 uint8 magnitude;
708 uint8 angle;
709 uint8 mux;
710 } MappingChannel;
711
712 typedef struct
713 {
714 uint16 coupling_steps;
715 MappingChannel *chan;
716 uint8 submaps;
717 uint8 submap_floor[15]; // varies
718 uint8 submap_residue[15]; // varies
719 } Mapping;
720
721 typedef struct
722 {
723 uint8 blockflag;
724 uint8 mapping;
725 uint16 windowtype;
726 uint16 transformtype;
727 } Mode;
728
729 typedef struct
730 {
731 uint32 goal_crc; // expected crc if match
732 int bytes_left; // bytes left in packet
733 uint32 crc_so_far; // running crc
734 int bytes_done; // bytes processed in _current_ chunk
735 uint32 sample_loc; // granule pos encoded in page
736 } CRCscan;
737
738 typedef struct
739 {
740 uint32 page_start, page_end;
741 uint32 last_decoded_sample;
742 } ProbedPage;
743
744 struct stb_vorbis
745 {
746 // user-accessible info
747 unsigned int sample_rate;
748 int channels;
749
750 unsigned int setup_memory_required;
751 unsigned int temp_memory_required;
752 unsigned int setup_temp_memory_required;
753
754 // input config
755 #ifndef STB_VORBIS_NO_STDIO
756 FILE *f;
757 uint32 f_start;
758 int close_on_free;
759 #endif
760
761 uint8 *stream;
762 uint8 *stream_start;
763 uint8 *stream_end;
764
765 uint32 stream_len;
766
767 uint8 push_mode;
768
769 uint32 first_audio_page_offset;
770
771 ProbedPage p_first, p_last;
772
773 // memory management
774 stb_vorbis_alloc alloc;
775 int setup_offset;
776 int temp_offset;
777
778 // run-time results
779 int eof;
780 enum STBVorbisError error;
781
782 // user-useful data
783
784 // header info
785 int blocksize[2];
786 int blocksize_0, blocksize_1;
787 int codebook_count;
788 Codebook *codebooks;
789 int floor_count;
790 uint16 floor_types[64]; // varies
791 Floor *floor_config;
792 int residue_count;
793 uint16 residue_types[64]; // varies
794 Residue *residue_config;
795 int mapping_count;
796 Mapping *mapping;
797 int mode_count;
798 Mode mode_config[64]; // varies
799
800 uint32 total_samples;
801
802 // decode buffer
803 float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
804 float *outputs [STB_VORBIS_MAX_CHANNELS];
805
806 float *previous_window[STB_VORBIS_MAX_CHANNELS];
807 int previous_length;
808
809 #ifndef STB_VORBIS_NO_DEFER_FLOOR
810 int16 *finalY[STB_VORBIS_MAX_CHANNELS];
811 #else
812 float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
813 #endif
814
815 uint32 current_loc; // sample location of next frame to decode
816 int current_loc_valid;
817
818 // per-blocksize precomputed data
819
820 // twiddle factors
821 float *A[2],*B[2],*C[2];
822 float *window[2];
823 uint16 *bit_reverse[2];
824
825 // current page/packet/segment streaming info
826 uint32 serial; // stream serial number for verification
827 int last_page;
828 int segment_count;
829 uint8 segments[255];
830 uint8 page_flag;
831 uint8 bytes_in_seg;
832 uint8 first_decode;
833 int next_seg;
834 int last_seg; // flag that we're on the last segment
835 int last_seg_which; // what was the segment number of the last seg?
836 uint32 acc;
837 int valid_bits;
838 int packet_bytes;
839 int end_seg_with_known_loc;
840 uint32 known_loc_for_packet;
841 int discard_samples_deferred;
842 uint32 samples_output;
843
844 // push mode scanning
845 int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
846 #ifndef STB_VORBIS_NO_PUSHDATA_API
847 CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
848 #endif
849
850 // sample-access
851 int channel_buffer_start;
852 int channel_buffer_end;
853 };
854
855 #if defined(STB_VORBIS_NO_PUSHDATA_API)
856 #define IS_PUSH_MODE(f) FALSE
857 #elif defined(STB_VORBIS_NO_PULLDATA_API)
858 #define IS_PUSH_MODE(f) TRUE
859 #else
860 #define IS_PUSH_MODE(f) ((f)->push_mode)
861 #endif
862
863 typedef struct stb_vorbis vorb;
864
error(vorb * f,enum STBVorbisError e)865 static int error(vorb *f, enum STBVorbisError e)
866 {
867 f->error = e;
868 if (!f->eof && e != VORBIS_need_more_data) {
869 f->error=e; // breakpoint for debugging
870 }
871 return 0;
872 }
873
874
875 // these functions are used for allocating temporary memory
876 // while decoding. if you can afford the stack space, use
877 // alloca(); otherwise, provide a temp buffer and it will
878 // allocate out of those.
879
880 #define array_size_required(count,size) (count*(sizeof(void *)+(size)))
881
882 #define temp_alloc(f,size) (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
883 #ifdef dealloca
884 #define temp_free(f,p) (f->alloc.alloc_buffer ? 0 : dealloca(size))
885 #else
886 #define temp_free(f,p) 0
887 #endif
888 #define temp_alloc_save(f) ((f)->temp_offset)
889 #define temp_alloc_restore(f,p) ((f)->temp_offset = (p))
890
891 #define temp_block_array(f,count,size) make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
892
893 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
make_block_array(void * mem,int count,int size)894 static void *make_block_array(void *mem, int count, int size)
895 {
896 int i;
897 void ** p = (void **) mem;
898 char *q = (char *) (p + count);
899 for (i=0; i < count; ++i) {
900 p[i] = q;
901 q += size;
902 }
903 return p;
904 }
905
setup_malloc(vorb * f,int sz)906 static void *setup_malloc(vorb *f, int sz)
907 {
908 sz = (sz+3) & ~3;
909 f->setup_memory_required += sz;
910 if (f->alloc.alloc_buffer) {
911 void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
912 if (f->setup_offset + sz > f->temp_offset) return NULL;
913 f->setup_offset += sz;
914 return p;
915 }
916 return sz ? malloc(sz) : NULL;
917 }
918
setup_free(vorb * f,void * p)919 static void setup_free(vorb *f, void *p)
920 {
921 if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack
922 free(p);
923 }
924
setup_temp_malloc(vorb * f,int sz)925 static void *setup_temp_malloc(vorb *f, int sz)
926 {
927 sz = (sz+3) & ~3;
928 if (f->alloc.alloc_buffer) {
929 if (f->temp_offset - sz < f->setup_offset) return NULL;
930 f->temp_offset -= sz;
931 return (char *) f->alloc.alloc_buffer + f->temp_offset;
932 }
933 return malloc(sz);
934 }
935
setup_temp_free(vorb * f,void * p,int sz)936 static void setup_temp_free(vorb *f, void *p, int sz)
937 {
938 if (f->alloc.alloc_buffer) {
939 f->temp_offset += (sz+3)&~3;
940 return;
941 }
942 free(p);
943 }
944
945 #define CRC32_POLY 0x04c11db7 // from spec
946
947 static uint32 crc_table[256];
crc32_init(void)948 static void crc32_init(void)
949 {
950 int i,j;
951 uint32 s;
952 for(i=0; i < 256; i++) {
953 for (s=(uint32) i << 24, j=0; j < 8; ++j)
954 s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
955 crc_table[i] = s;
956 }
957 }
958
crc32_update(uint32 crc,uint8 byte)959 static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
960 {
961 return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
962 }
963
964
965 // used in setup, and for huffman that doesn't go fast path
bit_reverse(unsigned int n)966 static unsigned int bit_reverse(unsigned int n)
967 {
968 n = ((n & 0xAAAAAAAA) >> 1) | ((n & 0x55555555) << 1);
969 n = ((n & 0xCCCCCCCC) >> 2) | ((n & 0x33333333) << 2);
970 n = ((n & 0xF0F0F0F0) >> 4) | ((n & 0x0F0F0F0F) << 4);
971 n = ((n & 0xFF00FF00) >> 8) | ((n & 0x00FF00FF) << 8);
972 return (n >> 16) | (n << 16);
973 }
974
square(float x)975 static float square(float x)
976 {
977 return x*x;
978 }
979
980 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
981 // as required by the specification. fast(?) implementation from stb.h
982 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
ilog(int32 n)983 static int ilog(int32 n)
984 {
985 static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
986
987 if (n < 0) return 0; // signed n returns 0
988
989 // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
990 if (n < (1 << 14))
991 if (n < (1 << 4)) return 0 + log2_4[n ];
992 else if (n < (1 << 9)) return 5 + log2_4[n >> 5];
993 else return 10 + log2_4[n >> 10];
994 else if (n < (1 << 24))
995 if (n < (1 << 19)) return 15 + log2_4[n >> 15];
996 else return 20 + log2_4[n >> 20];
997 else if (n < (1 << 29)) return 25 + log2_4[n >> 25];
998 else return 30 + log2_4[n >> 30];
999 }
1000
1001 #ifndef M_PI
1002 #define M_PI 3.14159265358979323846264f // from CRC
1003 #endif
1004
1005 // code length assigned to a value with no huffman encoding
1006 #define NO_CODE 255
1007
1008 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
1009 //
1010 // these functions are only called at setup, and only a few times
1011 // per file
1012
float32_unpack(uint32 x)1013 static float float32_unpack(uint32 x)
1014 {
1015 // from the specification
1016 uint32 mantissa = x & 0x1fffff;
1017 uint32 sign = x & 0x80000000;
1018 uint32 exp = (x & 0x7fe00000) >> 21;
1019 double res = sign ? -(double)mantissa : (double)mantissa;
1020 return (float) ldexp((float)res, exp-788);
1021 }
1022
1023
1024 // zlib & jpeg huffman tables assume that the output symbols
1025 // can either be arbitrarily arranged, or have monotonically
1026 // increasing frequencies--they rely on the lengths being sorted;
1027 // this makes for a very simple generation algorithm.
1028 // vorbis allows a huffman table with non-sorted lengths. This
1029 // requires a more sophisticated construction, since symbols in
1030 // order do not map to huffman codes "in order".
add_entry(Codebook * c,uint32 huff_code,int symbol,int count,int len,uint32 * values)1031 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
1032 {
1033 if (!c->sparse) {
1034 c->codewords [symbol] = huff_code;
1035 } else {
1036 c->codewords [count] = huff_code;
1037 c->codeword_lengths[count] = len;
1038 values [count] = symbol;
1039 }
1040 }
1041
compute_codewords(Codebook * c,uint8 * len,int n,uint32 * values)1042 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
1043 {
1044 int i,k,m=0;
1045 uint32 available[32];
1046
1047 memset(available, 0, sizeof(available));
1048 // find the first entry
1049 for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
1050 if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
1051 // add to the list
1052 add_entry(c, 0, k, m++, len[k], values);
1053 // add all available leaves
1054 for (i=1; i <= len[k]; ++i)
1055 available[i] = 1U << (32-i);
1056 // note that the above code treats the first case specially,
1057 // but it's really the same as the following code, so they
1058 // could probably be combined (except the initial code is 0,
1059 // and I use 0 in available[] to mean 'empty')
1060 for (i=k+1; i < n; ++i) {
1061 uint32 res;
1062 int z = len[i], y;
1063 if (z == NO_CODE) continue;
1064 // find lowest available leaf (should always be earliest,
1065 // which is what the specification calls for)
1066 // note that this property, and the fact we can never have
1067 // more than one free leaf at a given level, isn't totally
1068 // trivial to prove, but it seems true and the assert never
1069 // fires, so!
1070 while (z > 0 && !available[z]) --z;
1071 if (z == 0) { return FALSE; }
1072 res = available[z];
1073 assert(z >= 0 && z < 32);
1074 available[z] = 0;
1075 add_entry(c, bit_reverse(res), i, m++, len[i], values);
1076 // propogate availability up the tree
1077 if (z != len[i]) {
1078 assert(len[i] >= 0 && len[i] < 32);
1079 for (y=len[i]; y > z; --y) {
1080 assert(available[y] == 0);
1081 available[y] = res + (1 << (32-y));
1082 }
1083 }
1084 }
1085 return TRUE;
1086 }
1087
1088 // accelerated huffman table allows fast O(1) match of all symbols
1089 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
compute_accelerated_huffman(Codebook * c)1090 static void compute_accelerated_huffman(Codebook *c)
1091 {
1092 int i, len;
1093 for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
1094 c->fast_huffman[i] = -1;
1095
1096 len = c->sparse ? c->sorted_entries : c->entries;
1097 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
1098 if (len > 32767) len = 32767; // largest possible value we can encode!
1099 #endif
1100 for (i=0; i < len; ++i) {
1101 if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
1102 uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
1103 // set table entries for all bit combinations in the higher bits
1104 while (z < FAST_HUFFMAN_TABLE_SIZE) {
1105 c->fast_huffman[z] = i;
1106 z += 1 << c->codeword_lengths[i];
1107 }
1108 }
1109 }
1110 }
1111
1112 #ifdef _MSC_VER
1113 #define STBV_CDECL __cdecl
1114 #else
1115 #define STBV_CDECL
1116 #endif
1117
uint32_compare(const void * p,const void * q)1118 static int STBV_CDECL uint32_compare(const void *p, const void *q)
1119 {
1120 uint32 x = * (uint32 *) p;
1121 uint32 y = * (uint32 *) q;
1122 return x < y ? -1 : x > y;
1123 }
1124
include_in_sort(Codebook * c,uint8 len)1125 static int include_in_sort(Codebook *c, uint8 len)
1126 {
1127 if (c->sparse) { assert(len != NO_CODE); return TRUE; }
1128 if (len == NO_CODE) return FALSE;
1129 if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
1130 return FALSE;
1131 }
1132
1133 // if the fast table above doesn't work, we want to binary
1134 // search them... need to reverse the bits
compute_sorted_huffman(Codebook * c,uint8 * lengths,uint32 * values)1135 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
1136 {
1137 int i, len;
1138 // build a list of all the entries
1139 // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
1140 // this is kind of a frivolous optimization--I don't see any performance improvement,
1141 // but it's like 4 extra lines of code, so.
1142 if (!c->sparse) {
1143 int k = 0;
1144 for (i=0; i < c->entries; ++i)
1145 if (include_in_sort(c, lengths[i]))
1146 c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
1147 assert(k == c->sorted_entries);
1148 } else {
1149 for (i=0; i < c->sorted_entries; ++i)
1150 c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
1151 }
1152
1153 qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
1154 c->sorted_codewords[c->sorted_entries] = 0xffffffff;
1155
1156 len = c->sparse ? c->sorted_entries : c->entries;
1157 // now we need to indicate how they correspond; we could either
1158 // #1: sort a different data structure that says who they correspond to
1159 // #2: for each sorted entry, search the original list to find who corresponds
1160 // #3: for each original entry, find the sorted entry
1161 // #1 requires extra storage, #2 is slow, #3 can use binary search!
1162 for (i=0; i < len; ++i) {
1163 int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
1164 if (include_in_sort(c,huff_len)) {
1165 uint32 code = bit_reverse(c->codewords[i]);
1166 int x=0, n=c->sorted_entries;
1167 while (n > 1) {
1168 // invariant: sc[x] <= code < sc[x+n]
1169 int m = x + (n >> 1);
1170 if (c->sorted_codewords[m] <= code) {
1171 x = m;
1172 n -= (n>>1);
1173 } else {
1174 n >>= 1;
1175 }
1176 }
1177 assert(c->sorted_codewords[x] == code);
1178 if (c->sparse) {
1179 c->sorted_values[x] = values[i];
1180 c->codeword_lengths[x] = huff_len;
1181 } else {
1182 c->sorted_values[x] = i;
1183 }
1184 }
1185 }
1186 }
1187
1188 // only run while parsing the header (3 times)
vorbis_validate(uint8 * data)1189 static int vorbis_validate(uint8 *data)
1190 {
1191 static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
1192 return memcmp(data, vorbis, 6) == 0;
1193 }
1194
1195 // called from setup only, once per code book
1196 // (formula implied by specification)
lookup1_values(int entries,int dim)1197 static int lookup1_values(int entries, int dim)
1198 {
1199 int r = (int) floor(exp((float) log((float) entries) / dim));
1200 if ((int) floor(pow((float) r+1, dim)) <= entries) // (int) cast for MinGW warning;
1201 ++r; // floor() to avoid _ftol() when non-CRT
1202 assert(pow((float) r+1, dim) > entries);
1203 assert((int) floor(pow((float) r, dim)) <= entries); // (int),floor() as above
1204 return r;
1205 }
1206
1207 // called twice per file
compute_twiddle_factors(int n,float * A,float * B,float * C)1208 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
1209 {
1210 int n4 = n >> 2, n8 = n >> 3;
1211 int k,k2;
1212
1213 for (k=k2=0; k < n4; ++k,k2+=2) {
1214 A[k2 ] = (float) cos(4*k*M_PI/n);
1215 A[k2+1] = (float) -sin(4*k*M_PI/n);
1216 B[k2 ] = (float) cos((k2+1)*M_PI/n/2) * 0.5f;
1217 B[k2+1] = (float) sin((k2+1)*M_PI/n/2) * 0.5f;
1218 }
1219 for (k=k2=0; k < n8; ++k,k2+=2) {
1220 C[k2 ] = (float) cos(2*(k2+1)*M_PI/n);
1221 C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
1222 }
1223 }
1224
compute_window(int n,float * window)1225 static void compute_window(int n, float *window)
1226 {
1227 int n2 = n >> 1, i;
1228 for (i=0; i < n2; ++i)
1229 window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
1230 }
1231
compute_bitreverse(int n,uint16 * rev)1232 static void compute_bitreverse(int n, uint16 *rev)
1233 {
1234 int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
1235 int i, n8 = n >> 3;
1236 for (i=0; i < n8; ++i)
1237 rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
1238 }
1239
init_blocksize(vorb * f,int b,int n)1240 static int init_blocksize(vorb *f, int b, int n)
1241 {
1242 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
1243 f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1244 f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1245 f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
1246 if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
1247 compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
1248 f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1249 if (!f->window[b]) return error(f, VORBIS_outofmem);
1250 compute_window(n, f->window[b]);
1251 f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
1252 if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
1253 compute_bitreverse(n, f->bit_reverse[b]);
1254 return TRUE;
1255 }
1256
neighbors(uint16 * x,int n,int * plow,int * phigh)1257 static void neighbors(uint16 *x, int n, int *plow, int *phigh)
1258 {
1259 int low = -1;
1260 int high = 65536;
1261 int i;
1262 for (i=0; i < n; ++i) {
1263 if (x[i] > low && x[i] < x[n]) { *plow = i; low = x[i]; }
1264 if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
1265 }
1266 }
1267
1268 // this has been repurposed so y is now the original index instead of y
1269 typedef struct
1270 {
1271 uint16 x,id;
1272 } stbv__floor_ordering;
1273
point_compare(const void * p,const void * q)1274 static int STBV_CDECL point_compare(const void *p, const void *q)
1275 {
1276 stbv__floor_ordering *a = (stbv__floor_ordering *) p;
1277 stbv__floor_ordering *b = (stbv__floor_ordering *) q;
1278 return a->x < b->x ? -1 : a->x > b->x;
1279 }
1280
1281 //
1282 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
1283
1284
1285 #if defined(STB_VORBIS_NO_STDIO)
1286 #define USE_MEMORY(z) TRUE
1287 #else
1288 #define USE_MEMORY(z) ((z)->stream)
1289 #endif
1290
get8(vorb * z)1291 static uint8 get8(vorb *z)
1292 {
1293 if (USE_MEMORY(z)) {
1294 if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
1295 return *z->stream++;
1296 }
1297
1298 #ifndef STB_VORBIS_NO_STDIO
1299 {
1300 int c = fgetc(z->f);
1301 if (c == EOF) { z->eof = TRUE; return 0; }
1302 return c;
1303 }
1304 #endif
1305 }
1306
get32(vorb * f)1307 static uint32 get32(vorb *f)
1308 {
1309 uint32 x;
1310 x = get8(f);
1311 x += get8(f) << 8;
1312 x += get8(f) << 16;
1313 x += (uint32) get8(f) << 24;
1314 return x;
1315 }
1316
getn(vorb * z,uint8 * data,int n)1317 static int getn(vorb *z, uint8 *data, int n)
1318 {
1319 if (USE_MEMORY(z)) {
1320 if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
1321 memcpy(data, z->stream, n);
1322 z->stream += n;
1323 return 1;
1324 }
1325
1326 #ifndef STB_VORBIS_NO_STDIO
1327 if (fread(data, n, 1, z->f) == 1)
1328 return 1;
1329 else {
1330 z->eof = 1;
1331 return 0;
1332 }
1333 #endif
1334 }
1335
skip(vorb * z,int n)1336 static void skip(vorb *z, int n)
1337 {
1338 if (USE_MEMORY(z)) {
1339 z->stream += n;
1340 if (z->stream >= z->stream_end) z->eof = 1;
1341 return;
1342 }
1343 #ifndef STB_VORBIS_NO_STDIO
1344 {
1345 long x = ftell(z->f);
1346 fseek(z->f, x+n, SEEK_SET);
1347 }
1348 #endif
1349 }
1350
set_file_offset(stb_vorbis * f,unsigned int loc)1351 static int set_file_offset(stb_vorbis *f, unsigned int loc)
1352 {
1353 #ifndef STB_VORBIS_NO_PUSHDATA_API
1354 if (f->push_mode) return 0;
1355 #endif
1356 f->eof = 0;
1357 if (USE_MEMORY(f)) {
1358 if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
1359 f->stream = f->stream_end;
1360 f->eof = 1;
1361 return 0;
1362 } else {
1363 f->stream = f->stream_start + loc;
1364 return 1;
1365 }
1366 }
1367 #ifndef STB_VORBIS_NO_STDIO
1368 if (loc + f->f_start < loc || loc >= 0x80000000) {
1369 loc = 0x7fffffff;
1370 f->eof = 1;
1371 } else {
1372 loc += f->f_start;
1373 }
1374 if (!fseek(f->f, loc, SEEK_SET))
1375 return 1;
1376 f->eof = 1;
1377 fseek(f->f, f->f_start, SEEK_END);
1378 return 0;
1379 #endif
1380 }
1381
1382
1383 static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
1384
capture_pattern(vorb * f)1385 static int capture_pattern(vorb *f)
1386 {
1387 if (0x4f != get8(f)) return FALSE;
1388 if (0x67 != get8(f)) return FALSE;
1389 if (0x67 != get8(f)) return FALSE;
1390 if (0x53 != get8(f)) return FALSE;
1391 return TRUE;
1392 }
1393
1394 #define PAGEFLAG_continued_packet 1
1395 #define PAGEFLAG_first_page 2
1396 #define PAGEFLAG_last_page 4
1397
start_page_no_capturepattern(vorb * f)1398 static int start_page_no_capturepattern(vorb *f)
1399 {
1400 uint32 loc0,loc1,n;
1401 // stream structure version
1402 if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
1403 // header flag
1404 f->page_flag = get8(f);
1405 // absolute granule position
1406 loc0 = get32(f);
1407 loc1 = get32(f);
1408 // @TODO: validate loc0,loc1 as valid positions?
1409 // stream serial number -- vorbis doesn't interleave, so discard
1410 get32(f);
1411 //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
1412 // page sequence number
1413 n = get32(f);
1414 f->last_page = n;
1415 // CRC32
1416 get32(f);
1417 // page_segments
1418 f->segment_count = get8(f);
1419 if (!getn(f, f->segments, f->segment_count))
1420 return error(f, VORBIS_unexpected_eof);
1421 // assume we _don't_ know any the sample position of any segments
1422 f->end_seg_with_known_loc = -2;
1423 if (loc0 != ~0U || loc1 != ~0U) {
1424 int i;
1425 // determine which packet is the last one that will complete
1426 for (i=f->segment_count-1; i >= 0; --i)
1427 if (f->segments[i] < 255)
1428 break;
1429 // 'i' is now the index of the _last_ segment of a packet that ends
1430 if (i >= 0) {
1431 f->end_seg_with_known_loc = i;
1432 f->known_loc_for_packet = loc0;
1433 }
1434 }
1435 if (f->first_decode) {
1436 int i,len;
1437 ProbedPage p;
1438 len = 0;
1439 for (i=0; i < f->segment_count; ++i)
1440 len += f->segments[i];
1441 len += 27 + f->segment_count;
1442 p.page_start = f->first_audio_page_offset;
1443 p.page_end = p.page_start + len;
1444 p.last_decoded_sample = loc0;
1445 f->p_first = p;
1446 }
1447 f->next_seg = 0;
1448 return TRUE;
1449 }
1450
start_page(vorb * f)1451 static int start_page(vorb *f)
1452 {
1453 if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
1454 return start_page_no_capturepattern(f);
1455 }
1456
start_packet(vorb * f)1457 static int start_packet(vorb *f)
1458 {
1459 while (f->next_seg == -1) {
1460 if (!start_page(f)) return FALSE;
1461 if (f->page_flag & PAGEFLAG_continued_packet)
1462 return error(f, VORBIS_continued_packet_flag_invalid);
1463 }
1464 f->last_seg = FALSE;
1465 f->valid_bits = 0;
1466 f->packet_bytes = 0;
1467 f->bytes_in_seg = 0;
1468 // f->next_seg is now valid
1469 return TRUE;
1470 }
1471
maybe_start_packet(vorb * f)1472 static int maybe_start_packet(vorb *f)
1473 {
1474 if (f->next_seg == -1) {
1475 int x = get8(f);
1476 if (f->eof) return FALSE; // EOF at page boundary is not an error!
1477 if (0x4f != x ) return error(f, VORBIS_missing_capture_pattern);
1478 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1479 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1480 if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1481 if (!start_page_no_capturepattern(f)) return FALSE;
1482 if (f->page_flag & PAGEFLAG_continued_packet) {
1483 // set up enough state that we can read this packet if we want,
1484 // e.g. during recovery
1485 f->last_seg = FALSE;
1486 f->bytes_in_seg = 0;
1487 return error(f, VORBIS_continued_packet_flag_invalid);
1488 }
1489 }
1490 return start_packet(f);
1491 }
1492
next_segment(vorb * f)1493 static int next_segment(vorb *f)
1494 {
1495 int len;
1496 if (f->last_seg) return 0;
1497 if (f->next_seg == -1) {
1498 f->last_seg_which = f->segment_count-1; // in case start_page fails
1499 if (!start_page(f)) { f->last_seg = 1; return 0; }
1500 if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1501 }
1502 len = f->segments[f->next_seg++];
1503 if (len < 255) {
1504 f->last_seg = TRUE;
1505 f->last_seg_which = f->next_seg-1;
1506 }
1507 if (f->next_seg >= f->segment_count)
1508 f->next_seg = -1;
1509 assert(f->bytes_in_seg == 0);
1510 f->bytes_in_seg = len;
1511 return len;
1512 }
1513
1514 #define EOP (-1)
1515 #define INVALID_BITS (-1)
1516
get8_packet_raw(vorb * f)1517 static int get8_packet_raw(vorb *f)
1518 {
1519 if (!f->bytes_in_seg) { // CLANG!
1520 if (f->last_seg) return EOP;
1521 else if (!next_segment(f)) return EOP;
1522 }
1523 assert(f->bytes_in_seg > 0);
1524 --f->bytes_in_seg;
1525 ++f->packet_bytes;
1526 return get8(f);
1527 }
1528
get8_packet(vorb * f)1529 static int get8_packet(vorb *f)
1530 {
1531 int x = get8_packet_raw(f);
1532 f->valid_bits = 0;
1533 return x;
1534 }
1535
flush_packet(vorb * f)1536 static void flush_packet(vorb *f)
1537 {
1538 while (get8_packet_raw(f) != EOP);
1539 }
1540
1541 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1542 // as the huffman decoder?
get_bits(vorb * f,int n)1543 static uint32 get_bits(vorb *f, int n)
1544 {
1545 uint32 z;
1546
1547 if (f->valid_bits < 0) return 0;
1548 if (f->valid_bits < n) {
1549 if (n > 24) {
1550 // the accumulator technique below would not work correctly in this case
1551 z = get_bits(f, 24);
1552 z += get_bits(f, n-24) << 24;
1553 return z;
1554 }
1555 if (f->valid_bits == 0) f->acc = 0;
1556 while (f->valid_bits < n) {
1557 int z = get8_packet_raw(f);
1558 if (z == EOP) {
1559 f->valid_bits = INVALID_BITS;
1560 return 0;
1561 }
1562 f->acc += z << f->valid_bits;
1563 f->valid_bits += 8;
1564 }
1565 }
1566 if (f->valid_bits < 0) return 0;
1567 z = f->acc & ((1 << n)-1);
1568 f->acc >>= n;
1569 f->valid_bits -= n;
1570 return z;
1571 }
1572
1573 // @OPTIMIZE: primary accumulator for huffman
1574 // expand the buffer to as many bits as possible without reading off end of packet
1575 // it might be nice to allow f->valid_bits and f->acc to be stored in registers,
1576 // e.g. cache them locally and decode locally
prep_huffman(vorb * f)1577 static __forceinline void prep_huffman(vorb *f)
1578 {
1579 if (f->valid_bits <= 24) {
1580 if (f->valid_bits == 0) f->acc = 0;
1581 do {
1582 int z;
1583 if (f->last_seg && !f->bytes_in_seg) return;
1584 z = get8_packet_raw(f);
1585 if (z == EOP) return;
1586 f->acc += (unsigned) z << f->valid_bits;
1587 f->valid_bits += 8;
1588 } while (f->valid_bits <= 24);
1589 }
1590 }
1591
1592 enum
1593 {
1594 VORBIS_packet_id = 1,
1595 VORBIS_packet_comment = 3,
1596 VORBIS_packet_setup = 5
1597 };
1598
codebook_decode_scalar_raw(vorb * f,Codebook * c)1599 static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
1600 {
1601 int i;
1602 prep_huffman(f);
1603
1604 if (c->codewords == NULL && c->sorted_codewords == NULL)
1605 return -1;
1606
1607 // cases to use binary search: sorted_codewords && !c->codewords
1608 // sorted_codewords && c->entries > 8
1609 if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
1610 // binary search
1611 uint32 code = bit_reverse(f->acc);
1612 int x=0, n=c->sorted_entries, len;
1613
1614 while (n > 1) {
1615 // invariant: sc[x] <= code < sc[x+n]
1616 int m = x + (n >> 1);
1617 if (c->sorted_codewords[m] <= code) {
1618 x = m;
1619 n -= (n>>1);
1620 } else {
1621 n >>= 1;
1622 }
1623 }
1624 // x is now the sorted index
1625 if (!c->sparse) x = c->sorted_values[x];
1626 // x is now sorted index if sparse, or symbol otherwise
1627 len = c->codeword_lengths[x];
1628 if (f->valid_bits >= len) {
1629 f->acc >>= len;
1630 f->valid_bits -= len;
1631 return x;
1632 }
1633
1634 f->valid_bits = 0;
1635 return -1;
1636 }
1637
1638 // if small, linear search
1639 assert(!c->sparse);
1640 for (i=0; i < c->entries; ++i) {
1641 if (c->codeword_lengths[i] == NO_CODE) continue;
1642 if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
1643 if (f->valid_bits >= c->codeword_lengths[i]) {
1644 f->acc >>= c->codeword_lengths[i];
1645 f->valid_bits -= c->codeword_lengths[i];
1646 return i;
1647 }
1648 f->valid_bits = 0;
1649 return -1;
1650 }
1651 }
1652
1653 error(f, VORBIS_invalid_stream);
1654 f->valid_bits = 0;
1655 return -1;
1656 }
1657
1658 #ifndef STB_VORBIS_NO_INLINE_DECODE
1659
1660 #define DECODE_RAW(var, f,c) \
1661 if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) \
1662 prep_huffman(f); \
1663 var = f->acc & FAST_HUFFMAN_TABLE_MASK; \
1664 var = c->fast_huffman[var]; \
1665 if (var >= 0) { \
1666 int n = c->codeword_lengths[var]; \
1667 f->acc >>= n; \
1668 f->valid_bits -= n; \
1669 if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
1670 } else { \
1671 var = codebook_decode_scalar_raw(f,c); \
1672 }
1673
1674 #else
1675
codebook_decode_scalar(vorb * f,Codebook * c)1676 static int codebook_decode_scalar(vorb *f, Codebook *c)
1677 {
1678 int i;
1679 if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
1680 prep_huffman(f);
1681 // fast huffman table lookup
1682 i = f->acc & FAST_HUFFMAN_TABLE_MASK;
1683 i = c->fast_huffman[i];
1684 if (i >= 0) {
1685 f->acc >>= c->codeword_lengths[i];
1686 f->valid_bits -= c->codeword_lengths[i];
1687 if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
1688 return i;
1689 }
1690 return codebook_decode_scalar_raw(f,c);
1691 }
1692
1693 #define DECODE_RAW(var,f,c) var = codebook_decode_scalar(f,c);
1694
1695 #endif
1696
1697 #define DECODE(var,f,c) \
1698 DECODE_RAW(var,f,c) \
1699 if (c->sparse) var = c->sorted_values[var];
1700
1701 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1702 #define DECODE_VQ(var,f,c) DECODE_RAW(var,f,c)
1703 #else
1704 #define DECODE_VQ(var,f,c) DECODE(var,f,c)
1705 #endif
1706
1707
1708
1709
1710
1711
1712 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
1713 // where we avoid one addition
1714 #define CODEBOOK_ELEMENT(c,off) (c->multiplicands[off])
1715 #define CODEBOOK_ELEMENT_FAST(c,off) (c->multiplicands[off])
1716 #define CODEBOOK_ELEMENT_BASE(c) (0)
1717
codebook_decode_start(vorb * f,Codebook * c)1718 static int codebook_decode_start(vorb *f, Codebook *c)
1719 {
1720 int z = -1;
1721
1722 // type 0 is only legal in a scalar context
1723 if (c->lookup_type == 0)
1724 error(f, VORBIS_invalid_stream);
1725 else {
1726 DECODE_VQ(z,f,c);
1727 if (c->sparse) assert(z < c->sorted_entries);
1728 if (z < 0) { // check for EOP
1729 if (!f->bytes_in_seg)
1730 if (f->last_seg)
1731 return z;
1732 error(f, VORBIS_invalid_stream);
1733 }
1734 }
1735 return z;
1736 }
1737
codebook_decode(vorb * f,Codebook * c,float * output,int len)1738 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1739 {
1740 int i,z = codebook_decode_start(f,c);
1741 if (z < 0) return FALSE;
1742 if (len > c->dimensions) len = c->dimensions;
1743
1744 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1745 if (c->lookup_type == 1) {
1746 float last = CODEBOOK_ELEMENT_BASE(c);
1747 int div = 1;
1748 for (i=0; i < len; ++i) {
1749 int off = (z / div) % c->lookup_values;
1750 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1751 output[i] += val;
1752 if (c->sequence_p) last = val + c->minimum_value;
1753 div *= c->lookup_values;
1754 }
1755 return TRUE;
1756 }
1757 #endif
1758
1759 z *= c->dimensions;
1760 if (c->sequence_p) {
1761 float last = CODEBOOK_ELEMENT_BASE(c);
1762 for (i=0; i < len; ++i) {
1763 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1764 output[i] += val;
1765 last = val + c->minimum_value;
1766 }
1767 } else {
1768 float last = CODEBOOK_ELEMENT_BASE(c);
1769 for (i=0; i < len; ++i) {
1770 output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1771 }
1772 }
1773
1774 return TRUE;
1775 }
1776
codebook_decode_step(vorb * f,Codebook * c,float * output,int len,int step)1777 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1778 {
1779 int i,z = codebook_decode_start(f,c);
1780 float last = CODEBOOK_ELEMENT_BASE(c);
1781 if (z < 0) return FALSE;
1782 if (len > c->dimensions) len = c->dimensions;
1783
1784 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1785 if (c->lookup_type == 1) {
1786 int div = 1;
1787 for (i=0; i < len; ++i) {
1788 int off = (z / div) % c->lookup_values;
1789 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1790 output[i*step] += val;
1791 if (c->sequence_p) last = val;
1792 div *= c->lookup_values;
1793 }
1794 return TRUE;
1795 }
1796 #endif
1797
1798 z *= c->dimensions;
1799 for (i=0; i < len; ++i) {
1800 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1801 output[i*step] += val;
1802 if (c->sequence_p) last = val;
1803 }
1804
1805 return TRUE;
1806 }
1807
codebook_decode_deinterleave_repeat(vorb * f,Codebook * c,float ** outputs,int ch,int * c_inter_p,int * p_inter_p,int len,int total_decode)1808 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1809 {
1810 int c_inter = *c_inter_p;
1811 int p_inter = *p_inter_p;
1812 int i,z, effective = c->dimensions;
1813
1814 // type 0 is only legal in a scalar context
1815 if (c->lookup_type == 0) return error(f, VORBIS_invalid_stream);
1816
1817 while (total_decode > 0) {
1818 float last = CODEBOOK_ELEMENT_BASE(c);
1819 DECODE_VQ(z,f,c);
1820 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1821 assert(!c->sparse || z < c->sorted_entries);
1822 #endif
1823 if (z < 0) {
1824 if (!f->bytes_in_seg)
1825 if (f->last_seg) return FALSE;
1826 return error(f, VORBIS_invalid_stream);
1827 }
1828
1829 // if this will take us off the end of the buffers, stop short!
1830 // we check by computing the length of the virtual interleaved
1831 // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1832 // and the length we'll be using (effective)
1833 if (c_inter + p_inter*ch + effective > len * ch) {
1834 effective = len*ch - (p_inter*ch - c_inter);
1835 }
1836
1837 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1838 if (c->lookup_type == 1) {
1839 int div = 1;
1840 for (i=0; i < effective; ++i) {
1841 int off = (z / div) % c->lookup_values;
1842 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1843 if (outputs[c_inter])
1844 outputs[c_inter][p_inter] += val;
1845 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1846 if (c->sequence_p) last = val;
1847 div *= c->lookup_values;
1848 }
1849 } else
1850 #endif
1851 {
1852 z *= c->dimensions;
1853 if (c->sequence_p) {
1854 for (i=0; i < effective; ++i) {
1855 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1856 if (outputs[c_inter])
1857 outputs[c_inter][p_inter] += val;
1858 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1859 last = val;
1860 }
1861 } else {
1862 for (i=0; i < effective; ++i) {
1863 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1864 if (outputs[c_inter])
1865 outputs[c_inter][p_inter] += val;
1866 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1867 }
1868 }
1869 }
1870
1871 total_decode -= effective;
1872 }
1873 *c_inter_p = c_inter;
1874 *p_inter_p = p_inter;
1875 return TRUE;
1876 }
1877
predict_point(int x,int x0,int x1,int y0,int y1)1878 static int predict_point(int x, int x0, int x1, int y0, int y1)
1879 {
1880 int dy = y1 - y0;
1881 int adx = x1 - x0;
1882 // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
1883 int err = abs(dy) * (x - x0);
1884 int off = err / adx;
1885 return dy < 0 ? y0 - off : y0 + off;
1886 }
1887
1888 // the following table is block-copied from the specification
1889 static float inverse_db_table[256] =
1890 {
1891 1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
1892 1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
1893 1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
1894 2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
1895 2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
1896 3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
1897 4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
1898 6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
1899 7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
1900 1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
1901 1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
1902 1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
1903 2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
1904 2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
1905 3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
1906 4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
1907 5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
1908 7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
1909 9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
1910 1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
1911 1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
1912 2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
1913 2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
1914 3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
1915 4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
1916 5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
1917 7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
1918 9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
1919 0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
1920 0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
1921 0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
1922 0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
1923 0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
1924 0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
1925 0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
1926 0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
1927 0.00092223983f, 0.00098217216f, 0.0010459992f, 0.0011139742f,
1928 0.0011863665f, 0.0012634633f, 0.0013455702f, 0.0014330129f,
1929 0.0015261382f, 0.0016253153f, 0.0017309374f, 0.0018434235f,
1930 0.0019632195f, 0.0020908006f, 0.0022266726f, 0.0023713743f,
1931 0.0025254795f, 0.0026895994f, 0.0028643847f, 0.0030505286f,
1932 0.0032487691f, 0.0034598925f, 0.0036847358f, 0.0039241906f,
1933 0.0041792066f, 0.0044507950f, 0.0047400328f, 0.0050480668f,
1934 0.0053761186f, 0.0057254891f, 0.0060975636f, 0.0064938176f,
1935 0.0069158225f, 0.0073652516f, 0.0078438871f, 0.0083536271f,
1936 0.0088964928f, 0.009474637f, 0.010090352f, 0.010746080f,
1937 0.011444421f, 0.012188144f, 0.012980198f, 0.013823725f,
1938 0.014722068f, 0.015678791f, 0.016697687f, 0.017782797f,
1939 0.018938423f, 0.020169149f, 0.021479854f, 0.022875735f,
1940 0.024362330f, 0.025945531f, 0.027631618f, 0.029427276f,
1941 0.031339626f, 0.033376252f, 0.035545228f, 0.037855157f,
1942 0.040315199f, 0.042935108f, 0.045725273f, 0.048696758f,
1943 0.051861348f, 0.055231591f, 0.058820850f, 0.062643361f,
1944 0.066714279f, 0.071049749f, 0.075666962f, 0.080584227f,
1945 0.085821044f, 0.091398179f, 0.097337747f, 0.10366330f,
1946 0.11039993f, 0.11757434f, 0.12521498f, 0.13335215f,
1947 0.14201813f, 0.15124727f, 0.16107617f, 0.17154380f,
1948 0.18269168f, 0.19456402f, 0.20720788f, 0.22067342f,
1949 0.23501402f, 0.25028656f, 0.26655159f, 0.28387361f,
1950 0.30232132f, 0.32196786f, 0.34289114f, 0.36517414f,
1951 0.38890521f, 0.41417847f, 0.44109412f, 0.46975890f,
1952 0.50028648f, 0.53279791f, 0.56742212f, 0.60429640f,
1953 0.64356699f, 0.68538959f, 0.72993007f, 0.77736504f,
1954 0.82788260f, 0.88168307f, 0.9389798f, 1.0f
1955 };
1956
1957
1958 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1959 // note that you must produce bit-identical output to decode correctly;
1960 // this specific sequence of operations is specified in the spec (it's
1961 // drawing integer-quantized frequency-space lines that the encoder
1962 // expects to be exactly the same)
1963 // ... also, isn't the whole point of Bresenham's algorithm to NOT
1964 // have to divide in the setup? sigh.
1965 #ifndef STB_VORBIS_NO_DEFER_FLOOR
1966 #define LINE_OP(a,b) a *= b
1967 #else
1968 #define LINE_OP(a,b) a = b
1969 #endif
1970
1971 #ifdef STB_VORBIS_DIVIDE_TABLE
1972 #define DIVTAB_NUMER 32
1973 #define DIVTAB_DENOM 64
1974 int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
1975 #endif
1976
draw_line(float * output,int x0,int y0,int x1,int y1,int n)1977 static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
1978 {
1979 int dy = y1 - y0;
1980 int adx = x1 - x0;
1981 int ady = abs(dy);
1982 int base;
1983 int x=x0,y=y0;
1984 int err = 0;
1985 int sy;
1986
1987 #ifdef STB_VORBIS_DIVIDE_TABLE
1988 if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
1989 if (dy < 0) {
1990 base = -integer_divide_table[ady][adx];
1991 sy = base-1;
1992 } else {
1993 base = integer_divide_table[ady][adx];
1994 sy = base+1;
1995 }
1996 } else {
1997 base = dy / adx;
1998 if (dy < 0)
1999 sy = base - 1;
2000 else
2001 sy = base+1;
2002 }
2003 #else
2004 base = dy / adx;
2005 if (dy < 0)
2006 sy = base - 1;
2007 else
2008 sy = base+1;
2009 #endif
2010 ady -= abs(base) * adx;
2011 if (x1 > n) x1 = n;
2012 if (x < x1) {
2013 LINE_OP(output[x], inverse_db_table[y]);
2014 for (++x; x < x1; ++x) {
2015 err += ady;
2016 if (err >= adx) {
2017 err -= adx;
2018 y += sy;
2019 } else
2020 y += base;
2021 LINE_OP(output[x], inverse_db_table[y]);
2022 }
2023 }
2024 }
2025
residue_decode(vorb * f,Codebook * book,float * target,int offset,int n,int rtype)2026 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
2027 {
2028 int k;
2029 if (rtype == 0) {
2030 int step = n / book->dimensions;
2031 for (k=0; k < step; ++k)
2032 if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
2033 return FALSE;
2034 } else {
2035 for (k=0; k < n; ) {
2036 if (!codebook_decode(f, book, target+offset, n-k))
2037 return FALSE;
2038 k += book->dimensions;
2039 offset += book->dimensions;
2040 }
2041 }
2042 return TRUE;
2043 }
2044
decode_residue(vorb * f,float * residue_buffers[],int ch,int n,int rn,uint8 * do_not_decode)2045 static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
2046 {
2047 int i,j,pass;
2048 Residue *r = f->residue_config + rn;
2049 int rtype = f->residue_types[rn];
2050 int c = r->classbook;
2051 int classwords = f->codebooks[c].dimensions;
2052 int n_read = r->end - r->begin;
2053 int part_read = n_read / r->part_size;
2054 int temp_alloc_point = temp_alloc_save(f);
2055 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2056 uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
2057 #else
2058 int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
2059 #endif
2060
2061 CHECK(f);
2062
2063 for (i=0; i < ch; ++i)
2064 if (!do_not_decode[i])
2065 memset(residue_buffers[i], 0, sizeof(float) * n);
2066
2067 if (rtype == 2 && ch != 1) {
2068 for (j=0; j < ch; ++j)
2069 if (!do_not_decode[j])
2070 break;
2071 if (j == ch)
2072 goto done;
2073
2074 for (pass=0; pass < 8; ++pass) {
2075 int pcount = 0, class_set = 0;
2076 if (ch == 2) {
2077 while (pcount < part_read) {
2078 int z = r->begin + pcount*r->part_size;
2079 int c_inter = (z & 1), p_inter = z>>1;
2080 if (pass == 0) {
2081 Codebook *c = f->codebooks+r->classbook;
2082 int q;
2083 DECODE(q,f,c);
2084 if (q == EOP) goto done;
2085 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2086 part_classdata[0][class_set] = r->classdata[q];
2087 #else
2088 for (i=classwords-1; i >= 0; --i) {
2089 classifications[0][i+pcount] = q % r->classifications;
2090 q /= r->classifications;
2091 }
2092 #endif
2093 }
2094 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2095 int z = r->begin + pcount*r->part_size;
2096 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2097 int c = part_classdata[0][class_set][i];
2098 #else
2099 int c = classifications[0][pcount];
2100 #endif
2101 int b = r->residue_books[c][pass];
2102 if (b >= 0) {
2103 Codebook *book = f->codebooks + b;
2104 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
2105 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2106 goto done;
2107 #else
2108 // saves 1%
2109 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2110 goto done;
2111 #endif
2112 } else {
2113 z += r->part_size;
2114 c_inter = z & 1;
2115 p_inter = z >> 1;
2116 }
2117 }
2118 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2119 ++class_set;
2120 #endif
2121 }
2122 } else if (ch == 1) {
2123 while (pcount < part_read) {
2124 int z = r->begin + pcount*r->part_size;
2125 int c_inter = 0, p_inter = z;
2126 if (pass == 0) {
2127 Codebook *c = f->codebooks+r->classbook;
2128 int q;
2129 DECODE(q,f,c);
2130 if (q == EOP) goto done;
2131 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2132 part_classdata[0][class_set] = r->classdata[q];
2133 #else
2134 for (i=classwords-1; i >= 0; --i) {
2135 classifications[0][i+pcount] = q % r->classifications;
2136 q /= r->classifications;
2137 }
2138 #endif
2139 }
2140 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2141 int z = r->begin + pcount*r->part_size;
2142 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2143 int c = part_classdata[0][class_set][i];
2144 #else
2145 int c = classifications[0][pcount];
2146 #endif
2147 int b = r->residue_books[c][pass];
2148 if (b >= 0) {
2149 Codebook *book = f->codebooks + b;
2150 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2151 goto done;
2152 } else {
2153 z += r->part_size;
2154 c_inter = 0;
2155 p_inter = z;
2156 }
2157 }
2158 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2159 ++class_set;
2160 #endif
2161 }
2162 } else {
2163 while (pcount < part_read) {
2164 int z = r->begin + pcount*r->part_size;
2165 int c_inter = z % ch, p_inter = z/ch;
2166 if (pass == 0) {
2167 Codebook *c = f->codebooks+r->classbook;
2168 int q;
2169 DECODE(q,f,c);
2170 if (q == EOP) goto done;
2171 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2172 part_classdata[0][class_set] = r->classdata[q];
2173 #else
2174 for (i=classwords-1; i >= 0; --i) {
2175 classifications[0][i+pcount] = q % r->classifications;
2176 q /= r->classifications;
2177 }
2178 #endif
2179 }
2180 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2181 int z = r->begin + pcount*r->part_size;
2182 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2183 int c = part_classdata[0][class_set][i];
2184 #else
2185 int c = classifications[0][pcount];
2186 #endif
2187 int b = r->residue_books[c][pass];
2188 if (b >= 0) {
2189 Codebook *book = f->codebooks + b;
2190 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2191 goto done;
2192 } else {
2193 z += r->part_size;
2194 c_inter = z % ch;
2195 p_inter = z / ch;
2196 }
2197 }
2198 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2199 ++class_set;
2200 #endif
2201 }
2202 }
2203 }
2204 goto done;
2205 }
2206 CHECK(f);
2207
2208 for (pass=0; pass < 8; ++pass) {
2209 int pcount = 0, class_set=0;
2210 while (pcount < part_read) {
2211 if (pass == 0) {
2212 for (j=0; j < ch; ++j) {
2213 if (!do_not_decode[j]) {
2214 Codebook *c = f->codebooks+r->classbook;
2215 int temp;
2216 DECODE(temp,f,c);
2217 if (temp == EOP) goto done;
2218 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2219 part_classdata[j][class_set] = r->classdata[temp];
2220 #else
2221 for (i=classwords-1; i >= 0; --i) {
2222 classifications[j][i+pcount] = temp % r->classifications;
2223 temp /= r->classifications;
2224 }
2225 #endif
2226 }
2227 }
2228 }
2229 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2230 for (j=0; j < ch; ++j) {
2231 if (!do_not_decode[j]) {
2232 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2233 int c = part_classdata[j][class_set][i];
2234 #else
2235 int c = classifications[j][pcount];
2236 #endif
2237 int b = r->residue_books[c][pass];
2238 if (b >= 0) {
2239 float *target = residue_buffers[j];
2240 int offset = r->begin + pcount * r->part_size;
2241 int n = r->part_size;
2242 Codebook *book = f->codebooks + b;
2243 if (!residue_decode(f, book, target, offset, n, rtype))
2244 goto done;
2245 }
2246 }
2247 }
2248 }
2249 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2250 ++class_set;
2251 #endif
2252 }
2253 }
2254 done:
2255 CHECK(f);
2256 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2257 temp_free(f,part_classdata);
2258 #else
2259 temp_free(f,classifications);
2260 #endif
2261 temp_alloc_restore(f,temp_alloc_point);
2262 }
2263
2264
2265 #if 0
2266 // slow way for debugging
2267 void inverse_mdct_slow(float *buffer, int n)
2268 {
2269 int i,j;
2270 int n2 = n >> 1;
2271 float *x = (float *) malloc(sizeof(*x) * n2);
2272 memcpy(x, buffer, sizeof(*x) * n2);
2273 for (i=0; i < n; ++i) {
2274 float acc = 0;
2275 for (j=0; j < n2; ++j)
2276 // formula from paper:
2277 //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2278 // formula from wikipedia
2279 //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2280 // these are equivalent, except the formula from the paper inverts the multiplier!
2281 // however, what actually works is NO MULTIPLIER!?!
2282 //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2283 acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2284 buffer[i] = acc;
2285 }
2286 free(x);
2287 }
2288 #elif 0
2289 // same as above, but just barely able to run in real time on modern machines
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2290 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2291 {
2292 float mcos[16384];
2293 int i,j;
2294 int n2 = n >> 1, nmask = (n << 2) -1;
2295 float *x = (float *) malloc(sizeof(*x) * n2);
2296 memcpy(x, buffer, sizeof(*x) * n2);
2297 for (i=0; i < 4*n; ++i)
2298 mcos[i] = (float) cos(M_PI / 2 * i / n);
2299
2300 for (i=0; i < n; ++i) {
2301 float acc = 0;
2302 for (j=0; j < n2; ++j)
2303 acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
2304 buffer[i] = acc;
2305 }
2306 free(x);
2307 }
2308 #elif 0
2309 // transform to use a slow dct-iv; this is STILL basically trivial,
2310 // but only requires half as many ops
dct_iv_slow(float * buffer,int n)2311 void dct_iv_slow(float *buffer, int n)
2312 {
2313 float mcos[16384];
2314 float x[2048];
2315 int i,j;
2316 int n2 = n >> 1, nmask = (n << 3) - 1;
2317 memcpy(x, buffer, sizeof(*x) * n);
2318 for (i=0; i < 8*n; ++i)
2319 mcos[i] = (float) cos(M_PI / 4 * i / n);
2320 for (i=0; i < n; ++i) {
2321 float acc = 0;
2322 for (j=0; j < n; ++j)
2323 acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
2324 buffer[i] = acc;
2325 }
2326 }
2327
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2328 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2329 {
2330 int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
2331 float temp[4096];
2332
2333 memcpy(temp, buffer, n2 * sizeof(float));
2334 dct_iv_slow(temp, n2); // returns -c'-d, a-b'
2335
2336 for (i=0; i < n4 ; ++i) buffer[i] = temp[i+n4]; // a-b'
2337 for ( ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1]; // b-a', c+d'
2338 for ( ; i < n ; ++i) buffer[i] = -temp[i - n3_4]; // c'+d
2339 }
2340 #endif
2341
2342 #ifndef LIBVORBIS_MDCT
2343 #define LIBVORBIS_MDCT 0
2344 #endif
2345
2346 #if LIBVORBIS_MDCT
2347 // directly call the vorbis MDCT using an interface documented
2348 // by Jeff Roberts... useful for performance comparison
2349 typedef struct
2350 {
2351 int n;
2352 int log2n;
2353
2354 float *trig;
2355 int *bitrev;
2356
2357 float scale;
2358 } mdct_lookup;
2359
2360 extern void mdct_init(mdct_lookup *lookup, int n);
2361 extern void mdct_clear(mdct_lookup *l);
2362 extern void mdct_backward(mdct_lookup *init, float *in, float *out);
2363
2364 mdct_lookup M1,M2;
2365
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2366 void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2367 {
2368 mdct_lookup *M;
2369 if (M1.n == n) M = &M1;
2370 else if (M2.n == n) M = &M2;
2371 else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
2372 else {
2373 if (M2.n) __asm int 3;
2374 mdct_init(&M2, n);
2375 M = &M2;
2376 }
2377
2378 mdct_backward(M, buffer, buffer);
2379 }
2380 #endif
2381
2382
2383 // the following were split out into separate functions while optimizing;
2384 // they could be pushed back up but eh. __forceinline showed no change;
2385 // they're probably already being inlined.
imdct_step3_iter0_loop(int n,float * e,int i_off,int k_off,float * A)2386 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
2387 {
2388 float *ee0 = e + i_off;
2389 float *ee2 = ee0 + k_off;
2390 int i;
2391
2392 assert((n & 3) == 0);
2393 for (i=(n>>2); i > 0; --i) {
2394 float k00_20, k01_21;
2395 k00_20 = ee0[ 0] - ee2[ 0];
2396 k01_21 = ee0[-1] - ee2[-1];
2397 ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
2398 ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
2399 ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
2400 ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
2401 A += 8;
2402
2403 k00_20 = ee0[-2] - ee2[-2];
2404 k01_21 = ee0[-3] - ee2[-3];
2405 ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
2406 ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
2407 ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
2408 ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
2409 A += 8;
2410
2411 k00_20 = ee0[-4] - ee2[-4];
2412 k01_21 = ee0[-5] - ee2[-5];
2413 ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
2414 ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
2415 ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
2416 ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
2417 A += 8;
2418
2419 k00_20 = ee0[-6] - ee2[-6];
2420 k01_21 = ee0[-7] - ee2[-7];
2421 ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
2422 ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
2423 ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
2424 ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
2425 A += 8;
2426 ee0 -= 8;
2427 ee2 -= 8;
2428 }
2429 }
2430
imdct_step3_inner_r_loop(int lim,float * e,int d0,int k_off,float * A,int k1)2431 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
2432 {
2433 int i;
2434 float k00_20, k01_21;
2435
2436 float *e0 = e + d0;
2437 float *e2 = e0 + k_off;
2438
2439 for (i=lim >> 2; i > 0; --i) {
2440 k00_20 = e0[-0] - e2[-0];
2441 k01_21 = e0[-1] - e2[-1];
2442 e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
2443 e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
2444 e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
2445 e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
2446
2447 A += k1;
2448
2449 k00_20 = e0[-2] - e2[-2];
2450 k01_21 = e0[-3] - e2[-3];
2451 e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
2452 e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
2453 e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
2454 e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
2455
2456 A += k1;
2457
2458 k00_20 = e0[-4] - e2[-4];
2459 k01_21 = e0[-5] - e2[-5];
2460 e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
2461 e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
2462 e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
2463 e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
2464
2465 A += k1;
2466
2467 k00_20 = e0[-6] - e2[-6];
2468 k01_21 = e0[-7] - e2[-7];
2469 e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
2470 e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
2471 e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
2472 e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
2473
2474 e0 -= 8;
2475 e2 -= 8;
2476
2477 A += k1;
2478 }
2479 }
2480
imdct_step3_inner_s_loop(int n,float * e,int i_off,int k_off,float * A,int a_off,int k0)2481 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
2482 {
2483 int i;
2484 float A0 = A[0];
2485 float A1 = A[0+1];
2486 float A2 = A[0+a_off];
2487 float A3 = A[0+a_off+1];
2488 float A4 = A[0+a_off*2+0];
2489 float A5 = A[0+a_off*2+1];
2490 float A6 = A[0+a_off*3+0];
2491 float A7 = A[0+a_off*3+1];
2492
2493 float k00,k11;
2494
2495 float *ee0 = e +i_off;
2496 float *ee2 = ee0+k_off;
2497
2498 for (i=n; i > 0; --i) {
2499 k00 = ee0[ 0] - ee2[ 0];
2500 k11 = ee0[-1] - ee2[-1];
2501 ee0[ 0] = ee0[ 0] + ee2[ 0];
2502 ee0[-1] = ee0[-1] + ee2[-1];
2503 ee2[ 0] = (k00) * A0 - (k11) * A1;
2504 ee2[-1] = (k11) * A0 + (k00) * A1;
2505
2506 k00 = ee0[-2] - ee2[-2];
2507 k11 = ee0[-3] - ee2[-3];
2508 ee0[-2] = ee0[-2] + ee2[-2];
2509 ee0[-3] = ee0[-3] + ee2[-3];
2510 ee2[-2] = (k00) * A2 - (k11) * A3;
2511 ee2[-3] = (k11) * A2 + (k00) * A3;
2512
2513 k00 = ee0[-4] - ee2[-4];
2514 k11 = ee0[-5] - ee2[-5];
2515 ee0[-4] = ee0[-4] + ee2[-4];
2516 ee0[-5] = ee0[-5] + ee2[-5];
2517 ee2[-4] = (k00) * A4 - (k11) * A5;
2518 ee2[-5] = (k11) * A4 + (k00) * A5;
2519
2520 k00 = ee0[-6] - ee2[-6];
2521 k11 = ee0[-7] - ee2[-7];
2522 ee0[-6] = ee0[-6] + ee2[-6];
2523 ee0[-7] = ee0[-7] + ee2[-7];
2524 ee2[-6] = (k00) * A6 - (k11) * A7;
2525 ee2[-7] = (k11) * A6 + (k00) * A7;
2526
2527 ee0 -= k0;
2528 ee2 -= k0;
2529 }
2530 }
2531
iter_54(float * z)2532 static __forceinline void iter_54(float *z)
2533 {
2534 float k00,k11,k22,k33;
2535 float y0,y1,y2,y3;
2536
2537 k00 = z[ 0] - z[-4];
2538 y0 = z[ 0] + z[-4];
2539 y2 = z[-2] + z[-6];
2540 k22 = z[-2] - z[-6];
2541
2542 z[-0] = y0 + y2; // z0 + z4 + z2 + z6
2543 z[-2] = y0 - y2; // z0 + z4 - z2 - z6
2544
2545 // done with y0,y2
2546
2547 k33 = z[-3] - z[-7];
2548
2549 z[-4] = k00 + k33; // z0 - z4 + z3 - z7
2550 z[-6] = k00 - k33; // z0 - z4 - z3 + z7
2551
2552 // done with k33
2553
2554 k11 = z[-1] - z[-5];
2555 y1 = z[-1] + z[-5];
2556 y3 = z[-3] + z[-7];
2557
2558 z[-1] = y1 + y3; // z1 + z5 + z3 + z7
2559 z[-3] = y1 - y3; // z1 + z5 - z3 - z7
2560 z[-5] = k11 - k22; // z1 - z5 + z2 - z6
2561 z[-7] = k11 + k22; // z1 - z5 - z2 + z6
2562 }
2563
imdct_step3_inner_s_loop_ld654(int n,float * e,int i_off,float * A,int base_n)2564 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
2565 {
2566 int a_off = base_n >> 3;
2567 float A2 = A[0+a_off];
2568 float *z = e + i_off;
2569 float *base = z - 16 * n;
2570
2571 while (z > base) {
2572 float k00,k11;
2573
2574 k00 = z[-0] - z[-8];
2575 k11 = z[-1] - z[-9];
2576 z[-0] = z[-0] + z[-8];
2577 z[-1] = z[-1] + z[-9];
2578 z[-8] = k00;
2579 z[-9] = k11 ;
2580
2581 k00 = z[ -2] - z[-10];
2582 k11 = z[ -3] - z[-11];
2583 z[ -2] = z[ -2] + z[-10];
2584 z[ -3] = z[ -3] + z[-11];
2585 z[-10] = (k00+k11) * A2;
2586 z[-11] = (k11-k00) * A2;
2587
2588 k00 = z[-12] - z[ -4]; // reverse to avoid a unary negation
2589 k11 = z[ -5] - z[-13];
2590 z[ -4] = z[ -4] + z[-12];
2591 z[ -5] = z[ -5] + z[-13];
2592 z[-12] = k11;
2593 z[-13] = k00;
2594
2595 k00 = z[-14] - z[ -6]; // reverse to avoid a unary negation
2596 k11 = z[ -7] - z[-15];
2597 z[ -6] = z[ -6] + z[-14];
2598 z[ -7] = z[ -7] + z[-15];
2599 z[-14] = (k00+k11) * A2;
2600 z[-15] = (k00-k11) * A2;
2601
2602 iter_54(z);
2603 iter_54(z-8);
2604 z -= 16;
2605 }
2606 }
2607
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2608 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2609 {
2610 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2611 int ld;
2612 // @OPTIMIZE: reduce register pressure by using fewer variables?
2613 int save_point = temp_alloc_save(f);
2614 float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
2615 float *u=NULL,*v=NULL;
2616 // twiddle factors
2617 float *A = f->A[blocktype];
2618
2619 // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2620 // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
2621
2622 // kernel from paper
2623
2624
2625 // merged:
2626 // copy and reflect spectral data
2627 // step 0
2628
2629 // note that it turns out that the items added together during
2630 // this step are, in fact, being added to themselves (as reflected
2631 // by step 0). inexplicable inefficiency! this became obvious
2632 // once I combined the passes.
2633
2634 // so there's a missing 'times 2' here (for adding X to itself).
2635 // this propogates through linearly to the end, where the numbers
2636 // are 1/2 too small, and need to be compensated for.
2637
2638 {
2639 float *d,*e, *AA, *e_stop;
2640 d = &buf2[n2-2];
2641 AA = A;
2642 e = &buffer[0];
2643 e_stop = &buffer[n2];
2644 while (e != e_stop) {
2645 d[1] = (e[0] * AA[0] - e[2]*AA[1]);
2646 d[0] = (e[0] * AA[1] + e[2]*AA[0]);
2647 d -= 2;
2648 AA += 2;
2649 e += 4;
2650 }
2651
2652 e = &buffer[n2-3];
2653 while (d >= buf2) {
2654 d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
2655 d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
2656 d -= 2;
2657 AA += 2;
2658 e -= 4;
2659 }
2660 }
2661
2662 // now we use symbolic names for these, so that we can
2663 // possibly swap their meaning as we change which operations
2664 // are in place
2665
2666 u = buffer;
2667 v = buf2;
2668
2669 // step 2 (paper output is w, now u)
2670 // this could be in place, but the data ends up in the wrong
2671 // place... _somebody_'s got to swap it, so this is nominated
2672 {
2673 float *AA = &A[n2-8];
2674 float *d0,*d1, *e0, *e1;
2675
2676 e0 = &v[n4];
2677 e1 = &v[0];
2678
2679 d0 = &u[n4];
2680 d1 = &u[0];
2681
2682 while (AA >= A) {
2683 float v40_20, v41_21;
2684
2685 v41_21 = e0[1] - e1[1];
2686 v40_20 = e0[0] - e1[0];
2687 d0[1] = e0[1] + e1[1];
2688 d0[0] = e0[0] + e1[0];
2689 d1[1] = v41_21*AA[4] - v40_20*AA[5];
2690 d1[0] = v40_20*AA[4] + v41_21*AA[5];
2691
2692 v41_21 = e0[3] - e1[3];
2693 v40_20 = e0[2] - e1[2];
2694 d0[3] = e0[3] + e1[3];
2695 d0[2] = e0[2] + e1[2];
2696 d1[3] = v41_21*AA[0] - v40_20*AA[1];
2697 d1[2] = v40_20*AA[0] + v41_21*AA[1];
2698
2699 AA -= 8;
2700
2701 d0 += 4;
2702 d1 += 4;
2703 e0 += 4;
2704 e1 += 4;
2705 }
2706 }
2707
2708 // step 3
2709 ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2710
2711 // optimized step 3:
2712
2713 // the original step3 loop can be nested r inside s or s inside r;
2714 // it's written originally as s inside r, but this is dumb when r
2715 // iterates many times, and s few. So I have two copies of it and
2716 // switch between them halfway.
2717
2718 // this is iteration 0 of step 3
2719 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2720 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2721
2722 // this is iteration 1 of step 3
2723 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2724 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2725 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2726 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2727
2728 l=2;
2729 for (; l < (ld-3)>>1; ++l) {
2730 int k0 = n >> (l+2), k0_2 = k0>>1;
2731 int lim = 1 << (l+1);
2732 int i;
2733 for (i=0; i < lim; ++i)
2734 imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2735 }
2736
2737 for (; l < ld-6; ++l) {
2738 int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2739 int rlim = n >> (l+6), r;
2740 int lim = 1 << (l+1);
2741 int i_off;
2742 float *A0 = A;
2743 i_off = n2-1;
2744 for (r=rlim; r > 0; --r) {
2745 imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2746 A0 += k1*4;
2747 i_off -= 8;
2748 }
2749 }
2750
2751 // iterations with count:
2752 // ld-6,-5,-4 all interleaved together
2753 // the big win comes from getting rid of needless flops
2754 // due to the constants on pass 5 & 4 being all 1 and 0;
2755 // combining them to be simultaneous to improve cache made little difference
2756 imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2757
2758 // output is u
2759
2760 // step 4, 5, and 6
2761 // cannot be in-place because of step 5
2762 {
2763 uint16 *bitrev = f->bit_reverse[blocktype];
2764 // weirdly, I'd have thought reading sequentially and writing
2765 // erratically would have been better than vice-versa, but in
2766 // fact that's not what my testing showed. (That is, with
2767 // j = bitreverse(i), do you read i and write j, or read j and write i.)
2768
2769 float *d0 = &v[n4-4];
2770 float *d1 = &v[n2-4];
2771 while (d0 >= v) {
2772 int k4;
2773
2774 k4 = bitrev[0];
2775 d1[3] = u[k4+0];
2776 d1[2] = u[k4+1];
2777 d0[3] = u[k4+2];
2778 d0[2] = u[k4+3];
2779
2780 k4 = bitrev[1];
2781 d1[1] = u[k4+0];
2782 d1[0] = u[k4+1];
2783 d0[1] = u[k4+2];
2784 d0[0] = u[k4+3];
2785
2786 d0 -= 4;
2787 d1 -= 4;
2788 bitrev += 2;
2789 }
2790 }
2791 // (paper output is u, now v)
2792
2793
2794 // data must be in buf2
2795 assert(v == buf2);
2796
2797 // step 7 (paper output is v, now v)
2798 // this is now in place
2799 {
2800 float *C = f->C[blocktype];
2801 float *d, *e;
2802
2803 d = v;
2804 e = v + n2 - 4;
2805
2806 while (d < e) {
2807 float a02,a11,b0,b1,b2,b3;
2808
2809 a02 = d[0] - e[2];
2810 a11 = d[1] + e[3];
2811
2812 b0 = C[1]*a02 + C[0]*a11;
2813 b1 = C[1]*a11 - C[0]*a02;
2814
2815 b2 = d[0] + e[ 2];
2816 b3 = d[1] - e[ 3];
2817
2818 d[0] = b2 + b0;
2819 d[1] = b3 + b1;
2820 e[2] = b2 - b0;
2821 e[3] = b1 - b3;
2822
2823 a02 = d[2] - e[0];
2824 a11 = d[3] + e[1];
2825
2826 b0 = C[3]*a02 + C[2]*a11;
2827 b1 = C[3]*a11 - C[2]*a02;
2828
2829 b2 = d[2] + e[ 0];
2830 b3 = d[3] - e[ 1];
2831
2832 d[2] = b2 + b0;
2833 d[3] = b3 + b1;
2834 e[0] = b2 - b0;
2835 e[1] = b1 - b3;
2836
2837 C += 4;
2838 d += 4;
2839 e -= 4;
2840 }
2841 }
2842
2843 // data must be in buf2
2844
2845
2846 // step 8+decode (paper output is X, now buffer)
2847 // this generates pairs of data a la 8 and pushes them directly through
2848 // the decode kernel (pushing rather than pulling) to avoid having
2849 // to make another pass later
2850
2851 // this cannot POSSIBLY be in place, so we refer to the buffers directly
2852
2853 {
2854 float *d0,*d1,*d2,*d3;
2855
2856 float *B = f->B[blocktype] + n2 - 8;
2857 float *e = buf2 + n2 - 8;
2858 d0 = &buffer[0];
2859 d1 = &buffer[n2-4];
2860 d2 = &buffer[n2];
2861 d3 = &buffer[n-4];
2862 while (e >= v) {
2863 float p0,p1,p2,p3;
2864
2865 p3 = e[6]*B[7] - e[7]*B[6];
2866 p2 = -e[6]*B[6] - e[7]*B[7];
2867
2868 d0[0] = p3;
2869 d1[3] = - p3;
2870 d2[0] = p2;
2871 d3[3] = p2;
2872
2873 p1 = e[4]*B[5] - e[5]*B[4];
2874 p0 = -e[4]*B[4] - e[5]*B[5];
2875
2876 d0[1] = p1;
2877 d1[2] = - p1;
2878 d2[1] = p0;
2879 d3[2] = p0;
2880
2881 p3 = e[2]*B[3] - e[3]*B[2];
2882 p2 = -e[2]*B[2] - e[3]*B[3];
2883
2884 d0[2] = p3;
2885 d1[1] = - p3;
2886 d2[2] = p2;
2887 d3[1] = p2;
2888
2889 p1 = e[0]*B[1] - e[1]*B[0];
2890 p0 = -e[0]*B[0] - e[1]*B[1];
2891
2892 d0[3] = p1;
2893 d1[0] = - p1;
2894 d2[3] = p0;
2895 d3[0] = p0;
2896
2897 B -= 8;
2898 e -= 8;
2899 d0 += 4;
2900 d2 += 4;
2901 d1 -= 4;
2902 d3 -= 4;
2903 }
2904 }
2905
2906 temp_free(f,buf2);
2907 temp_alloc_restore(f,save_point);
2908 }
2909
2910 #if 0
2911 // this is the original version of the above code, if you want to optimize it from scratch
2912 void inverse_mdct_naive(float *buffer, int n)
2913 {
2914 float s;
2915 float A[1 << 12], B[1 << 12], C[1 << 11];
2916 int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2917 int n3_4 = n - n4, ld;
2918 // how can they claim this only uses N words?!
2919 // oh, because they're only used sparsely, whoops
2920 float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];
2921 // set up twiddle factors
2922
2923 for (k=k2=0; k < n4; ++k,k2+=2) {
2924 A[k2 ] = (float) cos(4*k*M_PI/n);
2925 A[k2+1] = (float) -sin(4*k*M_PI/n);
2926 B[k2 ] = (float) cos((k2+1)*M_PI/n/2);
2927 B[k2+1] = (float) sin((k2+1)*M_PI/n/2);
2928 }
2929 for (k=k2=0; k < n8; ++k,k2+=2) {
2930 C[k2 ] = (float) cos(2*(k2+1)*M_PI/n);
2931 C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
2932 }
2933
2934 // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2935 // Note there are bugs in that pseudocode, presumably due to them attempting
2936 // to rename the arrays nicely rather than representing the way their actual
2937 // implementation bounces buffers back and forth. As a result, even in the
2938 // "some formulars corrected" version, a direct implementation fails. These
2939 // are noted below as "paper bug".
2940
2941 // copy and reflect spectral data
2942 for (k=0; k < n2; ++k) u[k] = buffer[k];
2943 for ( ; k < n ; ++k) u[k] = -buffer[n - k - 1];
2944 // kernel from paper
2945 // step 1
2946 for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
2947 v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2] - (u[k4+2] - u[n-k4-3])*A[k2+1];
2948 v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
2949 }
2950 // step 2
2951 for (k=k4=0; k < n8; k+=1, k4+=4) {
2952 w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
2953 w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
2954 w[k4+3] = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
2955 w[k4+1] = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
2956 }
2957 // step 3
2958 ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2959 for (l=0; l < ld-3; ++l) {
2960 int k0 = n >> (l+2), k1 = 1 << (l+3);
2961 int rlim = n >> (l+4), r4, r;
2962 int s2lim = 1 << (l+2), s2;
2963 for (r=r4=0; r < rlim; r4+=4,++r) {
2964 for (s2=0; s2 < s2lim; s2+=2) {
2965 u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
2966 u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
2967 u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
2968 - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
2969 u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
2970 + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
2971 }
2972 }
2973 if (l+1 < ld-3) {
2974 // paper bug: ping-ponging of u&w here is omitted
2975 memcpy(w, u, sizeof(u));
2976 }
2977 }
2978
2979 // step 4
2980 for (i=0; i < n8; ++i) {
2981 int j = bit_reverse(i) >> (32-ld+3);
2982 assert(j < n8);
2983 if (i == j) {
2984 // paper bug: original code probably swapped in place; if copying,
2985 // need to directly copy in this case
2986 int i8 = i << 3;
2987 v[i8+1] = u[i8+1];
2988 v[i8+3] = u[i8+3];
2989 v[i8+5] = u[i8+5];
2990 v[i8+7] = u[i8+7];
2991 } else if (i < j) {
2992 int i8 = i << 3, j8 = j << 3;
2993 v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
2994 v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
2995 v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
2996 v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
2997 }
2998 }
2999 // step 5
3000 for (k=0; k < n2; ++k) {
3001 w[k] = v[k*2+1];
3002 }
3003 // step 6
3004 for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
3005 u[n-1-k2] = w[k4];
3006 u[n-2-k2] = w[k4+1];
3007 u[n3_4 - 1 - k2] = w[k4+2];
3008 u[n3_4 - 2 - k2] = w[k4+3];
3009 }
3010 // step 7
3011 for (k=k2=0; k < n8; ++k, k2 += 2) {
3012 v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3013 v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3014 v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3015 v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3016 }
3017 // step 8
3018 for (k=k2=0; k < n4; ++k,k2 += 2) {
3019 X[k] = v[k2+n2]*B[k2 ] + v[k2+1+n2]*B[k2+1];
3020 X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2 ];
3021 }
3022
3023 // decode kernel to output
3024 // determined the following value experimentally
3025 // (by first figuring out what made inverse_mdct_slow work); then matching that here
3026 // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
3027 s = 0.5; // theoretically would be n4
3028
3029 // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
3030 // so it needs to use the "old" B values to behave correctly, or else
3031 // set s to 1.0 ]]]
3032 for (i=0; i < n4 ; ++i) buffer[i] = s * X[i+n4];
3033 for ( ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
3034 for ( ; i < n ; ++i) buffer[i] = -s * X[i - n3_4];
3035 }
3036 #endif
3037
get_window(vorb * f,int len)3038 static float *get_window(vorb *f, int len)
3039 {
3040 len <<= 1;
3041 if (len == f->blocksize_0) return f->window[0];
3042 if (len == f->blocksize_1) return f->window[1];
3043 assert(0);
3044 return NULL;
3045 }
3046
3047 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3048 typedef int16 YTYPE;
3049 #else
3050 typedef int YTYPE;
3051 #endif
do_floor(vorb * f,Mapping * map,int i,int n,float * target,YTYPE * finalY,uint8 * step2_flag)3052 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
3053 {
3054 int n2 = n >> 1;
3055 int s = map->chan[i].mux, floor;
3056 floor = map->submap_floor[s];
3057 if (f->floor_types[floor] == 0) {
3058 return error(f, VORBIS_invalid_stream);
3059 } else {
3060 Floor1 *g = &f->floor_config[floor].floor1;
3061 int j,q;
3062 int lx = 0, ly = finalY[0] * g->floor1_multiplier;
3063 for (q=1; q < g->values; ++q) {
3064 j = g->sorted_order[q];
3065 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3066 if (finalY[j] >= 0)
3067 #else
3068 if (step2_flag[j])
3069 #endif
3070 {
3071 int hy = finalY[j] * g->floor1_multiplier;
3072 int hx = g->Xlist[j];
3073 if (lx != hx)
3074 draw_line(target, lx,ly, hx,hy, n2);
3075 CHECK(f);
3076 lx = hx, ly = hy;
3077 }
3078 }
3079 if (lx < n2) {
3080 // optimization of: draw_line(target, lx,ly, n,ly, n2);
3081 for (j=lx; j < n2; ++j)
3082 LINE_OP(target[j], inverse_db_table[ly]);
3083 CHECK(f);
3084 }
3085 }
3086 return TRUE;
3087 }
3088
3089 // The meaning of "left" and "right"
3090 //
3091 // For a given frame:
3092 // we compute samples from 0..n
3093 // window_center is n/2
3094 // we'll window and mix the samples from left_start to left_end with data from the previous frame
3095 // all of the samples from left_end to right_start can be output without mixing; however,
3096 // this interval is 0-length except when transitioning between short and long frames
3097 // all of the samples from right_start to right_end need to be mixed with the next frame,
3098 // which we don't have, so those get saved in a buffer
3099 // frame N's right_end-right_start, the number of samples to mix with the next frame,
3100 // has to be the same as frame N+1's left_end-left_start (which they are by
3101 // construction)
3102
vorbis_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)3103 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
3104 {
3105 Mode *m;
3106 int i, n, prev, next, window_center;
3107 f->channel_buffer_start = f->channel_buffer_end = 0;
3108
3109 retry:
3110 if (f->eof) return FALSE;
3111 if (!maybe_start_packet(f))
3112 return FALSE;
3113 // check packet type
3114 if (get_bits(f,1) != 0) {
3115 if (IS_PUSH_MODE(f))
3116 return error(f,VORBIS_bad_packet_type);
3117 while (EOP != get8_packet(f));
3118 goto retry;
3119 }
3120
3121 if (f->alloc.alloc_buffer)
3122 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3123
3124 i = get_bits(f, ilog(f->mode_count-1));
3125 if (i == EOP) return FALSE;
3126 if (i >= f->mode_count) return FALSE;
3127 *mode = i;
3128 m = f->mode_config + i;
3129 if (m->blockflag) {
3130 n = f->blocksize_1;
3131 prev = get_bits(f,1);
3132 next = get_bits(f,1);
3133 } else {
3134 prev = next = 0;
3135 n = f->blocksize_0;
3136 }
3137
3138 // WINDOWING
3139
3140 window_center = n >> 1;
3141 if (m->blockflag && !prev) {
3142 *p_left_start = (n - f->blocksize_0) >> 2;
3143 *p_left_end = (n + f->blocksize_0) >> 2;
3144 } else {
3145 *p_left_start = 0;
3146 *p_left_end = window_center;
3147 }
3148 if (m->blockflag && !next) {
3149 *p_right_start = (n*3 - f->blocksize_0) >> 2;
3150 *p_right_end = (n*3 + f->blocksize_0) >> 2;
3151 } else {
3152 *p_right_start = window_center;
3153 *p_right_end = n;
3154 }
3155
3156 return TRUE;
3157 }
3158
vorbis_decode_packet_rest(vorb * f,int * len,Mode * m,int left_start,int left_end,int right_start,int right_end,int * p_left)3159 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
3160 {
3161 Mapping *map;
3162 int i,j,k,n,n2;
3163 int zero_channel[256];
3164 int really_zero_channel[256];
3165
3166 // WINDOWING
3167
3168 n = f->blocksize[m->blockflag];
3169 map = &f->mapping[m->mapping];
3170
3171 // FLOORS
3172 n2 = n >> 1;
3173
3174 CHECK(f);
3175
3176 for (i=0; i < f->channels; ++i) {
3177 int s = map->chan[i].mux, floor;
3178 zero_channel[i] = FALSE;
3179 floor = map->submap_floor[s];
3180 if (f->floor_types[floor] == 0) {
3181 return error(f, VORBIS_invalid_stream);
3182 } else {
3183 Floor1 *g = &f->floor_config[floor].floor1;
3184 if (get_bits(f, 1)) {
3185 short *finalY;
3186 uint8 step2_flag[256];
3187 static int range_list[4] = { 256, 128, 86, 64 };
3188 int range = range_list[g->floor1_multiplier-1];
3189 int offset = 2;
3190 finalY = f->finalY[i];
3191 finalY[0] = get_bits(f, ilog(range)-1);
3192 finalY[1] = get_bits(f, ilog(range)-1);
3193 for (j=0; j < g->partitions; ++j) {
3194 int pclass = g->partition_class_list[j];
3195 int cdim = g->class_dimensions[pclass];
3196 int cbits = g->class_subclasses[pclass];
3197 int csub = (1 << cbits)-1;
3198 int cval = 0;
3199 if (cbits) {
3200 Codebook *c = f->codebooks + g->class_masterbooks[pclass];
3201 DECODE(cval,f,c);
3202 }
3203 for (k=0; k < cdim; ++k) {
3204 int book = g->subclass_books[pclass][cval & csub];
3205 cval = cval >> cbits;
3206 if (book >= 0) {
3207 int temp;
3208 Codebook *c = f->codebooks + book;
3209 DECODE(temp,f,c);
3210 finalY[offset++] = temp;
3211 } else
3212 finalY[offset++] = 0;
3213 }
3214 }
3215 if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
3216 step2_flag[0] = step2_flag[1] = 1;
3217 for (j=2; j < g->values; ++j) {
3218 int low, high, pred, highroom, lowroom, room, val;
3219 low = g->neighbors[j][0];
3220 high = g->neighbors[j][1];
3221 //neighbors(g->Xlist, j, &low, &high);
3222 pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
3223 val = finalY[j];
3224 highroom = range - pred;
3225 lowroom = pred;
3226 if (highroom < lowroom)
3227 room = highroom * 2;
3228 else
3229 room = lowroom * 2;
3230 if (val) {
3231 step2_flag[low] = step2_flag[high] = 1;
3232 step2_flag[j] = 1;
3233 if (val >= room)
3234 if (highroom > lowroom)
3235 finalY[j] = val - lowroom + pred;
3236 else
3237 finalY[j] = pred - val + highroom - 1;
3238 else
3239 if (val & 1)
3240 finalY[j] = pred - ((val+1)>>1);
3241 else
3242 finalY[j] = pred + (val>>1);
3243 } else {
3244 step2_flag[j] = 0;
3245 finalY[j] = pred;
3246 }
3247 }
3248
3249 #ifdef STB_VORBIS_NO_DEFER_FLOOR
3250 do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
3251 #else
3252 // defer final floor computation until _after_ residue
3253 for (j=0; j < g->values; ++j) {
3254 if (!step2_flag[j])
3255 finalY[j] = -1;
3256 }
3257 #endif
3258 } else {
3259 error:
3260 zero_channel[i] = TRUE;
3261 }
3262 // So we just defer everything else to later
3263
3264 // at this point we've decoded the floor into buffer
3265 }
3266 }
3267 CHECK(f);
3268 // at this point we've decoded all floors
3269
3270 if (f->alloc.alloc_buffer)
3271 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3272
3273 // re-enable coupled channels if necessary
3274 memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
3275 for (i=0; i < map->coupling_steps; ++i)
3276 if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
3277 zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
3278 }
3279
3280 CHECK(f);
3281 // RESIDUE DECODE
3282 for (i=0; i < map->submaps; ++i) {
3283 float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
3284 int r;
3285 uint8 do_not_decode[256];
3286 int ch = 0;
3287 for (j=0; j < f->channels; ++j) {
3288 if (map->chan[j].mux == i) {
3289 if (zero_channel[j]) {
3290 do_not_decode[ch] = TRUE;
3291 residue_buffers[ch] = NULL;
3292 } else {
3293 do_not_decode[ch] = FALSE;
3294 residue_buffers[ch] = f->channel_buffers[j];
3295 }
3296 ++ch;
3297 }
3298 }
3299 r = map->submap_residue[i];
3300 decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
3301 }
3302
3303 if (f->alloc.alloc_buffer)
3304 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3305 CHECK(f);
3306
3307 // INVERSE COUPLING
3308 for (i = map->coupling_steps-1; i >= 0; --i) {
3309 int n2 = n >> 1;
3310 float *m = f->channel_buffers[map->chan[i].magnitude];
3311 float *a = f->channel_buffers[map->chan[i].angle ];
3312 for (j=0; j < n2; ++j) {
3313 float a2,m2;
3314 if (m[j] > 0)
3315 if (a[j] > 0)
3316 m2 = m[j], a2 = m[j] - a[j];
3317 else
3318 a2 = m[j], m2 = m[j] + a[j];
3319 else
3320 if (a[j] > 0)
3321 m2 = m[j], a2 = m[j] + a[j];
3322 else
3323 a2 = m[j], m2 = m[j] - a[j];
3324 m[j] = m2;
3325 a[j] = a2;
3326 }
3327 }
3328 CHECK(f);
3329
3330 // finish decoding the floors
3331 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3332 for (i=0; i < f->channels; ++i) {
3333 if (really_zero_channel[i]) {
3334 memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3335 } else {
3336 do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
3337 }
3338 }
3339 #else
3340 for (i=0; i < f->channels; ++i) {
3341 if (really_zero_channel[i]) {
3342 memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3343 } else {
3344 for (j=0; j < n2; ++j)
3345 f->channel_buffers[i][j] *= f->floor_buffers[i][j];
3346 }
3347 }
3348 #endif
3349
3350 // INVERSE MDCT
3351 CHECK(f);
3352 for (i=0; i < f->channels; ++i)
3353 inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
3354 CHECK(f);
3355
3356 // this shouldn't be necessary, unless we exited on an error
3357 // and want to flush to get to the next packet
3358 flush_packet(f);
3359
3360 if (f->first_decode) {
3361 // assume we start so first non-discarded sample is sample 0
3362 // this isn't to spec, but spec would require us to read ahead
3363 // and decode the size of all current frames--could be done,
3364 // but presumably it's not a commonly used feature
3365 f->current_loc = -n2; // start of first frame is positioned for discard
3366 // we might have to discard samples "from" the next frame too,
3367 // if we're lapping a large block then a small at the start?
3368 f->discard_samples_deferred = n - right_end;
3369 f->current_loc_valid = TRUE;
3370 f->first_decode = FALSE;
3371 } else if (f->discard_samples_deferred) {
3372 if (f->discard_samples_deferred >= right_start - left_start) {
3373 f->discard_samples_deferred -= (right_start - left_start);
3374 left_start = right_start;
3375 *p_left = left_start;
3376 } else {
3377 left_start += f->discard_samples_deferred;
3378 *p_left = left_start;
3379 f->discard_samples_deferred = 0;
3380 }
3381 } else if (f->previous_length == 0 && f->current_loc_valid) {
3382 // we're recovering from a seek... that means we're going to discard
3383 // the samples from this packet even though we know our position from
3384 // the last page header, so we need to update the position based on
3385 // the discarded samples here
3386 // but wait, the code below is going to add this in itself even
3387 // on a discard, so we don't need to do it here...
3388 }
3389
3390 // check if we have ogg information about the sample # for this packet
3391 if (f->last_seg_which == f->end_seg_with_known_loc) {
3392 // if we have a valid current loc, and this is final:
3393 if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
3394 uint32 current_end = f->known_loc_for_packet - (n-right_end);
3395 // then let's infer the size of the (probably) short final frame
3396 if (current_end < f->current_loc + (right_end-left_start)) {
3397 if (current_end < f->current_loc) {
3398 // negative truncation, that's impossible!
3399 *len = 0;
3400 } else {
3401 *len = current_end - f->current_loc;
3402 }
3403 *len += left_start;
3404 if (*len > right_end) *len = right_end; // this should never happen
3405 f->current_loc += *len;
3406 return TRUE;
3407 }
3408 }
3409 // otherwise, just set our sample loc
3410 // guess that the ogg granule pos refers to the _middle_ of the
3411 // last frame?
3412 // set f->current_loc to the position of left_start
3413 f->current_loc = f->known_loc_for_packet - (n2-left_start);
3414 f->current_loc_valid = TRUE;
3415 }
3416 if (f->current_loc_valid)
3417 f->current_loc += (right_start - left_start);
3418
3419 if (f->alloc.alloc_buffer)
3420 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3421 *len = right_end; // ignore samples after the window goes to 0
3422 CHECK(f);
3423
3424 return TRUE;
3425 }
3426
vorbis_decode_packet(vorb * f,int * len,int * p_left,int * p_right)3427 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
3428 {
3429 int mode, left_end, right_end;
3430 if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
3431 return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
3432 }
3433
vorbis_finish_frame(stb_vorbis * f,int len,int left,int right)3434 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
3435 {
3436 int prev,i,j;
3437 // we use right&left (the start of the right- and left-window sin()-regions)
3438 // to determine how much to return, rather than inferring from the rules
3439 // (same result, clearer code); 'left' indicates where our sin() window
3440 // starts, therefore where the previous window's right edge starts, and
3441 // therefore where to start mixing from the previous buffer. 'right'
3442 // indicates where our sin() ending-window starts, therefore that's where
3443 // we start saving, and where our returned-data ends.
3444
3445 // mixin from previous window
3446 if (f->previous_length) {
3447 int i,j, n = f->previous_length;
3448 float *w = get_window(f, n);
3449 for (i=0; i < f->channels; ++i) {
3450 for (j=0; j < n; ++j)
3451 f->channel_buffers[i][left+j] =
3452 f->channel_buffers[i][left+j]*w[ j] +
3453 f->previous_window[i][ j]*w[n-1-j];
3454 }
3455 }
3456
3457 prev = f->previous_length;
3458
3459 // last half of this data becomes previous window
3460 f->previous_length = len - right;
3461
3462 // @OPTIMIZE: could avoid this copy by double-buffering the
3463 // output (flipping previous_window with channel_buffers), but
3464 // then previous_window would have to be 2x as large, and
3465 // channel_buffers couldn't be temp mem (although they're NOT
3466 // currently temp mem, they could be (unless we want to level
3467 // performance by spreading out the computation))
3468 for (i=0; i < f->channels; ++i)
3469 for (j=0; right+j < len; ++j)
3470 f->previous_window[i][j] = f->channel_buffers[i][right+j];
3471
3472 if (!prev)
3473 // there was no previous packet, so this data isn't valid...
3474 // this isn't entirely true, only the would-have-overlapped data
3475 // isn't valid, but this seems to be what the spec requires
3476 return 0;
3477
3478 // truncate a short frame
3479 if (len < right) right = len;
3480
3481 f->samples_output += right-left;
3482
3483 return right - left;
3484 }
3485
vorbis_pump_first_frame(stb_vorbis * f)3486 static int vorbis_pump_first_frame(stb_vorbis *f)
3487 {
3488 int len, right, left, res;
3489 res = vorbis_decode_packet(f, &len, &left, &right);
3490 if (res)
3491 vorbis_finish_frame(f, len, left, right);
3492 return res;
3493 }
3494
3495 #ifndef STB_VORBIS_NO_PUSHDATA_API
is_whole_packet_present(stb_vorbis * f,int end_page)3496 static int is_whole_packet_present(stb_vorbis *f, int end_page)
3497 {
3498 // make sure that we have the packet available before continuing...
3499 // this requires a full ogg parse, but we know we can fetch from f->stream
3500
3501 // instead of coding this out explicitly, we could save the current read state,
3502 // read the next packet with get8() until end-of-packet, check f->eof, then
3503 // reset the state? but that would be slower, esp. since we'd have over 256 bytes
3504 // of state to restore (primarily the page segment table)
3505
3506 int s = f->next_seg, first = TRUE;
3507 uint8 *p = f->stream;
3508
3509 if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
3510 for (; s < f->segment_count; ++s) {
3511 p += f->segments[s];
3512 if (f->segments[s] < 255) // stop at first short segment
3513 break;
3514 }
3515 // either this continues, or it ends it...
3516 if (end_page)
3517 if (s < f->segment_count-1) return error(f, VORBIS_invalid_stream);
3518 if (s == f->segment_count)
3519 s = -1; // set 'crosses page' flag
3520 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3521 first = FALSE;
3522 }
3523 for (; s == -1;) {
3524 uint8 *q;
3525 int n;
3526
3527 // check that we have the page header ready
3528 if (p + 26 >= f->stream_end) return error(f, VORBIS_need_more_data);
3529 // validate the page
3530 if (memcmp(p, ogg_page_header, 4)) return error(f, VORBIS_invalid_stream);
3531 if (p[4] != 0) return error(f, VORBIS_invalid_stream);
3532 if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
3533 if (f->previous_length)
3534 if ((p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3535 // if no previous length, we're resynching, so we can come in on a continued-packet,
3536 // which we'll just drop
3537 } else {
3538 if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3539 }
3540 n = p[26]; // segment counts
3541 q = p+27; // q points to segment table
3542 p = q + n; // advance past header
3543 // make sure we've read the segment table
3544 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3545 for (s=0; s < n; ++s) {
3546 p += q[s];
3547 if (q[s] < 255)
3548 break;
3549 }
3550 if (end_page)
3551 if (s < n-1) return error(f, VORBIS_invalid_stream);
3552 if (s == n)
3553 s = -1; // set 'crosses page' flag
3554 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3555 first = FALSE;
3556 }
3557 return TRUE;
3558 }
3559 #endif // !STB_VORBIS_NO_PUSHDATA_API
3560
start_decoder(vorb * f)3561 static int start_decoder(vorb *f)
3562 {
3563 uint8 header[6], x,y;
3564 int len,i,j,k, max_submaps = 0;
3565 int longest_floorlist=0;
3566
3567 // first page, first packet
3568
3569 if (!start_page(f)) return FALSE;
3570 // validate page flag
3571 if (!(f->page_flag & PAGEFLAG_first_page)) return error(f, VORBIS_invalid_first_page);
3572 if (f->page_flag & PAGEFLAG_last_page) return error(f, VORBIS_invalid_first_page);
3573 if (f->page_flag & PAGEFLAG_continued_packet) return error(f, VORBIS_invalid_first_page);
3574 // check for expected packet length
3575 if (f->segment_count != 1) return error(f, VORBIS_invalid_first_page);
3576 if (f->segments[0] != 30) return error(f, VORBIS_invalid_first_page);
3577 // read packet
3578 // check packet header
3579 if (get8(f) != VORBIS_packet_id) return error(f, VORBIS_invalid_first_page);
3580 if (!getn(f, header, 6)) return error(f, VORBIS_unexpected_eof);
3581 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_first_page);
3582 // vorbis_version
3583 if (get32(f) != 0) return error(f, VORBIS_invalid_first_page);
3584 f->channels = get8(f); if (!f->channels) return error(f, VORBIS_invalid_first_page);
3585 if (f->channels > STB_VORBIS_MAX_CHANNELS) return error(f, VORBIS_too_many_channels);
3586 f->sample_rate = get32(f); if (!f->sample_rate) return error(f, VORBIS_invalid_first_page);
3587 get32(f); // bitrate_maximum
3588 get32(f); // bitrate_nominal
3589 get32(f); // bitrate_minimum
3590 x = get8(f);
3591 {
3592 int log0,log1;
3593 log0 = x & 15;
3594 log1 = x >> 4;
3595 f->blocksize_0 = 1 << log0;
3596 f->blocksize_1 = 1 << log1;
3597 if (log0 < 6 || log0 > 13) return error(f, VORBIS_invalid_setup);
3598 if (log1 < 6 || log1 > 13) return error(f, VORBIS_invalid_setup);
3599 if (log0 > log1) return error(f, VORBIS_invalid_setup);
3600 }
3601
3602 // framing_flag
3603 x = get8(f);
3604 if (!(x & 1)) return error(f, VORBIS_invalid_first_page);
3605
3606 // second packet!
3607 if (!start_page(f)) return FALSE;
3608
3609 if (!start_packet(f)) return FALSE;
3610 do {
3611 len = next_segment(f);
3612 skip(f, len);
3613 f->bytes_in_seg = 0;
3614 } while (len);
3615
3616 // third packet!
3617 if (!start_packet(f)) return FALSE;
3618
3619 #ifndef STB_VORBIS_NO_PUSHDATA_API
3620 if (IS_PUSH_MODE(f)) {
3621 if (!is_whole_packet_present(f, TRUE)) {
3622 // convert error in ogg header to write type
3623 if (f->error == VORBIS_invalid_stream)
3624 f->error = VORBIS_invalid_setup;
3625 return FALSE;
3626 }
3627 }
3628 #endif
3629
3630 crc32_init(); // always init it, to avoid multithread race conditions
3631
3632 if (get8_packet(f) != VORBIS_packet_setup) return error(f, VORBIS_invalid_setup);
3633 for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3634 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_setup);
3635
3636 // codebooks
3637
3638 f->codebook_count = get_bits(f,8) + 1;
3639 f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
3640 if (f->codebooks == NULL) return error(f, VORBIS_outofmem);
3641 memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
3642 for (i=0; i < f->codebook_count; ++i) {
3643 uint32 *values;
3644 int ordered, sorted_count;
3645 int total=0;
3646 uint8 *lengths;
3647 Codebook *c = f->codebooks+i;
3648 CHECK(f);
3649 x = get_bits(f, 8); if (x != 0x42) return error(f, VORBIS_invalid_setup);
3650 x = get_bits(f, 8); if (x != 0x43) return error(f, VORBIS_invalid_setup);
3651 x = get_bits(f, 8); if (x != 0x56) return error(f, VORBIS_invalid_setup);
3652 x = get_bits(f, 8);
3653 c->dimensions = (get_bits(f, 8)<<8) + x;
3654 x = get_bits(f, 8);
3655 y = get_bits(f, 8);
3656 c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
3657 ordered = get_bits(f,1);
3658 c->sparse = ordered ? 0 : get_bits(f,1);
3659
3660 if (c->dimensions == 0 && c->entries != 0) return error(f, VORBIS_invalid_setup);
3661
3662 if (c->sparse)
3663 lengths = (uint8 *) setup_temp_malloc(f, c->entries);
3664 else
3665 lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3666
3667 if (!lengths) return error(f, VORBIS_outofmem);
3668
3669 if (ordered) {
3670 int current_entry = 0;
3671 int current_length = get_bits(f,5) + 1;
3672 while (current_entry < c->entries) {
3673 int limit = c->entries - current_entry;
3674 int n = get_bits(f, ilog(limit));
3675 if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
3676 memset(lengths + current_entry, current_length, n);
3677 current_entry += n;
3678 ++current_length;
3679 }
3680 } else {
3681 for (j=0; j < c->entries; ++j) {
3682 int present = c->sparse ? get_bits(f,1) : 1;
3683 if (present) {
3684 lengths[j] = get_bits(f, 5) + 1;
3685 ++total;
3686 if (lengths[j] == 32)
3687 return error(f, VORBIS_invalid_setup);
3688 } else {
3689 lengths[j] = NO_CODE;
3690 }
3691 }
3692 }
3693
3694 if (c->sparse && total >= c->entries >> 2) {
3695 // convert sparse items to non-sparse!
3696 if (c->entries > (int) f->setup_temp_memory_required)
3697 f->setup_temp_memory_required = c->entries;
3698
3699 c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3700 if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem);
3701 memcpy(c->codeword_lengths, lengths, c->entries);
3702 setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
3703 lengths = c->codeword_lengths;
3704 c->sparse = 0;
3705 }
3706
3707 // compute the size of the sorted tables
3708 if (c->sparse) {
3709 sorted_count = total;
3710 } else {
3711 sorted_count = 0;
3712 #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
3713 for (j=0; j < c->entries; ++j)
3714 if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
3715 ++sorted_count;
3716 #endif
3717 }
3718
3719 c->sorted_entries = sorted_count;
3720 values = NULL;
3721
3722 CHECK(f);
3723 if (!c->sparse) {
3724 c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
3725 if (!c->codewords) return error(f, VORBIS_outofmem);
3726 } else {
3727 unsigned int size;
3728 if (c->sorted_entries) {
3729 c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries);
3730 if (!c->codeword_lengths) return error(f, VORBIS_outofmem);
3731 c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
3732 if (!c->codewords) return error(f, VORBIS_outofmem);
3733 values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
3734 if (!values) return error(f, VORBIS_outofmem);
3735 }
3736 size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
3737 if (size > f->setup_temp_memory_required)
3738 f->setup_temp_memory_required = size;
3739 }
3740
3741 if (!compute_codewords(c, lengths, c->entries, values)) {
3742 if (c->sparse) setup_temp_free(f, values, 0);
3743 return error(f, VORBIS_invalid_setup);
3744 }
3745
3746 if (c->sorted_entries) {
3747 // allocate an extra slot for sentinels
3748 c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
3749 if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem);
3750 // allocate an extra slot at the front so that c->sorted_values[-1] is defined
3751 // so that we can catch that case without an extra if
3752 c->sorted_values = ( int *) setup_malloc(f, sizeof(*c->sorted_values ) * (c->sorted_entries+1));
3753 if (c->sorted_values == NULL) return error(f, VORBIS_outofmem);
3754 ++c->sorted_values;
3755 c->sorted_values[-1] = -1;
3756 compute_sorted_huffman(c, lengths, values);
3757 }
3758
3759 if (c->sparse) {
3760 setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
3761 setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
3762 setup_temp_free(f, lengths, c->entries);
3763 c->codewords = NULL;
3764 }
3765
3766 compute_accelerated_huffman(c);
3767
3768 CHECK(f);
3769 c->lookup_type = get_bits(f, 4);
3770 if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
3771 if (c->lookup_type > 0) {
3772 uint16 *mults;
3773 c->minimum_value = float32_unpack(get_bits(f, 32));
3774 c->delta_value = float32_unpack(get_bits(f, 32));
3775 c->value_bits = get_bits(f, 4)+1;
3776 c->sequence_p = get_bits(f,1);
3777 if (c->lookup_type == 1) {
3778 c->lookup_values = lookup1_values(c->entries, c->dimensions);
3779 } else {
3780 c->lookup_values = c->entries * c->dimensions;
3781 }
3782 if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup);
3783 mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
3784 if (mults == NULL) return error(f, VORBIS_outofmem);
3785 for (j=0; j < (int) c->lookup_values; ++j) {
3786 int q = get_bits(f, c->value_bits);
3787 if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
3788 mults[j] = q;
3789 }
3790
3791 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3792 if (c->lookup_type == 1) {
3793 int len, sparse = c->sparse;
3794 float last=0;
3795 // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3796 if (sparse) {
3797 if (c->sorted_entries == 0) goto skip;
3798 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
3799 } else
3800 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries * c->dimensions);
3801 if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3802 len = sparse ? c->sorted_entries : c->entries;
3803 for (j=0; j < len; ++j) {
3804 unsigned int z = sparse ? c->sorted_values[j] : j;
3805 unsigned int div=1;
3806 for (k=0; k < c->dimensions; ++k) {
3807 int off = (z / div) % c->lookup_values;
3808 float val = mults[off];
3809 val = mults[off]*c->delta_value + c->minimum_value + last;
3810 c->multiplicands[j*c->dimensions + k] = val;
3811 if (c->sequence_p)
3812 last = val;
3813 if (k+1 < c->dimensions) {
3814 if (div > UINT_MAX / (unsigned int) c->lookup_values) {
3815 setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
3816 return error(f, VORBIS_invalid_setup);
3817 }
3818 div *= c->lookup_values;
3819 }
3820 }
3821 }
3822 c->lookup_type = 2;
3823 }
3824 else
3825 #endif
3826 {
3827 float last=0;
3828 CHECK(f);
3829 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
3830 if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3831 for (j=0; j < (int) c->lookup_values; ++j) {
3832 float val = mults[j] * c->delta_value + c->minimum_value + last;
3833 c->multiplicands[j] = val;
3834 if (c->sequence_p)
3835 last = val;
3836 }
3837 }
3838 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3839 skip:;
3840 #endif
3841 setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values);
3842
3843 CHECK(f);
3844 }
3845 CHECK(f);
3846 }
3847
3848 // time domain transfers (notused)
3849
3850 x = get_bits(f, 6) + 1;
3851 for (i=0; i < x; ++i) {
3852 uint32 z = get_bits(f, 16);
3853 if (z != 0) return error(f, VORBIS_invalid_setup);
3854 }
3855
3856 // Floors
3857 f->floor_count = get_bits(f, 6)+1;
3858 f->floor_config = (Floor *) setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
3859 if (f->floor_config == NULL) return error(f, VORBIS_outofmem);
3860 for (i=0; i < f->floor_count; ++i) {
3861 f->floor_types[i] = get_bits(f, 16);
3862 if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
3863 if (f->floor_types[i] == 0) {
3864 Floor0 *g = &f->floor_config[i].floor0;
3865 g->order = get_bits(f,8);
3866 g->rate = get_bits(f,16);
3867 g->bark_map_size = get_bits(f,16);
3868 g->amplitude_bits = get_bits(f,6);
3869 g->amplitude_offset = get_bits(f,8);
3870 g->number_of_books = get_bits(f,4) + 1;
3871 for (j=0; j < g->number_of_books; ++j)
3872 g->book_list[j] = get_bits(f,8);
3873 return error(f, VORBIS_feature_not_supported);
3874 } else {
3875 stbv__floor_ordering p[31*8+2];
3876 Floor1 *g = &f->floor_config[i].floor1;
3877 int max_class = -1;
3878 g->partitions = get_bits(f, 5);
3879 for (j=0; j < g->partitions; ++j) {
3880 g->partition_class_list[j] = get_bits(f, 4);
3881 if (g->partition_class_list[j] > max_class)
3882 max_class = g->partition_class_list[j];
3883 }
3884 for (j=0; j <= max_class; ++j) {
3885 g->class_dimensions[j] = get_bits(f, 3)+1;
3886 g->class_subclasses[j] = get_bits(f, 2);
3887 if (g->class_subclasses[j]) {
3888 g->class_masterbooks[j] = get_bits(f, 8);
3889 if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3890 }
3891 for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
3892 g->subclass_books[j][k] = get_bits(f,8)-1;
3893 if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3894 }
3895 }
3896 g->floor1_multiplier = get_bits(f,2)+1;
3897 g->rangebits = get_bits(f,4);
3898 g->Xlist[0] = 0;
3899 g->Xlist[1] = 1 << g->rangebits;
3900 g->values = 2;
3901 for (j=0; j < g->partitions; ++j) {
3902 int c = g->partition_class_list[j];
3903 for (k=0; k < g->class_dimensions[c]; ++k) {
3904 g->Xlist[g->values] = get_bits(f, g->rangebits);
3905 ++g->values;
3906 }
3907 }
3908 // precompute the sorting
3909 for (j=0; j < g->values; ++j) {
3910 p[j].x = g->Xlist[j];
3911 p[j].id = j;
3912 }
3913 qsort(p, g->values, sizeof(p[0]), point_compare);
3914 for (j=0; j < g->values; ++j)
3915 g->sorted_order[j] = (uint8) p[j].id;
3916 // precompute the neighbors
3917 for (j=2; j < g->values; ++j) {
3918 int low = 0, hi = 0;
3919 neighbors(g->Xlist, j, &low,&hi);
3920 g->neighbors[j][0] = low;
3921 g->neighbors[j][1] = hi;
3922 }
3923
3924 if (g->values > longest_floorlist)
3925 longest_floorlist = g->values;
3926 }
3927 }
3928
3929 // Residue
3930 f->residue_count = get_bits(f, 6)+1;
3931 f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0]));
3932 if (f->residue_config == NULL) return error(f, VORBIS_outofmem);
3933 memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0]));
3934 for (i=0; i < f->residue_count; ++i) {
3935 uint8 residue_cascade[64];
3936 Residue *r = f->residue_config+i;
3937 f->residue_types[i] = get_bits(f, 16);
3938 if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
3939 r->begin = get_bits(f, 24);
3940 r->end = get_bits(f, 24);
3941 if (r->end < r->begin) return error(f, VORBIS_invalid_setup);
3942 r->part_size = get_bits(f,24)+1;
3943 r->classifications = get_bits(f,6)+1;
3944 r->classbook = get_bits(f,8);
3945 if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3946 for (j=0; j < r->classifications; ++j) {
3947 uint8 high_bits=0;
3948 uint8 low_bits=get_bits(f,3);
3949 if (get_bits(f,1))
3950 high_bits = get_bits(f,5);
3951 residue_cascade[j] = high_bits*8 + low_bits;
3952 }
3953 r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
3954 if (r->residue_books == NULL) return error(f, VORBIS_outofmem);
3955 for (j=0; j < r->classifications; ++j) {
3956 for (k=0; k < 8; ++k) {
3957 if (residue_cascade[j] & (1 << k)) {
3958 r->residue_books[j][k] = get_bits(f, 8);
3959 if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3960 } else {
3961 r->residue_books[j][k] = -1;
3962 }
3963 }
3964 }
3965 // precompute the classifications[] array to avoid inner-loop mod/divide
3966 // call it 'classdata' since we already have r->classifications
3967 r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3968 if (!r->classdata) return error(f, VORBIS_outofmem);
3969 memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3970 for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
3971 int classwords = f->codebooks[r->classbook].dimensions;
3972 int temp = j;
3973 r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
3974 if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem);
3975 for (k=classwords-1; k >= 0; --k) {
3976 r->classdata[j][k] = temp % r->classifications;
3977 temp /= r->classifications;
3978 }
3979 }
3980 }
3981
3982 f->mapping_count = get_bits(f,6)+1;
3983 f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
3984 if (f->mapping == NULL) return error(f, VORBIS_outofmem);
3985 memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping));
3986 for (i=0; i < f->mapping_count; ++i) {
3987 Mapping *m = f->mapping + i;
3988 int mapping_type = get_bits(f,16);
3989 if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
3990 m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
3991 if (m->chan == NULL) return error(f, VORBIS_outofmem);
3992 if (get_bits(f,1))
3993 m->submaps = get_bits(f,4)+1;
3994 else
3995 m->submaps = 1;
3996 if (m->submaps > max_submaps)
3997 max_submaps = m->submaps;
3998 if (get_bits(f,1)) {
3999 m->coupling_steps = get_bits(f,8)+1;
4000 for (k=0; k < m->coupling_steps; ++k) {
4001 m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
4002 m->chan[k].angle = get_bits(f, ilog(f->channels-1));
4003 if (m->chan[k].magnitude >= f->channels) return error(f, VORBIS_invalid_setup);
4004 if (m->chan[k].angle >= f->channels) return error(f, VORBIS_invalid_setup);
4005 if (m->chan[k].magnitude == m->chan[k].angle) return error(f, VORBIS_invalid_setup);
4006 }
4007 } else
4008 m->coupling_steps = 0;
4009
4010 // reserved field
4011 if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
4012 if (m->submaps > 1) {
4013 for (j=0; j < f->channels; ++j) {
4014 m->chan[j].mux = get_bits(f, 4);
4015 if (m->chan[j].mux >= m->submaps) return error(f, VORBIS_invalid_setup);
4016 }
4017 } else
4018 // @SPECIFICATION: this case is missing from the spec
4019 for (j=0; j < f->channels; ++j)
4020 m->chan[j].mux = 0;
4021
4022 for (j=0; j < m->submaps; ++j) {
4023 get_bits(f,8); // discard
4024 m->submap_floor[j] = get_bits(f,8);
4025 m->submap_residue[j] = get_bits(f,8);
4026 if (m->submap_floor[j] >= f->floor_count) return error(f, VORBIS_invalid_setup);
4027 if (m->submap_residue[j] >= f->residue_count) return error(f, VORBIS_invalid_setup);
4028 }
4029 }
4030
4031 // Modes
4032 f->mode_count = get_bits(f, 6)+1;
4033 for (i=0; i < f->mode_count; ++i) {
4034 Mode *m = f->mode_config+i;
4035 m->blockflag = get_bits(f,1);
4036 m->windowtype = get_bits(f,16);
4037 m->transformtype = get_bits(f,16);
4038 m->mapping = get_bits(f,8);
4039 if (m->windowtype != 0) return error(f, VORBIS_invalid_setup);
4040 if (m->transformtype != 0) return error(f, VORBIS_invalid_setup);
4041 if (m->mapping >= f->mapping_count) return error(f, VORBIS_invalid_setup);
4042 }
4043
4044 flush_packet(f);
4045
4046 f->previous_length = 0;
4047
4048 for (i=0; i < f->channels; ++i) {
4049 f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
4050 f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4051 f->finalY[i] = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
4052 if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem);
4053 #ifdef STB_VORBIS_NO_DEFER_FLOOR
4054 f->floor_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4055 if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem);
4056 #endif
4057 }
4058
4059 if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
4060 if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
4061 f->blocksize[0] = f->blocksize_0;
4062 f->blocksize[1] = f->blocksize_1;
4063
4064 #ifdef STB_VORBIS_DIVIDE_TABLE
4065 if (integer_divide_table[1][1]==0)
4066 for (i=0; i < DIVTAB_NUMER; ++i)
4067 for (j=1; j < DIVTAB_DENOM; ++j)
4068 integer_divide_table[i][j] = i / j;
4069 #endif
4070
4071 // compute how much temporary memory is needed
4072
4073 // 1.
4074 {
4075 uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
4076 uint32 classify_mem;
4077 int i,max_part_read=0;
4078 for (i=0; i < f->residue_count; ++i) {
4079 Residue *r = f->residue_config + i;
4080 int n_read = r->end - r->begin;
4081 int part_read = n_read / r->part_size;
4082 if (part_read > max_part_read)
4083 max_part_read = part_read;
4084 }
4085 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
4086 classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
4087 #else
4088 classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
4089 #endif
4090
4091 f->temp_memory_required = classify_mem;
4092 if (imdct_mem > f->temp_memory_required)
4093 f->temp_memory_required = imdct_mem;
4094 }
4095
4096 f->first_decode = TRUE;
4097
4098 if (f->alloc.alloc_buffer) {
4099 assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
4100 // check if there's enough temp memory so we don't error later
4101 if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
4102 return error(f, VORBIS_outofmem);
4103 }
4104
4105 f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
4106
4107 return TRUE;
4108 }
4109
vorbis_deinit(stb_vorbis * p)4110 static void vorbis_deinit(stb_vorbis *p)
4111 {
4112 int i,j;
4113 if (p->residue_config) {
4114 for (i=0; i < p->residue_count; ++i) {
4115 Residue *r = p->residue_config+i;
4116 if (r->classdata) {
4117 for (j=0; j < p->codebooks[r->classbook].entries; ++j)
4118 setup_free(p, r->classdata[j]);
4119 setup_free(p, r->classdata);
4120 }
4121 setup_free(p, r->residue_books);
4122 }
4123 }
4124
4125 if (p->codebooks) {
4126 CHECK(p);
4127 for (i=0; i < p->codebook_count; ++i) {
4128 Codebook *c = p->codebooks + i;
4129 setup_free(p, c->codeword_lengths);
4130 setup_free(p, c->multiplicands);
4131 setup_free(p, c->codewords);
4132 setup_free(p, c->sorted_codewords);
4133 // c->sorted_values[-1] is the first entry in the array
4134 setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
4135 }
4136 setup_free(p, p->codebooks);
4137 }
4138 setup_free(p, p->floor_config);
4139 setup_free(p, p->residue_config);
4140 if (p->mapping) {
4141 for (i=0; i < p->mapping_count; ++i)
4142 setup_free(p, p->mapping[i].chan);
4143 setup_free(p, p->mapping);
4144 }
4145 CHECK(p);
4146 for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
4147 setup_free(p, p->channel_buffers[i]);
4148 setup_free(p, p->previous_window[i]);
4149 #ifdef STB_VORBIS_NO_DEFER_FLOOR
4150 setup_free(p, p->floor_buffers[i]);
4151 #endif
4152 setup_free(p, p->finalY[i]);
4153 }
4154 for (i=0; i < 2; ++i) {
4155 setup_free(p, p->A[i]);
4156 setup_free(p, p->B[i]);
4157 setup_free(p, p->C[i]);
4158 setup_free(p, p->window[i]);
4159 setup_free(p, p->bit_reverse[i]);
4160 }
4161 #ifndef STB_VORBIS_NO_STDIO
4162 if (p->close_on_free) fclose(p->f);
4163 #endif
4164 }
4165
stb_vorbis_close(stb_vorbis * p)4166 void stb_vorbis_close(stb_vorbis *p)
4167 {
4168 if (p == NULL) return;
4169 vorbis_deinit(p);
4170 setup_free(p,p);
4171 }
4172
vorbis_init(stb_vorbis * p,const stb_vorbis_alloc * z)4173 static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
4174 {
4175 memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start
4176 if (z) {
4177 p->alloc = *z;
4178 p->alloc.alloc_buffer_length_in_bytes = (p->alloc.alloc_buffer_length_in_bytes+3) & ~3;
4179 p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
4180 }
4181 p->eof = 0;
4182 p->error = VORBIS__no_error;
4183 p->stream = NULL;
4184 p->codebooks = NULL;
4185 p->page_crc_tests = -1;
4186 #ifndef STB_VORBIS_NO_STDIO
4187 p->close_on_free = FALSE;
4188 p->f = NULL;
4189 #endif
4190 }
4191
stb_vorbis_get_sample_offset(stb_vorbis * f)4192 int stb_vorbis_get_sample_offset(stb_vorbis *f)
4193 {
4194 if (f->current_loc_valid)
4195 return f->current_loc;
4196 else
4197 return -1;
4198 }
4199
stb_vorbis_get_info(stb_vorbis * f)4200 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
4201 {
4202 stb_vorbis_info d;
4203 d.channels = f->channels;
4204 d.sample_rate = f->sample_rate;
4205 d.setup_memory_required = f->setup_memory_required;
4206 d.setup_temp_memory_required = f->setup_temp_memory_required;
4207 d.temp_memory_required = f->temp_memory_required;
4208 d.max_frame_size = f->blocksize_1 >> 1;
4209 return d;
4210 }
4211
stb_vorbis_get_error(stb_vorbis * f)4212 int stb_vorbis_get_error(stb_vorbis *f)
4213 {
4214 int e = f->error;
4215 f->error = VORBIS__no_error;
4216 return e;
4217 }
4218
vorbis_alloc(stb_vorbis * f)4219 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
4220 {
4221 stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
4222 return p;
4223 }
4224
4225 #ifndef STB_VORBIS_NO_PUSHDATA_API
4226
stb_vorbis_flush_pushdata(stb_vorbis * f)4227 void stb_vorbis_flush_pushdata(stb_vorbis *f)
4228 {
4229 f->previous_length = 0;
4230 f->page_crc_tests = 0;
4231 f->discard_samples_deferred = 0;
4232 f->current_loc_valid = FALSE;
4233 f->first_decode = FALSE;
4234 f->samples_output = 0;
4235 f->channel_buffer_start = 0;
4236 f->channel_buffer_end = 0;
4237 }
4238
vorbis_search_for_page_pushdata(vorb * f,uint8 * data,int data_len)4239 static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
4240 {
4241 int i,n;
4242 for (i=0; i < f->page_crc_tests; ++i)
4243 f->scan[i].bytes_done = 0;
4244
4245 // if we have room for more scans, search for them first, because
4246 // they may cause us to stop early if their header is incomplete
4247 if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
4248 if (data_len < 4) return 0;
4249 data_len -= 3; // need to look for 4-byte sequence, so don't miss
4250 // one that straddles a boundary
4251 for (i=0; i < data_len; ++i) {
4252 if (data[i] == 0x4f) {
4253 if (0==memcmp(data+i, ogg_page_header, 4)) {
4254 int j,len;
4255 uint32 crc;
4256 // make sure we have the whole page header
4257 if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
4258 // only read up to this page start, so hopefully we'll
4259 // have the whole page header start next time
4260 data_len = i;
4261 break;
4262 }
4263 // ok, we have it all; compute the length of the page
4264 len = 27 + data[i+26];
4265 for (j=0; j < data[i+26]; ++j)
4266 len += data[i+27+j];
4267 // scan everything up to the embedded crc (which we must 0)
4268 crc = 0;
4269 for (j=0; j < 22; ++j)
4270 crc = crc32_update(crc, data[i+j]);
4271 // now process 4 0-bytes
4272 for ( ; j < 26; ++j)
4273 crc = crc32_update(crc, 0);
4274 // len is the total number of bytes we need to scan
4275 n = f->page_crc_tests++;
4276 f->scan[n].bytes_left = len-j;
4277 f->scan[n].crc_so_far = crc;
4278 f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24);
4279 // if the last frame on a page is continued to the next, then
4280 // we can't recover the sample_loc immediately
4281 if (data[i+27+data[i+26]-1] == 255)
4282 f->scan[n].sample_loc = ~0;
4283 else
4284 f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24);
4285 f->scan[n].bytes_done = i+j;
4286 if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT)
4287 break;
4288 // keep going if we still have room for more
4289 }
4290 }
4291 }
4292 }
4293
4294 for (i=0; i < f->page_crc_tests;) {
4295 uint32 crc;
4296 int j;
4297 int n = f->scan[i].bytes_done;
4298 int m = f->scan[i].bytes_left;
4299 if (m > data_len - n) m = data_len - n;
4300 // m is the bytes to scan in the current chunk
4301 crc = f->scan[i].crc_so_far;
4302 for (j=0; j < m; ++j)
4303 crc = crc32_update(crc, data[n+j]);
4304 f->scan[i].bytes_left -= m;
4305 f->scan[i].crc_so_far = crc;
4306 if (f->scan[i].bytes_left == 0) {
4307 // does it match?
4308 if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
4309 // Houston, we have page
4310 data_len = n+m; // consumption amount is wherever that scan ended
4311 f->page_crc_tests = -1; // drop out of page scan mode
4312 f->previous_length = 0; // decode-but-don't-output one frame
4313 f->next_seg = -1; // start a new page
4314 f->current_loc = f->scan[i].sample_loc; // set the current sample location
4315 // to the amount we'd have decoded had we decoded this page
4316 f->current_loc_valid = f->current_loc != ~0U;
4317 return data_len;
4318 }
4319 // delete entry
4320 f->scan[i] = f->scan[--f->page_crc_tests];
4321 } else {
4322 ++i;
4323 }
4324 }
4325
4326 return data_len;
4327 }
4328
4329 // return value: number of bytes we used
stb_vorbis_decode_frame_pushdata(stb_vorbis * f,const uint8 * data,int data_len,int * channels,float *** output,int * samples)4330 int stb_vorbis_decode_frame_pushdata(
4331 stb_vorbis *f, // the file we're decoding
4332 const uint8 *data, int data_len, // the memory available for decoding
4333 int *channels, // place to write number of float * buffers
4334 float ***output, // place to write float ** array of float * buffers
4335 int *samples // place to write number of output samples
4336 )
4337 {
4338 int i;
4339 int len,right,left;
4340
4341 if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4342
4343 if (f->page_crc_tests >= 0) {
4344 *samples = 0;
4345 return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len);
4346 }
4347
4348 f->stream = (uint8 *) data;
4349 f->stream_end = (uint8 *) data + data_len;
4350 f->error = VORBIS__no_error;
4351
4352 // check that we have the entire packet in memory
4353 if (!is_whole_packet_present(f, FALSE)) {
4354 *samples = 0;
4355 return 0;
4356 }
4357
4358 if (!vorbis_decode_packet(f, &len, &left, &right)) {
4359 // save the actual error we encountered
4360 enum STBVorbisError error = f->error;
4361 if (error == VORBIS_bad_packet_type) {
4362 // flush and resynch
4363 f->error = VORBIS__no_error;
4364 while (get8_packet(f) != EOP)
4365 if (f->eof) break;
4366 *samples = 0;
4367 return (int) (f->stream - data);
4368 }
4369 if (error == VORBIS_continued_packet_flag_invalid) {
4370 if (f->previous_length == 0) {
4371 // we may be resynching, in which case it's ok to hit one
4372 // of these; just discard the packet
4373 f->error = VORBIS__no_error;
4374 while (get8_packet(f) != EOP)
4375 if (f->eof) break;
4376 *samples = 0;
4377 return (int) (f->stream - data);
4378 }
4379 }
4380 // if we get an error while parsing, what to do?
4381 // well, it DEFINITELY won't work to continue from where we are!
4382 stb_vorbis_flush_pushdata(f);
4383 // restore the error that actually made us bail
4384 f->error = error;
4385 *samples = 0;
4386 return 1;
4387 }
4388
4389 // success!
4390 len = vorbis_finish_frame(f, len, left, right);
4391 for (i=0; i < f->channels; ++i)
4392 f->outputs[i] = f->channel_buffers[i] + left;
4393
4394 if (channels) *channels = f->channels;
4395 *samples = len;
4396 *output = f->outputs;
4397 return (int) (f->stream - data);
4398 }
4399
stb_vorbis_open_pushdata(const unsigned char * data,int data_len,int * data_used,int * error,const stb_vorbis_alloc * alloc)4400 stb_vorbis *stb_vorbis_open_pushdata(
4401 const unsigned char *data, int data_len, // the memory available for decoding
4402 int *data_used, // only defined if result is not NULL
4403 int *error, const stb_vorbis_alloc *alloc)
4404 {
4405 stb_vorbis *f, p;
4406 vorbis_init(&p, alloc);
4407 p.stream = (uint8 *) data;
4408 p.stream_end = (uint8 *) data + data_len;
4409 p.push_mode = TRUE;
4410 if (!start_decoder(&p)) {
4411 if (p.eof)
4412 *error = VORBIS_need_more_data;
4413 else
4414 *error = p.error;
4415 return NULL;
4416 }
4417 f = vorbis_alloc(&p);
4418 if (f) {
4419 *f = p;
4420 *data_used = (int) (f->stream - data);
4421 *error = 0;
4422 return f;
4423 } else {
4424 vorbis_deinit(&p);
4425 return NULL;
4426 }
4427 }
4428 #endif // STB_VORBIS_NO_PUSHDATA_API
4429
stb_vorbis_get_file_offset(stb_vorbis * f)4430 unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
4431 {
4432 #ifndef STB_VORBIS_NO_PUSHDATA_API
4433 if (f->push_mode) return 0;
4434 #endif
4435 if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start);
4436 #ifndef STB_VORBIS_NO_STDIO
4437 return (unsigned int) (ftell(f->f) - f->f_start);
4438 #endif
4439 }
4440
4441 #ifndef STB_VORBIS_NO_PULLDATA_API
4442 //
4443 // DATA-PULLING API
4444 //
4445
vorbis_find_page(stb_vorbis * f,uint32 * end,uint32 * last)4446 static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
4447 {
4448 for(;;) {
4449 int n;
4450 if (f->eof) return 0;
4451 n = get8(f);
4452 if (n == 0x4f) { // page header candidate
4453 unsigned int retry_loc = stb_vorbis_get_file_offset(f);
4454 int i;
4455 // check if we're off the end of a file_section stream
4456 if (retry_loc - 25 > f->stream_len)
4457 return 0;
4458 // check the rest of the header
4459 for (i=1; i < 4; ++i)
4460 if (get8(f) != ogg_page_header[i])
4461 break;
4462 if (f->eof) return 0;
4463 if (i == 4) {
4464 uint8 header[27];
4465 uint32 i, crc, goal, len;
4466 for (i=0; i < 4; ++i)
4467 header[i] = ogg_page_header[i];
4468 for (; i < 27; ++i)
4469 header[i] = get8(f);
4470 if (f->eof) return 0;
4471 if (header[4] != 0) goto invalid;
4472 goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24);
4473 for (i=22; i < 26; ++i)
4474 header[i] = 0;
4475 crc = 0;
4476 for (i=0; i < 27; ++i)
4477 crc = crc32_update(crc, header[i]);
4478 len = 0;
4479 for (i=0; i < header[26]; ++i) {
4480 int s = get8(f);
4481 crc = crc32_update(crc, s);
4482 len += s;
4483 }
4484 if (len && f->eof) return 0;
4485 for (i=0; i < len; ++i)
4486 crc = crc32_update(crc, get8(f));
4487 // finished parsing probable page
4488 if (crc == goal) {
4489 // we could now check that it's either got the last
4490 // page flag set, OR it's followed by the capture
4491 // pattern, but I guess TECHNICALLY you could have
4492 // a file with garbage between each ogg page and recover
4493 // from it automatically? So even though that paranoia
4494 // might decrease the chance of an invalid decode by
4495 // another 2^32, not worth it since it would hose those
4496 // invalid-but-useful files?
4497 if (end)
4498 *end = stb_vorbis_get_file_offset(f);
4499 if (last) {
4500 if (header[5] & 0x04)
4501 *last = 1;
4502 else
4503 *last = 0;
4504 }
4505 set_file_offset(f, retry_loc-1);
4506 return 1;
4507 }
4508 }
4509 invalid:
4510 // not a valid page, so rewind and look for next one
4511 set_file_offset(f, retry_loc);
4512 }
4513 }
4514 }
4515
4516
4517 #define SAMPLE_unknown 0xffffffff
4518
4519 // seeking is implemented with a binary search, which narrows down the range to
4520 // 64K, before using a linear search (because finding the synchronization
4521 // pattern can be expensive, and the chance we'd find the end page again is
4522 // relatively high for small ranges)
4523 //
4524 // two initial interpolation-style probes are used at the start of the search
4525 // to try to bound either side of the binary search sensibly, while still
4526 // working in O(log n) time if they fail.
4527
get_seek_page_info(stb_vorbis * f,ProbedPage * z)4528 static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
4529 {
4530 uint8 header[27], lacing[255];
4531 int i,len;
4532
4533 // record where the page starts
4534 z->page_start = stb_vorbis_get_file_offset(f);
4535
4536 // parse the header
4537 getn(f, header, 27);
4538 if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
4539 return 0;
4540 getn(f, lacing, header[26]);
4541
4542 // determine the length of the payload
4543 len = 0;
4544 for (i=0; i < header[26]; ++i)
4545 len += lacing[i];
4546
4547 // this implies where the page ends
4548 z->page_end = z->page_start + 27 + header[26] + len;
4549
4550 // read the last-decoded sample out of the data
4551 z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
4552
4553 // restore file state to where we were
4554 set_file_offset(f, z->page_start);
4555 return 1;
4556 }
4557
4558 // rarely used function to seek back to the preceeding page while finding the
4559 // start of a packet
go_to_page_before(stb_vorbis * f,unsigned int limit_offset)4560 static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
4561 {
4562 unsigned int previous_safe, end;
4563
4564 // now we want to seek back 64K from the limit
4565 if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset)
4566 previous_safe = limit_offset - 65536;
4567 else
4568 previous_safe = f->first_audio_page_offset;
4569
4570 set_file_offset(f, previous_safe);
4571
4572 while (vorbis_find_page(f, &end, NULL)) {
4573 if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
4574 return 1;
4575 set_file_offset(f, end);
4576 }
4577
4578 return 0;
4579 }
4580
4581 // implements the search logic for finding a page and starting decoding. if
4582 // the function succeeds, current_loc_valid will be true and current_loc will
4583 // be less than or equal to the provided sample number (the closer the
4584 // better).
seek_to_sample_coarse(stb_vorbis * f,uint32 sample_number)4585 static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
4586 {
4587 ProbedPage left, right, mid;
4588 int i, start_seg_with_known_loc, end_pos, page_start;
4589 uint32 delta, stream_length, padding;
4590 double offset = 0.0, bytes_per_sample = 0.0;
4591 int probe = 0;
4592
4593 // find the last page and validate the target sample
4594 stream_length = stb_vorbis_stream_length_in_samples(f);
4595 if (stream_length == 0) return error(f, VORBIS_seek_without_length);
4596 if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
4597
4598 // this is the maximum difference between the window-center (which is the
4599 // actual granule position value), and the right-start (which the spec
4600 // indicates should be the granule position (give or take one)).
4601 padding = ((f->blocksize_1 - f->blocksize_0) >> 2);
4602 if (sample_number < padding)
4603 sample_number = 0;
4604 else
4605 sample_number -= padding;
4606
4607 left = f->p_first;
4608 while (left.last_decoded_sample == ~0U) {
4609 // (untested) the first page does not have a 'last_decoded_sample'
4610 set_file_offset(f, left.page_end);
4611 if (!get_seek_page_info(f, &left)) goto error;
4612 }
4613
4614 right = f->p_last;
4615 assert(right.last_decoded_sample != ~0U);
4616
4617 // starting from the start is handled differently
4618 if (sample_number <= left.last_decoded_sample) {
4619 if (stb_vorbis_seek_start(f))
4620 return 1;
4621 return 0;
4622 }
4623
4624 while (left.page_end != right.page_start) {
4625 assert(left.page_end < right.page_start);
4626 // search range in bytes
4627 delta = right.page_start - left.page_end;
4628 if (delta <= 65536) {
4629 // there's only 64K left to search - handle it linearly
4630 set_file_offset(f, left.page_end);
4631 } else {
4632 if (probe < 2) {
4633 if (probe == 0) {
4634 // first probe (interpolate)
4635 double data_bytes = right.page_end - left.page_start;
4636 bytes_per_sample = data_bytes / right.last_decoded_sample;
4637 offset = left.page_start + bytes_per_sample * (sample_number - left.last_decoded_sample);
4638 } else {
4639 // second probe (try to bound the other side)
4640 double error = ((double) sample_number - mid.last_decoded_sample) * bytes_per_sample;
4641 if (error >= 0 && error < 8000) error = 8000;
4642 if (error < 0 && error > -8000) error = -8000;
4643 offset += error * 2;
4644 }
4645
4646 // ensure the offset is valid
4647 if (offset < left.page_end)
4648 offset = left.page_end;
4649 if (offset > right.page_start - 65536)
4650 offset = right.page_start - 65536;
4651
4652 set_file_offset(f, (unsigned int) offset);
4653 } else {
4654 // binary search for large ranges (offset by 32K to ensure
4655 // we don't hit the right page)
4656 set_file_offset(f, left.page_end + (delta / 2) - 32768);
4657 }
4658
4659 if (!vorbis_find_page(f, NULL, NULL)) goto error;
4660 }
4661
4662 for (;;) {
4663 if (!get_seek_page_info(f, &mid)) goto error;
4664 if (mid.last_decoded_sample != ~0U) break;
4665 // (untested) no frames end on this page
4666 set_file_offset(f, mid.page_end);
4667 assert(mid.page_start < right.page_start);
4668 }
4669
4670 // if we've just found the last page again then we're in a tricky file,
4671 // and we're close enough.
4672 if (mid.page_start == right.page_start)
4673 break;
4674
4675 if (sample_number < mid.last_decoded_sample)
4676 right = mid;
4677 else
4678 left = mid;
4679
4680 ++probe;
4681 }
4682
4683 // seek back to start of the last packet
4684 page_start = left.page_start;
4685 set_file_offset(f, page_start);
4686 if (!start_page(f)) return error(f, VORBIS_seek_failed);
4687 end_pos = f->end_seg_with_known_loc;
4688 assert(end_pos >= 0);
4689
4690 for (;;) {
4691 for (i = end_pos; i > 0; --i)
4692 if (f->segments[i-1] != 255)
4693 break;
4694
4695 start_seg_with_known_loc = i;
4696
4697 if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet))
4698 break;
4699
4700 // (untested) the final packet begins on an earlier page
4701 if (!go_to_page_before(f, page_start))
4702 goto error;
4703
4704 page_start = stb_vorbis_get_file_offset(f);
4705 if (!start_page(f)) goto error;
4706 end_pos = f->segment_count - 1;
4707 }
4708
4709 // prepare to start decoding
4710 f->current_loc_valid = FALSE;
4711 f->last_seg = FALSE;
4712 f->valid_bits = 0;
4713 f->packet_bytes = 0;
4714 f->bytes_in_seg = 0;
4715 f->previous_length = 0;
4716 f->next_seg = start_seg_with_known_loc;
4717
4718 for (i = 0; i < start_seg_with_known_loc; i++)
4719 skip(f, f->segments[i]);
4720
4721 // start decoding (optimizable - this frame is generally discarded)
4722 if (!vorbis_pump_first_frame(f))
4723 return 0;
4724 if (f->current_loc > sample_number)
4725 return error(f, VORBIS_seek_failed);
4726 return 1;
4727
4728 error:
4729 // try to restore the file to a valid state
4730 stb_vorbis_seek_start(f);
4731 return error(f, VORBIS_seek_failed);
4732 }
4733
4734 // the same as vorbis_decode_initial, but without advancing
peek_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)4735 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
4736 {
4737 int bits_read, bytes_read;
4738
4739 if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
4740 return 0;
4741
4742 // either 1 or 2 bytes were read, figure out which so we can rewind
4743 bits_read = 1 + ilog(f->mode_count-1);
4744 if (f->mode_config[*mode].blockflag)
4745 bits_read += 2;
4746 bytes_read = (bits_read + 7) / 8;
4747
4748 f->bytes_in_seg += bytes_read;
4749 f->packet_bytes -= bytes_read;
4750 skip(f, -bytes_read);
4751 if (f->next_seg == -1)
4752 f->next_seg = f->segment_count - 1;
4753 else
4754 f->next_seg--;
4755 f->valid_bits = 0;
4756
4757 return 1;
4758 }
4759
stb_vorbis_seek_frame(stb_vorbis * f,unsigned int sample_number)4760 int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
4761 {
4762 uint32 max_frame_samples;
4763
4764 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4765
4766 // fast page-level search
4767 if (!seek_to_sample_coarse(f, sample_number))
4768 return 0;
4769
4770 assert(f->current_loc_valid);
4771 assert(f->current_loc <= sample_number);
4772
4773 // linear search for the relevant packet
4774 max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2;
4775 while (f->current_loc < sample_number) {
4776 int left_start, left_end, right_start, right_end, mode, frame_samples;
4777 if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
4778 return error(f, VORBIS_seek_failed);
4779 // calculate the number of samples returned by the next frame
4780 frame_samples = right_start - left_start;
4781 if (f->current_loc + frame_samples > sample_number) {
4782 return 1; // the next frame will contain the sample
4783 } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) {
4784 // there's a chance the frame after this could contain the sample
4785 vorbis_pump_first_frame(f);
4786 } else {
4787 // this frame is too early to be relevant
4788 f->current_loc += frame_samples;
4789 f->previous_length = 0;
4790 maybe_start_packet(f);
4791 flush_packet(f);
4792 }
4793 }
4794 // the next frame will start with the sample
4795 assert(f->current_loc == sample_number);
4796 return 1;
4797 }
4798
stb_vorbis_seek(stb_vorbis * f,unsigned int sample_number)4799 int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
4800 {
4801 if (!stb_vorbis_seek_frame(f, sample_number))
4802 return 0;
4803
4804 if (sample_number != f->current_loc) {
4805 int n;
4806 uint32 frame_start = f->current_loc;
4807 stb_vorbis_get_frame_float(f, &n, NULL);
4808 assert(sample_number > frame_start);
4809 assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end);
4810 f->channel_buffer_start += (sample_number - frame_start);
4811 }
4812
4813 return 1;
4814 }
4815
stb_vorbis_seek_start(stb_vorbis * f)4816 int stb_vorbis_seek_start(stb_vorbis *f)
4817 {
4818 if (IS_PUSH_MODE(f)) { return error(f, VORBIS_invalid_api_mixing); }
4819 set_file_offset(f, f->first_audio_page_offset);
4820 f->previous_length = 0;
4821 f->first_decode = TRUE;
4822 f->next_seg = -1;
4823 return vorbis_pump_first_frame(f);
4824 }
4825
stb_vorbis_stream_length_in_samples(stb_vorbis * f)4826 unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
4827 {
4828 unsigned int restore_offset, previous_safe;
4829 unsigned int end, last_page_loc;
4830
4831 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4832 if (!f->total_samples) {
4833 unsigned int last;
4834 uint32 lo,hi;
4835 char header[6];
4836
4837 // first, store the current decode position so we can restore it
4838 restore_offset = stb_vorbis_get_file_offset(f);
4839
4840 // now we want to seek back 64K from the end (the last page must
4841 // be at most a little less than 64K, but let's allow a little slop)
4842 if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
4843 previous_safe = f->stream_len - 65536;
4844 else
4845 previous_safe = f->first_audio_page_offset;
4846
4847 set_file_offset(f, previous_safe);
4848 // previous_safe is now our candidate 'earliest known place that seeking
4849 // to will lead to the final page'
4850
4851 if (!vorbis_find_page(f, &end, &last)) {
4852 // if we can't find a page, we're hosed!
4853 f->error = VORBIS_cant_find_last_page;
4854 f->total_samples = 0xffffffff;
4855 goto done;
4856 }
4857
4858 // check if there are more pages
4859 last_page_loc = stb_vorbis_get_file_offset(f);
4860
4861 // stop when the last_page flag is set, not when we reach eof;
4862 // this allows us to stop short of a 'file_section' end without
4863 // explicitly checking the length of the section
4864 while (!last) {
4865 set_file_offset(f, end);
4866 if (!vorbis_find_page(f, &end, &last)) {
4867 // the last page we found didn't have the 'last page' flag
4868 // set. whoops!
4869 break;
4870 }
4871 previous_safe = last_page_loc+1;
4872 last_page_loc = stb_vorbis_get_file_offset(f);
4873 }
4874
4875 set_file_offset(f, last_page_loc);
4876
4877 // parse the header
4878 getn(f, (unsigned char *)header, 6);
4879 // extract the absolute granule position
4880 lo = get32(f);
4881 hi = get32(f);
4882 if (lo == 0xffffffff && hi == 0xffffffff) {
4883 f->error = VORBIS_cant_find_last_page;
4884 f->total_samples = SAMPLE_unknown;
4885 goto done;
4886 }
4887 if (hi)
4888 lo = 0xfffffffe; // saturate
4889 f->total_samples = lo;
4890
4891 f->p_last.page_start = last_page_loc;
4892 f->p_last.page_end = end;
4893 f->p_last.last_decoded_sample = lo;
4894
4895 done:
4896 set_file_offset(f, restore_offset);
4897 }
4898 return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
4899 }
4900
stb_vorbis_stream_length_in_seconds(stb_vorbis * f)4901 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
4902 {
4903 return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
4904 }
4905
4906
4907
stb_vorbis_get_frame_float(stb_vorbis * f,int * channels,float *** output)4908 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
4909 {
4910 int len, right,left,i;
4911 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4912
4913 if (!vorbis_decode_packet(f, &len, &left, &right)) {
4914 f->channel_buffer_start = f->channel_buffer_end = 0;
4915 return 0;
4916 }
4917
4918 len = vorbis_finish_frame(f, len, left, right);
4919 for (i=0; i < f->channels; ++i)
4920 f->outputs[i] = f->channel_buffers[i] + left;
4921
4922 f->channel_buffer_start = left;
4923 f->channel_buffer_end = left+len;
4924
4925 if (channels) *channels = f->channels;
4926 if (output) *output = f->outputs;
4927 return len;
4928 }
4929
4930 #ifndef STB_VORBIS_NO_STDIO
4931
stb_vorbis_open_file_section(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc,unsigned int length)4932 stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length)
4933 {
4934 stb_vorbis *f, p;
4935 vorbis_init(&p, alloc);
4936 p.f = file;
4937 p.f_start = (uint32) ftell(file);
4938 p.stream_len = length;
4939 p.close_on_free = close_on_free;
4940 if (start_decoder(&p)) {
4941 f = vorbis_alloc(&p);
4942 if (f) {
4943 *f = p;
4944 vorbis_pump_first_frame(f);
4945 return f;
4946 }
4947 }
4948 if (error) *error = p.error;
4949 vorbis_deinit(&p);
4950 return NULL;
4951 }
4952
stb_vorbis_open_file(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc)4953 stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc)
4954 {
4955 unsigned int len, start;
4956 start = (unsigned int) ftell(file);
4957 fseek(file, 0, SEEK_END);
4958 len = (unsigned int) (ftell(file) - start);
4959 fseek(file, start, SEEK_SET);
4960 return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
4961 }
4962
stb_vorbis_open_filename(const char * filename,int * error,const stb_vorbis_alloc * alloc)4963 stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
4964 {
4965 FILE *f = fopen(filename, "rb");
4966 if (f)
4967 return stb_vorbis_open_file(f, TRUE, error, alloc);
4968 if (error) *error = VORBIS_file_open_failure;
4969 return NULL;
4970 }
4971 #endif // STB_VORBIS_NO_STDIO
4972
stb_vorbis_open_memory(const unsigned char * data,int len,int * error,const stb_vorbis_alloc * alloc)4973 stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc)
4974 {
4975 stb_vorbis *f, p;
4976 if (data == NULL) return NULL;
4977 vorbis_init(&p, alloc);
4978 p.stream = (uint8 *) data;
4979 p.stream_end = (uint8 *) data + len;
4980 p.stream_start = (uint8 *) p.stream;
4981 p.stream_len = len;
4982 p.push_mode = FALSE;
4983 if (start_decoder(&p)) {
4984 f = vorbis_alloc(&p);
4985 if (f) {
4986 *f = p;
4987 vorbis_pump_first_frame(f);
4988 if (error) *error = VORBIS__no_error;
4989 return f;
4990 }
4991 }
4992 if (error) *error = p.error;
4993 vorbis_deinit(&p);
4994 return NULL;
4995 }
4996
4997 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
4998 #define PLAYBACK_MONO 1
4999 #define PLAYBACK_LEFT 2
5000 #define PLAYBACK_RIGHT 4
5001
5002 #define L (PLAYBACK_LEFT | PLAYBACK_MONO)
5003 #define C (PLAYBACK_LEFT | PLAYBACK_RIGHT | PLAYBACK_MONO)
5004 #define R (PLAYBACK_RIGHT | PLAYBACK_MONO)
5005
5006 static int8 channel_position[7][6] =
5007 {
5008 { 0 },
5009 { C },
5010 { L, R },
5011 { L, C, R },
5012 { L, R, L, R },
5013 { L, C, R, L, R },
5014 { L, C, R, L, R, C },
5015 };
5016
5017
5018 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
5019 typedef union {
5020 float f;
5021 int i;
5022 } float_conv;
5023 typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4];
5024 #define FASTDEF(x) float_conv x
5025 // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
5026 #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT))
5027 #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22))
5028 #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s))
5029 #define check_endianness()
5030 #else
5031 #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s))))
5032 #define check_endianness()
5033 #define FASTDEF(x)
5034 #endif
5035
copy_samples(short * dest,float * src,int len)5036 static void copy_samples(short *dest, float *src, int len)
5037 {
5038 int i;
5039 check_endianness();
5040 for (i=0; i < len; ++i) {
5041 FASTDEF(temp);
5042 int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i],15);
5043 if ((unsigned int) (v + 32768) > 65535)
5044 v = v < 0 ? -32768 : 32767;
5045 dest[i] = v;
5046 }
5047 }
5048
compute_samples(int mask,short * output,int num_c,float ** data,int d_offset,int len)5049 static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
5050 {
5051 #define BUFFER_SIZE 32
5052 float buffer[BUFFER_SIZE];
5053 int i,j,o,n = BUFFER_SIZE;
5054 check_endianness();
5055 for (o = 0; o < len; o += BUFFER_SIZE) {
5056 memset(buffer, 0, sizeof(buffer));
5057 if (o + n > len) n = len - o;
5058 for (j=0; j < num_c; ++j) {
5059 if (channel_position[num_c][j] & mask) {
5060 for (i=0; i < n; ++i)
5061 buffer[i] += data[j][d_offset+o+i];
5062 }
5063 }
5064 for (i=0; i < n; ++i) {
5065 FASTDEF(temp);
5066 int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5067 if ((unsigned int) (v + 32768) > 65535)
5068 v = v < 0 ? -32768 : 32767;
5069 output[o+i] = v;
5070 }
5071 }
5072 }
5073
compute_stereo_samples(short * output,int num_c,float ** data,int d_offset,int len)5074 static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
5075 {
5076 #define BUFFER_SIZE 32
5077 float buffer[BUFFER_SIZE];
5078 int i,j,o,n = BUFFER_SIZE >> 1;
5079 // o is the offset in the source data
5080 check_endianness();
5081 for (o = 0; o < len; o += BUFFER_SIZE >> 1) {
5082 // o2 is the offset in the output data
5083 int o2 = o << 1;
5084 memset(buffer, 0, sizeof(buffer));
5085 if (o + n > len) n = len - o;
5086 for (j=0; j < num_c; ++j) {
5087 int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);
5088 if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) {
5089 for (i=0; i < n; ++i) {
5090 buffer[i*2+0] += data[j][d_offset+o+i];
5091 buffer[i*2+1] += data[j][d_offset+o+i];
5092 }
5093 } else if (m == PLAYBACK_LEFT) {
5094 for (i=0; i < n; ++i) {
5095 buffer[i*2+0] += data[j][d_offset+o+i];
5096 }
5097 } else if (m == PLAYBACK_RIGHT) {
5098 for (i=0; i < n; ++i) {
5099 buffer[i*2+1] += data[j][d_offset+o+i];
5100 }
5101 }
5102 }
5103 for (i=0; i < (n<<1); ++i) {
5104 FASTDEF(temp);
5105 int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5106 if ((unsigned int) (v + 32768) > 65535)
5107 v = v < 0 ? -32768 : 32767;
5108 output[o2+i] = v;
5109 }
5110 }
5111 }
5112
convert_samples_short(int buf_c,short ** buffer,int b_offset,int data_c,float ** data,int d_offset,int samples)5113 static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
5114 {
5115 int i;
5116 if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5117 static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
5118 for (i=0; i < buf_c; ++i)
5119 compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples);
5120 } else {
5121 int limit = buf_c < data_c ? buf_c : data_c;
5122 for (i=0; i < limit; ++i)
5123 copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
5124 for ( ; i < buf_c; ++i)
5125 memset(buffer[i]+b_offset, 0, sizeof(short) * samples);
5126 }
5127 }
5128
stb_vorbis_get_frame_short(stb_vorbis * f,int num_c,short ** buffer,int num_samples)5129 int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
5130 {
5131 float **output;
5132 int len = stb_vorbis_get_frame_float(f, NULL, &output);
5133 if (len > num_samples) len = num_samples;
5134 if (len)
5135 convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len);
5136 return len;
5137 }
5138
convert_channels_short_interleaved(int buf_c,short * buffer,int data_c,float ** data,int d_offset,int len)5139 static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
5140 {
5141 int i;
5142 check_endianness();
5143 if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5144 assert(buf_c == 2);
5145 for (i=0; i < buf_c; ++i)
5146 compute_stereo_samples(buffer, data_c, data, d_offset, len);
5147 } else {
5148 int limit = buf_c < data_c ? buf_c : data_c;
5149 int j;
5150 for (j=0; j < len; ++j) {
5151 for (i=0; i < limit; ++i) {
5152 FASTDEF(temp);
5153 float f = data[i][d_offset+j];
5154 int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15);
5155 if ((unsigned int) (v + 32768) > 65535)
5156 v = v < 0 ? -32768 : 32767;
5157 *buffer++ = v;
5158 }
5159 for ( ; i < buf_c; ++i)
5160 *buffer++ = 0;
5161 }
5162 }
5163 }
5164
stb_vorbis_get_frame_short_interleaved(stb_vorbis * f,int num_c,short * buffer,int num_shorts)5165 int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
5166 {
5167 float **output;
5168 int len;
5169 if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
5170 len = stb_vorbis_get_frame_float(f, NULL, &output);
5171 if (len) {
5172 if (len*num_c > num_shorts) len = num_shorts / num_c;
5173 convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len);
5174 }
5175 return len;
5176 }
5177
stb_vorbis_get_samples_short_interleaved(stb_vorbis * f,int channels,short * buffer,int num_shorts)5178 int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
5179 {
5180 float **outputs;
5181 int len = num_shorts / channels;
5182 int n=0;
5183 int z = f->channels;
5184 if (z > channels) z = channels;
5185 while (n < len) {
5186 int k = f->channel_buffer_end - f->channel_buffer_start;
5187 if (n+k >= len) k = len - n;
5188 if (k)
5189 convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5190 buffer += k*channels;
5191 n += k;
5192 f->channel_buffer_start += k;
5193 if (n == len) break;
5194 if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5195 }
5196 return n;
5197 }
5198
stb_vorbis_get_samples_short(stb_vorbis * f,int channels,short ** buffer,int len)5199 int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
5200 {
5201 float **outputs;
5202 int n=0;
5203 int z = f->channels;
5204 if (z > channels) z = channels;
5205 while (n < len) {
5206 int k = f->channel_buffer_end - f->channel_buffer_start;
5207 if (n+k >= len) k = len - n;
5208 if (k)
5209 convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5210 n += k;
5211 f->channel_buffer_start += k;
5212 if (n == len) break;
5213 if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5214 }
5215 return n;
5216 }
5217
5218 #ifndef STB_VORBIS_NO_STDIO
stb_vorbis_decode_filename(const char * filename,int * channels,int * sample_rate,short ** output)5219 int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output)
5220 {
5221 int data_len, offset, total, limit, error;
5222 short *data;
5223 stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
5224 if (v == NULL) return -1;
5225 limit = v->channels * 4096;
5226 *channels = v->channels;
5227 if (sample_rate)
5228 *sample_rate = v->sample_rate;
5229 offset = data_len = 0;
5230 total = limit;
5231 data = (short *) malloc(total * sizeof(*data));
5232 if (data == NULL) {
5233 stb_vorbis_close(v);
5234 return -2;
5235 }
5236 for (;;) {
5237 int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5238 if (n == 0) break;
5239 data_len += n;
5240 offset += n * v->channels;
5241 if (offset + limit > total) {
5242 short *data2;
5243 total *= 2;
5244 data2 = (short *) realloc(data, total * sizeof(*data));
5245 if (data2 == NULL) {
5246 free(data);
5247 stb_vorbis_close(v);
5248 return -2;
5249 }
5250 data = data2;
5251 }
5252 }
5253 *output = data;
5254 stb_vorbis_close(v);
5255 return data_len;
5256 }
5257 #endif // NO_STDIO
5258
stb_vorbis_decode_memory(const uint8 * mem,int len,int * channels,int * sample_rate,short ** output)5259 int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output)
5260 {
5261 int data_len, offset, total, limit, error;
5262 short *data;
5263 stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
5264 if (v == NULL) return -1;
5265 limit = v->channels * 4096;
5266 *channels = v->channels;
5267 if (sample_rate)
5268 *sample_rate = v->sample_rate;
5269 offset = data_len = 0;
5270 total = limit;
5271 data = (short *) malloc(total * sizeof(*data));
5272 if (data == NULL) {
5273 stb_vorbis_close(v);
5274 return -2;
5275 }
5276 for (;;) {
5277 int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5278 if (n == 0) break;
5279 data_len += n;
5280 offset += n * v->channels;
5281 if (offset + limit > total) {
5282 short *data2;
5283 total *= 2;
5284 data2 = (short *) realloc(data, total * sizeof(*data));
5285 if (data2 == NULL) {
5286 free(data);
5287 stb_vorbis_close(v);
5288 return -2;
5289 }
5290 data = data2;
5291 }
5292 }
5293 *output = data;
5294 stb_vorbis_close(v);
5295 return data_len;
5296 }
5297 #endif // STB_VORBIS_NO_INTEGER_CONVERSION
5298
stb_vorbis_get_samples_float_interleaved(stb_vorbis * f,int channels,float * buffer,int num_floats)5299 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
5300 {
5301 float **outputs;
5302 int len = num_floats / channels;
5303 int n=0;
5304 int z = f->channels;
5305 if (z > channels) z = channels;
5306 while (n < len) {
5307 int i,j;
5308 int k = f->channel_buffer_end - f->channel_buffer_start;
5309 if (n+k >= len) k = len - n;
5310 for (j=0; j < k; ++j) {
5311 for (i=0; i < z; ++i)
5312 *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
5313 for ( ; i < channels; ++i)
5314 *buffer++ = 0;
5315 }
5316 n += k;
5317 f->channel_buffer_start += k;
5318 if (n == len)
5319 break;
5320 if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5321 break;
5322 }
5323 return n;
5324 }
5325
stb_vorbis_get_samples_float(stb_vorbis * f,int channels,float ** buffer,int num_samples)5326 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
5327 {
5328 float **outputs;
5329 int n=0;
5330 int z = f->channels;
5331 if (z > channels) z = channels;
5332 while (n < num_samples) {
5333 int i;
5334 int k = f->channel_buffer_end - f->channel_buffer_start;
5335 if (n+k >= num_samples) k = num_samples - n;
5336 if (k) {
5337 for (i=0; i < z; ++i)
5338 memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k);
5339 for ( ; i < channels; ++i)
5340 memset(buffer[i]+n, 0, sizeof(float) * k);
5341 }
5342 n += k;
5343 f->channel_buffer_start += k;
5344 if (n == num_samples)
5345 break;
5346 if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5347 break;
5348 }
5349 return n;
5350 }
5351 #endif // STB_VORBIS_NO_PULLDATA_API
5352
5353 /* Version history
5354 1.10 - 2017/03/03 - more robust seeking; fix negative ilog(); clear error in open_memory
5355 1.09 - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
5356 1.08 - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
5357 avoid discarding last frame of audio data
5358 1.07 - 2015/01/16 - fixed some warnings, fix mingw, const-correct API
5359 some more crash fixes when out of memory or with corrupt files
5360 1.06 - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
5361 some crash fixes when out of memory or with corrupt files
5362 1.05 - 2015/04/19 - don't define __forceinline if it's redundant
5363 1.04 - 2014/08/27 - fix missing const-correct case in API
5364 1.03 - 2014/08/07 - Warning fixes
5365 1.02 - 2014/07/09 - Declare qsort compare function _cdecl on windows
5366 1.01 - 2014/06/18 - fix stb_vorbis_get_samples_float
5367 1.0 - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in multichannel
5368 (API change) report sample rate for decode-full-file funcs
5369 0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5370 0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5371 0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5372 0.99993 - remove assert that fired on legal files with empty tables
5373 0.99992 - rewind-to-start
5374 0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5375 0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5376 0.9998 - add a full-decode function with a memory source
5377 0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5378 0.9996 - query length of vorbis stream in samples/seconds
5379 0.9995 - bugfix to another optimization that only happened in certain files
5380 0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5381 0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5382 0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5383 0.9991 - performance improvement of IMDCT
5384 0.999 - (should have been 0.9990) performance improvement of IMDCT
5385 0.998 - no-CRT support from Casey Muratori
5386 0.997 - bugfixes for bugs found by Terje Mathisen
5387 0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5388 0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5389 0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5390 0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5391 0.992 - fixes for MinGW warning
5392 0.991 - turn fast-float-conversion on by default
5393 0.990 - fix push-mode seek recovery if you seek into the headers
5394 0.98b - fix to bad release of 0.98
5395 0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5396 0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5397 0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5398 0.95 - clamping code for 16-bit functions
5399 0.94 - not publically released
5400 0.93 - fixed all-zero-floor case (was decoding garbage)
5401 0.92 - fixed a memory leak
5402 0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5403 0.90 - first public release
5404 */
5405
5406 #endif // STB_VORBIS_HEADER_ONLY
5407
5408
5409 /*
5410 ------------------------------------------------------------------------------
5411 This software is available under 2 licenses -- choose whichever you prefer.
5412 ------------------------------------------------------------------------------
5413 ALTERNATIVE A - MIT License
5414 Copyright (c) 2017 Sean Barrett
5415 Permission is hereby granted, free of charge, to any person obtaining a copy of
5416 this software and associated documentation files (the "Software"), to deal in
5417 the Software without restriction, including without limitation the rights to
5418 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
5419 of the Software, and to permit persons to whom the Software is furnished to do
5420 so, subject to the following conditions:
5421 The above copyright notice and this permission notice shall be included in all
5422 copies or substantial portions of the Software.
5423 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5424 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5425 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5426 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
5427 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
5428 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
5429 SOFTWARE.
5430 ------------------------------------------------------------------------------
5431 ALTERNATIVE B - Public Domain (www.unlicense.org)
5432 This is free and unencumbered software released into the public domain.
5433 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
5434 software, either in source code form or as a compiled binary, for any purpose,
5435 commercial or non-commercial, and by any means.
5436 In jurisdictions that recognize copyright laws, the author or authors of this
5437 software dedicate any and all copyright interest in the software to the public
5438 domain. We make this dedication for the benefit of the public at large and to
5439 the detriment of our heirs and successors. We intend this dedication to be an
5440 overt act of relinquishment in perpetuity of all present and future rights to
5441 this software under copyright law.
5442 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5443 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5444 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5445 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
5446 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
5447 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
5448 ------------------------------------------------------------------------------
5449 */
5450