1 // Ogg Vorbis audio decoder - v1.09 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking sponsored
7 // by Phillip Bennefall, Marc Andersen, Aaron Baker, Elias Software,
8 // Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 // This software is dual-licensed to the public domain and under the following
13 // license: you are granted a perpetual, irrevocable license to copy, modify,
14 // publish, and distribute this file as you see fit.
15 //
16 // No warranty for any purpose is expressed or implied by the author (nor
17 // by RAD Game Tools). Report bugs and send enhancements to the author.
18 //
19 // Limitations:
20 //
21 // - floor 0 not supported (used in old ogg vorbis files pre-2004)
22 // - lossless sample-truncation at beginning ignored
23 // - cannot concatenate multiple vorbis streams
24 // - sample positions are 32-bit, limiting seekable 192Khz
25 // files to around 6 hours (Ogg supports 64-bit)
26 //
27 // Feature contributors:
28 // Dougall Johnson (sample-exact seeking)
29 //
30 // Bugfix/warning contributors:
31 // Terje Mathisen Niklas Frykholm Andy Hill
32 // Casey Muratori John Bolton Gargaj
33 // Laurent Gomila Marc LeBlanc Ronny Chevalier
34 // Bernhard Wodo Evan Balster alxprd@github
35 // Tom Beaumont Ingo Leitgeb Nicolas Guillemot
36 // Phillip Bennefall Rohit Thiago Goulart
37 // manxorist@github saga musix
38 //
39 // Partial history:
40 // 1.09 - 2016/04/04 - back out 'truncation of last frame' fix from previous version
41 // 1.08 - 2016/04/02 - warnings; setup memory leaks; truncation of last frame
42 // 1.07 - 2015/01/16 - fixes for crashes on invalid files; warning fixes; const
43 // 1.06 - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
44 // some crash fixes when out of memory or with corrupt files
45 // fix some inappropriately signed shifts
46 // 1.05 - 2015/04/19 - don't define __forceinline if it's redundant
47 // 1.04 - 2014/08/27 - fix missing const-correct case in API
48 // 1.03 - 2014/08/07 - warning fixes
49 // 1.02 - 2014/07/09 - declare qsort comparison as explicitly _cdecl in Windows
50 // 1.01 - 2014/06/18 - fix stb_vorbis_get_samples_float (interleaved was correct)
51 // 1.0 - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
52 // (API change) report sample rate for decode-full-file funcs
53 //
54 // See end of file for full version history.
55
56
57 //////////////////////////////////////////////////////////////////////////////
58 //
59 // HEADER BEGINS HERE
60 //
61
62 #ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
63 #define STB_VORBIS_INCLUDE_STB_VORBIS_H
64
65 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
66 #define STB_VORBIS_NO_STDIO 1
67 #endif
68
69 #ifndef STB_VORBIS_NO_STDIO
70 #include <stdio.h>
71 #endif
72
73 #ifdef __cplusplus
74 extern "C" {
75 #endif
76
77 /////////// THREAD SAFETY
78
79 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
80 // them from multiple threads at the same time. However, you can have multiple
81 // stb_vorbis* handles and decode from them independently in multiple thrads.
82
83
84 /////////// MEMORY ALLOCATION
85
86 // normally stb_vorbis uses malloc() to allocate memory at startup,
87 // and alloca() to allocate temporary memory during a frame on the
88 // stack. (Memory consumption will depend on the amount of setup
89 // data in the file and how you set the compile flags for speed
90 // vs. size. In my test files the maximal-size usage is ~150KB.)
91 //
92 // You can modify the wrapper functions in the source (setup_malloc,
93 // setup_temp_malloc, temp_malloc) to change this behavior, or you
94 // can use a simpler allocation model: you pass in a buffer from
95 // which stb_vorbis will allocate _all_ its memory (including the
96 // temp memory). "open" may fail with a VORBIS_outofmem if you
97 // do not pass in enough data; there is no way to determine how
98 // much you do need except to succeed (at which point you can
99 // query get_info to find the exact amount required. yes I know
100 // this is lame).
101 //
102 // If you pass in a non-NULL buffer of the type below, allocation
103 // will occur from it as described above. Otherwise just pass NULL
104 // to use malloc()/alloca()
105
106 typedef struct
107 {
108 char *alloc_buffer;
109 int alloc_buffer_length_in_bytes;
110 } stb_vorbis_alloc;
111
112
113 /////////// FUNCTIONS USEABLE WITH ALL INPUT MODES
114
115 typedef struct stb_vorbis stb_vorbis;
116
117 typedef struct
118 {
119 unsigned int sample_rate;
120 int channels;
121
122 unsigned int setup_memory_required;
123 unsigned int setup_temp_memory_required;
124 unsigned int temp_memory_required;
125
126 int max_frame_size;
127 } stb_vorbis_info;
128
129 // get general information about the file
130 extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
131
132 // get the last error detected (clears it, too)
133 extern int stb_vorbis_get_error(stb_vorbis *f);
134
135 // close an ogg vorbis file and free all memory in use
136 extern void stb_vorbis_close(stb_vorbis *f);
137
138 // this function returns the offset (in samples) from the beginning of the
139 // file that will be returned by the next decode, if it is known, or -1
140 // otherwise. after a flush_pushdata() call, this may take a while before
141 // it becomes valid again.
142 // NOT WORKING YET after a seek with PULLDATA API
143 extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
144
145 // returns the current seek point within the file, or offset from the beginning
146 // of the memory buffer. In pushdata mode it returns 0.
147 extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
148
149 /////////// PUSHDATA API
150
151 #ifndef STB_VORBIS_NO_PUSHDATA_API
152
153 // this API allows you to get blocks of data from any source and hand
154 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
155 // you how much it used, and you have to give it the rest next time;
156 // and stb_vorbis may not have enough data to work with and you will
157 // need to give it the same data again PLUS more. Note that the Vorbis
158 // specification does not bound the size of an individual frame.
159
160 extern stb_vorbis *stb_vorbis_open_pushdata(
161 const unsigned char * datablock, int datablock_length_in_bytes,
162 int *datablock_memory_consumed_in_bytes,
163 int *error,
164 const stb_vorbis_alloc *alloc_buffer);
165 // create a vorbis decoder by passing in the initial data block containing
166 // the ogg&vorbis headers (you don't need to do parse them, just provide
167 // the first N bytes of the file--you're told if it's not enough, see below)
168 // on success, returns an stb_vorbis *, does not set error, returns the amount of
169 // data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
170 // on failure, returns NULL on error and sets *error, does not change *datablock_memory_consumed
171 // if returns NULL and *error is VORBIS_need_more_data, then the input block was
172 // incomplete and you need to pass in a larger block from the start of the file
173
174 extern int stb_vorbis_decode_frame_pushdata(
175 stb_vorbis *f,
176 const unsigned char *datablock, int datablock_length_in_bytes,
177 int *channels, // place to write number of float * buffers
178 float ***output, // place to write float ** array of float * buffers
179 int *samples // place to write number of output samples
180 );
181 // decode a frame of audio sample data if possible from the passed-in data block
182 //
183 // return value: number of bytes we used from datablock
184 //
185 // possible cases:
186 // 0 bytes used, 0 samples output (need more data)
187 // N bytes used, 0 samples output (resynching the stream, keep going)
188 // N bytes used, M samples output (one frame of data)
189 // note that after opening a file, you will ALWAYS get one N-bytes,0-sample
190 // frame, because Vorbis always "discards" the first frame.
191 //
192 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
193 // instead only datablock_length_in_bytes-3 or less. This is because it wants
194 // to avoid missing parts of a page header if they cross a datablock boundary,
195 // without writing state-machiney code to record a partial detection.
196 //
197 // The number of channels returned are stored in *channels (which can be
198 // NULL--it is always the same as the number of channels reported by
199 // get_info). *output will contain an array of float* buffers, one per
200 // channel. In other words, (*output)[0][0] contains the first sample from
201 // the first channel, and (*output)[1][0] contains the first sample from
202 // the second channel.
203
204 extern void stb_vorbis_flush_pushdata(stb_vorbis *f);
205 // inform stb_vorbis that your next datablock will not be contiguous with
206 // previous ones (e.g. you've seeked in the data); future attempts to decode
207 // frames will cause stb_vorbis to resynchronize (as noted above), and
208 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
209 // will begin decoding the _next_ frame.
210 //
211 // if you want to seek using pushdata, you need to seek in your file, then
212 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
213 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
214 // if you don't like the result, seek your file again and repeat.
215 #endif
216
217
218 ////////// PULLING INPUT API
219
220 #ifndef STB_VORBIS_NO_PULLDATA_API
221 // This API assumes stb_vorbis is allowed to pull data from a source--
222 // either a block of memory containing the _entire_ vorbis stream, or a
223 // FILE * that you or it create, or possibly some other reading mechanism
224 // if you go modify the source to replace the FILE * case with some kind
225 // of callback to your code. (But if you don't support seeking, you may
226 // just want to go ahead and use pushdata.)
227
228 #if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
229 extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
230 #endif
231 #if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
232 extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
233 #endif
234 // decode an entire file and output the data interleaved into a malloc()ed
235 // buffer stored in *output. The return value is the number of samples
236 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
237 // When you're done with it, just free() the pointer returned in *output.
238
239 extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
240 int *error, const stb_vorbis_alloc *alloc_buffer);
241 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
242 // this must be the entire stream!). on failure, returns NULL and sets *error
243
244 #ifndef STB_VORBIS_NO_STDIO
245 extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
246 int *error, const stb_vorbis_alloc *alloc_buffer);
247 // create an ogg vorbis decoder from a filename via fopen(). on failure,
248 // returns NULL and sets *error (possibly to VORBIS_file_open_failure).
249
250 extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
251 int *error, const stb_vorbis_alloc *alloc_buffer);
252 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
253 // the _current_ seek point (ftell). on failure, returns NULL and sets *error.
254 // note that stb_vorbis must "own" this stream; if you seek it in between
255 // calls to stb_vorbis, it will become confused. Morever, if you attempt to
256 // perform stb_vorbis_seek_*() operations on this file, it will assume it
257 // owns the _entire_ rest of the file after the start point. Use the next
258 // function, stb_vorbis_open_file_section(), to limit it.
259
260 extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
261 int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
262 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
263 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
264 // on failure, returns NULL and sets *error. note that stb_vorbis must "own"
265 // this stream; if you seek it in between calls to stb_vorbis, it will become
266 // confused.
267 #endif
268
269 extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
270 extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
271 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
272 // after calling seek_frame(), the next call to get_frame_*() will include
273 // the specified sample. after calling stb_vorbis_seek(), the next call to
274 // stb_vorbis_get_samples_* will start with the specified sample. If you
275 // do not need to seek to EXACTLY the target sample when using get_samples_*,
276 // you can also use seek_frame().
277
278 extern void stb_vorbis_seek_start(stb_vorbis *f);
279 // this function is equivalent to stb_vorbis_seek(f,0)
280
281 extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
282 extern float stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
283 // these functions return the total length of the vorbis stream
284
285 extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
286 // decode the next frame and return the number of samples. the number of
287 // channels returned are stored in *channels (which can be NULL--it is always
288 // the same as the number of channels reported by get_info). *output will
289 // contain an array of float* buffers, one per channel. These outputs will
290 // be overwritten on the next call to stb_vorbis_get_frame_*.
291 //
292 // You generally should not intermix calls to stb_vorbis_get_frame_*()
293 // and stb_vorbis_get_samples_*(), since the latter calls the former.
294
295 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
296 extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
297 extern int stb_vorbis_get_frame_short (stb_vorbis *f, int num_c, short **buffer, int num_samples);
298 #endif
299 // decode the next frame and return the number of *samples* per channel.
300 // Note that for interleaved data, you pass in the number of shorts (the
301 // size of your array), but the return value is the number of samples per
302 // channel, not the total number of samples.
303 //
304 // The data is coerced to the number of channels you request according to the
305 // channel coercion rules (see below). You must pass in the size of your
306 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
307 // The maximum buffer size needed can be gotten from get_info(); however,
308 // the Vorbis I specification implies an absolute maximum of 4096 samples
309 // per channel.
310
311 // Channel coercion rules:
312 // Let M be the number of channels requested, and N the number of channels present,
313 // and Cn be the nth channel; let stereo L be the sum of all L and center channels,
314 // and stereo R be the sum of all R and center channels (channel assignment from the
315 // vorbis spec).
316 // M N output
317 // 1 k sum(Ck) for all k
318 // 2 * stereo L, stereo R
319 // k l k > l, the first l channels, then 0s
320 // k l k <= l, the first k channels
321 // Note that this is not _good_ surround etc. mixing at all! It's just so
322 // you get something useful.
323
324 extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
325 extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
326 // gets num_samples samples, not necessarily on a frame boundary--this requires
327 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
328 // Returns the number of samples stored per channel; it may be less than requested
329 // at the end of the file. If there are no more samples in the file, returns 0.
330
331 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
332 extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
333 extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
334 #endif
335 // gets num_samples samples, not necessarily on a frame boundary--this requires
336 // buffering so you have to supply the buffers. Applies the coercion rules above
337 // to produce 'channels' channels. Returns the number of samples stored per channel;
338 // it may be less than requested at the end of the file. If there are no more
339 // samples in the file, returns 0.
340
341 #endif
342
343 //////// ERROR CODES
344
345 enum STBVorbisError
346 {
347 VORBIS__no_error,
348
349 VORBIS_need_more_data=1, // not a real error
350
351 VORBIS_invalid_api_mixing, // can't mix API modes
352 VORBIS_outofmem, // not enough memory
353 VORBIS_feature_not_supported, // uses floor 0
354 VORBIS_too_many_channels, // STB_VORBIS_MAX_CHANNELS is too small
355 VORBIS_file_open_failure, // fopen() failed
356 VORBIS_seek_without_length, // can't seek in unknown-length file
357
358 VORBIS_unexpected_eof=10, // file is truncated?
359 VORBIS_seek_invalid, // seek past EOF
360
361 // decoding errors (corrupt/invalid stream) -- you probably
362 // don't care about the exact details of these
363
364 // vorbis errors:
365 VORBIS_invalid_setup=20,
366 VORBIS_invalid_stream,
367
368 // ogg errors:
369 VORBIS_missing_capture_pattern=30,
370 VORBIS_invalid_stream_structure_version,
371 VORBIS_continued_packet_flag_invalid,
372 VORBIS_incorrect_stream_serial_number,
373 VORBIS_invalid_first_page,
374 VORBIS_bad_packet_type,
375 VORBIS_cant_find_last_page,
376 VORBIS_seek_failed
377 };
378
379
380 #ifdef __cplusplus
381 }
382 #endif
383
384 #endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
385 //
386 // HEADER ENDS HERE
387 //
388 //////////////////////////////////////////////////////////////////////////////
389
390 #ifndef STB_VORBIS_HEADER_ONLY
391
392 // global configuration settings (e.g. set these in the project/makefile),
393 // or just set them in this file at the top (although ideally the first few
394 // should be visible when the header file is compiled too, although it's not
395 // crucial)
396
397 // STB_VORBIS_NO_PUSHDATA_API
398 // does not compile the code for the various stb_vorbis_*_pushdata()
399 // functions
400 // #define STB_VORBIS_NO_PUSHDATA_API
401
402 // STB_VORBIS_NO_PULLDATA_API
403 // does not compile the code for the non-pushdata APIs
404 // #define STB_VORBIS_NO_PULLDATA_API
405
406 // STB_VORBIS_NO_STDIO
407 // does not compile the code for the APIs that use FILE *s internally
408 // or externally (implied by STB_VORBIS_NO_PULLDATA_API)
409 // #define STB_VORBIS_NO_STDIO
410
411 // STB_VORBIS_NO_INTEGER_CONVERSION
412 // does not compile the code for converting audio sample data from
413 // float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
414 // #define STB_VORBIS_NO_INTEGER_CONVERSION
415
416 // STB_VORBIS_NO_FAST_SCALED_FLOAT
417 // does not use a fast float-to-int trick to accelerate float-to-int on
418 // most platforms which requires endianness be defined correctly.
419 //#define STB_VORBIS_NO_FAST_SCALED_FLOAT
420
421
422 // STB_VORBIS_MAX_CHANNELS [number]
423 // globally define this to the maximum number of channels you need.
424 // The spec does not put a restriction on channels except that
425 // the count is stored in a byte, so 255 is the hard limit.
426 // Reducing this saves about 16 bytes per value, so using 16 saves
427 // (255-16)*16 or around 4KB. Plus anything other memory usage
428 // I forgot to account for. Can probably go as low as 8 (7.1 audio),
429 // 6 (5.1 audio), or 2 (stereo only).
430 #ifndef STB_VORBIS_MAX_CHANNELS
431 #define STB_VORBIS_MAX_CHANNELS 16 // enough for anyone?
432 #endif
433
434 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
435 // after a flush_pushdata(), stb_vorbis begins scanning for the
436 // next valid page, without backtracking. when it finds something
437 // that looks like a page, it streams through it and verifies its
438 // CRC32. Should that validation fail, it keeps scanning. But it's
439 // possible that _while_ streaming through to check the CRC32 of
440 // one candidate page, it sees another candidate page. This #define
441 // determines how many "overlapping" candidate pages it can search
442 // at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
443 // garbage pages could be as big as 64KB, but probably average ~16KB.
444 // So don't hose ourselves by scanning an apparent 64KB page and
445 // missing a ton of real ones in the interim; so minimum of 2
446 #ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
447 #define STB_VORBIS_PUSHDATA_CRC_COUNT 4
448 #endif
449
450 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
451 // sets the log size of the huffman-acceleration table. Maximum
452 // supported value is 24. with larger numbers, more decodings are O(1),
453 // but the table size is larger so worse cache missing, so you'll have
454 // to probe (and try multiple ogg vorbis files) to find the sweet spot.
455 #ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
456 #define STB_VORBIS_FAST_HUFFMAN_LENGTH 10
457 #endif
458
459 // STB_VORBIS_FAST_BINARY_LENGTH [number]
460 // sets the log size of the binary-search acceleration table. this
461 // is used in similar fashion to the fast-huffman size to set initial
462 // parameters for the binary search
463
464 // STB_VORBIS_FAST_HUFFMAN_INT
465 // The fast huffman tables are much more efficient if they can be
466 // stored as 16-bit results instead of 32-bit results. This restricts
467 // the codebooks to having only 65535 possible outcomes, though.
468 // (At least, accelerated by the huffman table.)
469 #ifndef STB_VORBIS_FAST_HUFFMAN_INT
470 #define STB_VORBIS_FAST_HUFFMAN_SHORT
471 #endif
472
473 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
474 // If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
475 // back on binary searching for the correct one. This requires storing
476 // extra tables with the huffman codes in sorted order. Defining this
477 // symbol trades off space for speed by forcing a linear search in the
478 // non-fast case, except for "sparse" codebooks.
479 // #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
480
481 // STB_VORBIS_DIVIDES_IN_RESIDUE
482 // stb_vorbis precomputes the result of the scalar residue decoding
483 // that would otherwise require a divide per chunk. you can trade off
484 // space for time by defining this symbol.
485 // #define STB_VORBIS_DIVIDES_IN_RESIDUE
486
487 // STB_VORBIS_DIVIDES_IN_CODEBOOK
488 // vorbis VQ codebooks can be encoded two ways: with every case explicitly
489 // stored, or with all elements being chosen from a small range of values,
490 // and all values possible in all elements. By default, stb_vorbis expands
491 // this latter kind out to look like the former kind for ease of decoding,
492 // because otherwise an integer divide-per-vector-element is required to
493 // unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
494 // trade off storage for speed.
495 //#define STB_VORBIS_DIVIDES_IN_CODEBOOK
496
497 #ifdef STB_VORBIS_CODEBOOK_SHORTS
498 #error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
499 #endif
500
501 // STB_VORBIS_DIVIDE_TABLE
502 // this replaces small integer divides in the floor decode loop with
503 // table lookups. made less than 1% difference, so disabled by default.
504
505 // STB_VORBIS_NO_INLINE_DECODE
506 // disables the inlining of the scalar codebook fast-huffman decode.
507 // might save a little codespace; useful for debugging
508 // #define STB_VORBIS_NO_INLINE_DECODE
509
510 // STB_VORBIS_NO_DEFER_FLOOR
511 // Normally we only decode the floor without synthesizing the actual
512 // full curve. We can instead synthesize the curve immediately. This
513 // requires more memory and is very likely slower, so I don't think
514 // you'd ever want to do it except for debugging.
515 // #define STB_VORBIS_NO_DEFER_FLOOR
516
517
518
519
520 //////////////////////////////////////////////////////////////////////////////
521
522 #ifdef STB_VORBIS_NO_PULLDATA_API
523 #define STB_VORBIS_NO_INTEGER_CONVERSION
524 #define STB_VORBIS_NO_STDIO
525 #endif
526
527 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
528 #define STB_VORBIS_NO_STDIO 1
529 #endif
530
531 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
532 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
533
534 // only need endianness for fast-float-to-int, which we don't
535 // use for pushdata
536
537 #ifndef STB_VORBIS_BIG_ENDIAN
538 #define STB_VORBIS_ENDIAN 0
539 #else
540 #define STB_VORBIS_ENDIAN 1
541 #endif
542
543 #endif
544 #endif
545
546
547 #ifndef STB_VORBIS_NO_STDIO
548 #include <stdio.h>
549 #endif
550
551 #ifndef STB_VORBIS_NO_CRT
552 #include <stdlib.h>
553 #include <string.h>
554 #include <assert.h>
555 #include <math.h>
556 #if !(defined(__APPLE__) || defined(MACOSX) || defined(macintosh) || defined(Macintosh))
557 #if defined(__linux__) || defined(__linux) || defined(__EMSCRIPTEN__)
558 #include <alloca.h>
559 #endif
560 #endif
561 #else // STB_VORBIS_NO_CRT
562 #define NULL 0
563 #define malloc(s) 0
564 #define free(s) ((void) 0)
565 #define realloc(s) 0
566 #endif // STB_VORBIS_NO_CRT
567
568 #include <limits.h>
569
570 #ifdef __MINGW32__
571 // eff you mingw:
572 // "fixed":
573 // http://sourceforge.net/p/mingw-w64/mailman/message/32882927/
574 // "no that broke the build, reverted, who cares about C":
575 // http://sourceforge.net/p/mingw-w64/mailman/message/32890381/
576 #ifdef __forceinline
577 #undef __forceinline
578 #endif
579 #define __forceinline
580 #elif !defined(_MSC_VER)
581 #if __GNUC__
582 #define __forceinline inline
583 #else
584 #define __forceinline
585 #endif
586 #endif
587
588 #if STB_VORBIS_MAX_CHANNELS > 256
589 #error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
590 #endif
591
592 #if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
593 #error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
594 #endif
595
596
597 #if 0
598 #include <crtdbg.h>
599 #define CHECK(f) _CrtIsValidHeapPointer(f->channel_buffers[1])
600 #else
601 #define CHECK(f) ((void) 0)
602 #endif
603
604 #define MAX_BLOCKSIZE_LOG 13 // from specification
605 #define MAX_BLOCKSIZE (1 << MAX_BLOCKSIZE_LOG)
606
607
608 typedef unsigned char uint8;
609 typedef signed char int8;
610 typedef unsigned short uint16;
611 typedef signed short int16;
612 typedef unsigned int uint32;
613 typedef signed int int32;
614
615 #ifndef TRUE
616 #define TRUE 1
617 #define FALSE 0
618 #endif
619
620 typedef float codetype;
621
622 // @NOTE
623 //
624 // Some arrays below are tagged "//varies", which means it's actually
625 // a variable-sized piece of data, but rather than malloc I assume it's
626 // small enough it's better to just allocate it all together with the
627 // main thing
628 //
629 // Most of the variables are specified with the smallest size I could pack
630 // them into. It might give better performance to make them all full-sized
631 // integers. It should be safe to freely rearrange the structures or change
632 // the sizes larger--nothing relies on silently truncating etc., nor the
633 // order of variables.
634
635 #define FAST_HUFFMAN_TABLE_SIZE (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
636 #define FAST_HUFFMAN_TABLE_MASK (FAST_HUFFMAN_TABLE_SIZE - 1)
637
638 typedef struct
639 {
640 int dimensions, entries;
641 uint8 *codeword_lengths;
642 float minimum_value;
643 float delta_value;
644 uint8 value_bits;
645 uint8 lookup_type;
646 uint8 sequence_p;
647 uint8 sparse;
648 uint32 lookup_values;
649 codetype *multiplicands;
650 uint32 *codewords;
651 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
652 int16 fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
653 #else
654 int32 fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
655 #endif
656 uint32 *sorted_codewords;
657 int *sorted_values;
658 int sorted_entries;
659 } Codebook;
660
661 typedef struct
662 {
663 uint8 order;
664 uint16 rate;
665 uint16 bark_map_size;
666 uint8 amplitude_bits;
667 uint8 amplitude_offset;
668 uint8 number_of_books;
669 uint8 book_list[16]; // varies
670 } Floor0;
671
672 typedef struct
673 {
674 uint8 partitions;
675 uint8 partition_class_list[32]; // varies
676 uint8 class_dimensions[16]; // varies
677 uint8 class_subclasses[16]; // varies
678 uint8 class_masterbooks[16]; // varies
679 int16 subclass_books[16][8]; // varies
680 uint16 Xlist[31*8+2]; // varies
681 uint8 sorted_order[31*8+2];
682 uint8 neighbors[31*8+2][2];
683 uint8 floor1_multiplier;
684 uint8 rangebits;
685 int values;
686 } Floor1;
687
688 typedef union
689 {
690 Floor0 floor0;
691 Floor1 floor1;
692 } Floor;
693
694 typedef struct
695 {
696 uint32 begin, end;
697 uint32 part_size;
698 uint8 classifications;
699 uint8 classbook;
700 uint8 **classdata;
701 int16 (*residue_books)[8];
702 } Residue;
703
704 typedef struct
705 {
706 uint8 magnitude;
707 uint8 angle;
708 uint8 mux;
709 } MappingChannel;
710
711 typedef struct
712 {
713 uint16 coupling_steps;
714 MappingChannel *chan;
715 uint8 submaps;
716 uint8 submap_floor[15]; // varies
717 uint8 submap_residue[15]; // varies
718 } Mapping;
719
720 typedef struct
721 {
722 uint8 blockflag;
723 uint8 mapping;
724 uint16 windowtype;
725 uint16 transformtype;
726 } Mode;
727
728 typedef struct
729 {
730 uint32 goal_crc; // expected crc if match
731 int bytes_left; // bytes left in packet
732 uint32 crc_so_far; // running crc
733 int bytes_done; // bytes processed in _current_ chunk
734 uint32 sample_loc; // granule pos encoded in page
735 } CRCscan;
736
737 typedef struct
738 {
739 uint32 page_start, page_end;
740 uint32 last_decoded_sample;
741 } ProbedPage;
742
743 struct stb_vorbis
744 {
745 // user-accessible info
746 unsigned int sample_rate;
747 int channels;
748
749 unsigned int setup_memory_required;
750 unsigned int temp_memory_required;
751 unsigned int setup_temp_memory_required;
752
753 // input config
754 #ifndef STB_VORBIS_NO_STDIO
755 FILE *f;
756 uint32 f_start;
757 int close_on_free;
758 #endif
759
760 uint8 *stream;
761 uint8 *stream_start;
762 uint8 *stream_end;
763
764 uint32 stream_len;
765
766 uint8 push_mode;
767
768 uint32 first_audio_page_offset;
769
770 ProbedPage p_first, p_last;
771
772 // memory management
773 stb_vorbis_alloc alloc;
774 int setup_offset;
775 int temp_offset;
776
777 // run-time results
778 int eof;
779 enum STBVorbisError error;
780
781 // user-useful data
782
783 // header info
784 int blocksize[2];
785 int blocksize_0, blocksize_1;
786 int codebook_count;
787 Codebook *codebooks;
788 int floor_count;
789 uint16 floor_types[64]; // varies
790 Floor *floor_config;
791 int residue_count;
792 uint16 residue_types[64]; // varies
793 Residue *residue_config;
794 int mapping_count;
795 Mapping *mapping;
796 int mode_count;
797 Mode mode_config[64]; // varies
798
799 uint32 total_samples;
800
801 // decode buffer
802 float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
803 float *outputs [STB_VORBIS_MAX_CHANNELS];
804
805 float *previous_window[STB_VORBIS_MAX_CHANNELS];
806 int previous_length;
807
808 #ifndef STB_VORBIS_NO_DEFER_FLOOR
809 int16 *finalY[STB_VORBIS_MAX_CHANNELS];
810 #else
811 float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
812 #endif
813
814 uint32 current_loc; // sample location of next frame to decode
815 int current_loc_valid;
816
817 // per-blocksize precomputed data
818
819 // twiddle factors
820 float *A[2],*B[2],*C[2];
821 float *window[2];
822 uint16 *bit_reverse[2];
823
824 // current page/packet/segment streaming info
825 uint32 serial; // stream serial number for verification
826 int last_page;
827 int segment_count;
828 uint8 segments[255];
829 uint8 page_flag;
830 uint8 bytes_in_seg;
831 uint8 first_decode;
832 int next_seg;
833 int last_seg; // flag that we're on the last segment
834 int last_seg_which; // what was the segment number of the last seg?
835 uint32 acc;
836 int valid_bits;
837 int packet_bytes;
838 int end_seg_with_known_loc;
839 uint32 known_loc_for_packet;
840 int discard_samples_deferred;
841 uint32 samples_output;
842
843 // push mode scanning
844 int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
845 #ifndef STB_VORBIS_NO_PUSHDATA_API
846 CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
847 #endif
848
849 // sample-access
850 int channel_buffer_start;
851 int channel_buffer_end;
852 };
853
854 #if defined(STB_VORBIS_NO_PUSHDATA_API)
855 #define IS_PUSH_MODE(f) FALSE
856 #elif defined(STB_VORBIS_NO_PULLDATA_API)
857 #define IS_PUSH_MODE(f) TRUE
858 #else
859 #define IS_PUSH_MODE(f) ((f)->push_mode)
860 #endif
861
862 typedef struct stb_vorbis vorb;
863
error(vorb * f,enum STBVorbisError e)864 static int error(vorb *f, enum STBVorbisError e)
865 {
866 f->error = e;
867 if (!f->eof && e != VORBIS_need_more_data) {
868 f->error=e; // breakpoint for debugging
869 }
870 return 0;
871 }
872
873
874 // these functions are used for allocating temporary memory
875 // while decoding. if you can afford the stack space, use
876 // alloca(); otherwise, provide a temp buffer and it will
877 // allocate out of those.
878
879 #define array_size_required(count,size) (count*(sizeof(void *)+(size)))
880
881 #define temp_alloc(f,size) (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
882 #ifdef dealloca
883 #define temp_free(f,p) (f->alloc.alloc_buffer ? 0 : dealloca(size))
884 #else
885 #define temp_free(f,p) 0
886 #endif
887 #define temp_alloc_save(f) ((f)->temp_offset)
888 #define temp_alloc_restore(f,p) ((f)->temp_offset = (p))
889
890 #define temp_block_array(f,count,size) make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
891
892 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
make_block_array(void * mem,int count,int size)893 static void *make_block_array(void *mem, int count, int size)
894 {
895 int i;
896 void ** p = (void **) mem;
897 char *q = (char *) (p + count);
898 for (i=0; i < count; ++i) {
899 p[i] = q;
900 q += size;
901 }
902 return p;
903 }
904
setup_malloc(vorb * f,int sz)905 static void *setup_malloc(vorb *f, int sz)
906 {
907 sz = (sz+3) & ~3;
908 f->setup_memory_required += sz;
909 if (f->alloc.alloc_buffer) {
910 void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
911 if (f->setup_offset + sz > f->temp_offset) return NULL;
912 f->setup_offset += sz;
913 return p;
914 }
915 return sz ? malloc(sz) : NULL;
916 }
917
setup_free(vorb * f,void * p)918 static void setup_free(vorb *f, void *p)
919 {
920 if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack
921 free(p);
922 }
923
setup_temp_malloc(vorb * f,int sz)924 static void *setup_temp_malloc(vorb *f, int sz)
925 {
926 sz = (sz+3) & ~3;
927 if (f->alloc.alloc_buffer) {
928 if (f->temp_offset - sz < f->setup_offset) return NULL;
929 f->temp_offset -= sz;
930 return (char *) f->alloc.alloc_buffer + f->temp_offset;
931 }
932 return malloc(sz);
933 }
934
setup_temp_free(vorb * f,void * p,int sz)935 static void setup_temp_free(vorb *f, void *p, int sz)
936 {
937 if (f->alloc.alloc_buffer) {
938 f->temp_offset += (sz+3)&~3;
939 return;
940 }
941 free(p);
942 }
943
944 #define CRC32_POLY 0x04c11db7 // from spec
945
946 static uint32 crc_table[256];
crc32_init(void)947 static void crc32_init(void)
948 {
949 int i,j;
950 uint32 s;
951 for(i=0; i < 256; i++) {
952 for (s=(uint32) i << 24, j=0; j < 8; ++j)
953 s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
954 crc_table[i] = s;
955 }
956 }
957
crc32_update(uint32 crc,uint8 byte)958 static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
959 {
960 return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
961 }
962
963
964 // used in setup, and for huffman that doesn't go fast path
bit_reverse(unsigned int n)965 static unsigned int bit_reverse(unsigned int n)
966 {
967 n = ((n & 0xAAAAAAAA) >> 1) | ((n & 0x55555555) << 1);
968 n = ((n & 0xCCCCCCCC) >> 2) | ((n & 0x33333333) << 2);
969 n = ((n & 0xF0F0F0F0) >> 4) | ((n & 0x0F0F0F0F) << 4);
970 n = ((n & 0xFF00FF00) >> 8) | ((n & 0x00FF00FF) << 8);
971 return (n >> 16) | (n << 16);
972 }
973
square(float x)974 static float square(float x)
975 {
976 return x*x;
977 }
978
979 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
980 // as required by the specification. fast(?) implementation from stb.h
981 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
ilog(int32 n)982 static int ilog(int32 n)
983 {
984 static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
985
986 // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
987 if (n < (1 << 14))
988 if (n < (1 << 4)) return 0 + log2_4[n ];
989 else if (n < (1 << 9)) return 5 + log2_4[n >> 5];
990 else return 10 + log2_4[n >> 10];
991 else if (n < (1 << 24))
992 if (n < (1 << 19)) return 15 + log2_4[n >> 15];
993 else return 20 + log2_4[n >> 20];
994 else if (n < (1 << 29)) return 25 + log2_4[n >> 25];
995 else if (n < (1 << 31)) return 30 + log2_4[n >> 30];
996 else return 0; // signed n returns 0
997 }
998
999 #ifndef M_PI
1000 #define M_PI 3.14159265358979323846264f // from CRC
1001 #endif
1002
1003 // code length assigned to a value with no huffman encoding
1004 #define NO_CODE 255
1005
1006 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
1007 //
1008 // these functions are only called at setup, and only a few times
1009 // per file
1010
float32_unpack(uint32 x)1011 static float float32_unpack(uint32 x)
1012 {
1013 // from the specification
1014 uint32 mantissa = x & 0x1fffff;
1015 uint32 sign = x & 0x80000000;
1016 uint32 exp = (x & 0x7fe00000) >> 21;
1017 double res = sign ? -(double)mantissa : (double)mantissa;
1018 return (float) ldexp((float)res, exp-788);
1019 }
1020
1021
1022 // zlib & jpeg huffman tables assume that the output symbols
1023 // can either be arbitrarily arranged, or have monotonically
1024 // increasing frequencies--they rely on the lengths being sorted;
1025 // this makes for a very simple generation algorithm.
1026 // vorbis allows a huffman table with non-sorted lengths. This
1027 // requires a more sophisticated construction, since symbols in
1028 // order do not map to huffman codes "in order".
add_entry(Codebook * c,uint32 huff_code,int symbol,int count,int len,uint32 * values)1029 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
1030 {
1031 if (!c->sparse) {
1032 c->codewords [symbol] = huff_code;
1033 } else {
1034 c->codewords [count] = huff_code;
1035 c->codeword_lengths[count] = len;
1036 values [count] = symbol;
1037 }
1038 }
1039
compute_codewords(Codebook * c,uint8 * len,int n,uint32 * values)1040 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
1041 {
1042 int i,k,m=0;
1043 uint32 available[32];
1044
1045 memset(available, 0, sizeof(available));
1046 // find the first entry
1047 for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
1048 if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
1049 // add to the list
1050 add_entry(c, 0, k, m++, len[k], values);
1051 // add all available leaves
1052 for (i=1; i <= len[k]; ++i)
1053 available[i] = 1U << (32-i);
1054 // note that the above code treats the first case specially,
1055 // but it's really the same as the following code, so they
1056 // could probably be combined (except the initial code is 0,
1057 // and I use 0 in available[] to mean 'empty')
1058 for (i=k+1; i < n; ++i) {
1059 uint32 res;
1060 int z = len[i], y;
1061 if (z == NO_CODE) continue;
1062 // find lowest available leaf (should always be earliest,
1063 // which is what the specification calls for)
1064 // note that this property, and the fact we can never have
1065 // more than one free leaf at a given level, isn't totally
1066 // trivial to prove, but it seems true and the assert never
1067 // fires, so!
1068 while (z > 0 && !available[z]) --z;
1069 if (z == 0) { return FALSE; }
1070 res = available[z];
1071 assert(z >= 0 && z < 32);
1072 available[z] = 0;
1073 add_entry(c, bit_reverse(res), i, m++, len[i], values);
1074 // propogate availability up the tree
1075 if (z != len[i]) {
1076 assert(len[i] >= 0 && len[i] < 32);
1077 for (y=len[i]; y > z; --y) {
1078 assert(available[y] == 0);
1079 available[y] = res + (1 << (32-y));
1080 }
1081 }
1082 }
1083 return TRUE;
1084 }
1085
1086 // accelerated huffman table allows fast O(1) match of all symbols
1087 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
compute_accelerated_huffman(Codebook * c)1088 static void compute_accelerated_huffman(Codebook *c)
1089 {
1090 int i, len;
1091 for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
1092 c->fast_huffman[i] = -1;
1093
1094 len = c->sparse ? c->sorted_entries : c->entries;
1095 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
1096 if (len > 32767) len = 32767; // largest possible value we can encode!
1097 #endif
1098 for (i=0; i < len; ++i) {
1099 if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
1100 uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
1101 // set table entries for all bit combinations in the higher bits
1102 while (z < FAST_HUFFMAN_TABLE_SIZE) {
1103 c->fast_huffman[z] = i;
1104 z += 1 << c->codeword_lengths[i];
1105 }
1106 }
1107 }
1108 }
1109
1110 #ifdef _MSC_VER
1111 #define STBV_CDECL __cdecl
1112 #else
1113 #define STBV_CDECL
1114 #endif
1115
uint32_compare(const void * p,const void * q)1116 static int STBV_CDECL uint32_compare(const void *p, const void *q)
1117 {
1118 uint32 x = * (uint32 *) p;
1119 uint32 y = * (uint32 *) q;
1120 return x < y ? -1 : x > y;
1121 }
1122
include_in_sort(Codebook * c,uint8 len)1123 static int include_in_sort(Codebook *c, uint8 len)
1124 {
1125 if (c->sparse) { assert(len != NO_CODE); return TRUE; }
1126 if (len == NO_CODE) return FALSE;
1127 if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
1128 return FALSE;
1129 }
1130
1131 // if the fast table above doesn't work, we want to binary
1132 // search them... need to reverse the bits
compute_sorted_huffman(Codebook * c,uint8 * lengths,uint32 * values)1133 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
1134 {
1135 int i, len;
1136 // build a list of all the entries
1137 // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
1138 // this is kind of a frivolous optimization--I don't see any performance improvement,
1139 // but it's like 4 extra lines of code, so.
1140 if (!c->sparse) {
1141 int k = 0;
1142 for (i=0; i < c->entries; ++i)
1143 if (include_in_sort(c, lengths[i]))
1144 c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
1145 assert(k == c->sorted_entries);
1146 } else {
1147 for (i=0; i < c->sorted_entries; ++i)
1148 c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
1149 }
1150
1151 qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
1152 c->sorted_codewords[c->sorted_entries] = 0xffffffff;
1153
1154 len = c->sparse ? c->sorted_entries : c->entries;
1155 // now we need to indicate how they correspond; we could either
1156 // #1: sort a different data structure that says who they correspond to
1157 // #2: for each sorted entry, search the original list to find who corresponds
1158 // #3: for each original entry, find the sorted entry
1159 // #1 requires extra storage, #2 is slow, #3 can use binary search!
1160 for (i=0; i < len; ++i) {
1161 int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
1162 if (include_in_sort(c,huff_len)) {
1163 uint32 code = bit_reverse(c->codewords[i]);
1164 int x=0, n=c->sorted_entries;
1165 while (n > 1) {
1166 // invariant: sc[x] <= code < sc[x+n]
1167 int m = x + (n >> 1);
1168 if (c->sorted_codewords[m] <= code) {
1169 x = m;
1170 n -= (n>>1);
1171 } else {
1172 n >>= 1;
1173 }
1174 }
1175 assert(c->sorted_codewords[x] == code);
1176 if (c->sparse) {
1177 c->sorted_values[x] = values[i];
1178 c->codeword_lengths[x] = huff_len;
1179 } else {
1180 c->sorted_values[x] = i;
1181 }
1182 }
1183 }
1184 }
1185
1186 // only run while parsing the header (3 times)
vorbis_validate(uint8 * data)1187 static int vorbis_validate(uint8 *data)
1188 {
1189 static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
1190 return memcmp(data, vorbis, 6) == 0;
1191 }
1192
1193 // called from setup only, once per code book
1194 // (formula implied by specification)
lookup1_values(int entries,int dim)1195 static int lookup1_values(int entries, int dim)
1196 {
1197 int r = (int) floor(exp((float) log((float) entries) / dim));
1198 if ((int) floor(pow((float) r+1, dim)) <= entries) // (int) cast for MinGW warning;
1199 ++r; // floor() to avoid _ftol() when non-CRT
1200 assert(pow((float) r+1, dim) > entries);
1201 assert((int) floor(pow((float) r, dim)) <= entries); // (int),floor() as above
1202 return r;
1203 }
1204
1205 // called twice per file
compute_twiddle_factors(int n,float * A,float * B,float * C)1206 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
1207 {
1208 int n4 = n >> 2, n8 = n >> 3;
1209 int k,k2;
1210
1211 for (k=k2=0; k < n4; ++k,k2+=2) {
1212 A[k2 ] = (float) cos(4*k*M_PI/n);
1213 A[k2+1] = (float) -sin(4*k*M_PI/n);
1214 B[k2 ] = (float) cos((k2+1)*M_PI/n/2) * 0.5f;
1215 B[k2+1] = (float) sin((k2+1)*M_PI/n/2) * 0.5f;
1216 }
1217 for (k=k2=0; k < n8; ++k,k2+=2) {
1218 C[k2 ] = (float) cos(2*(k2+1)*M_PI/n);
1219 C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
1220 }
1221 }
1222
compute_window(int n,float * window)1223 static void compute_window(int n, float *window)
1224 {
1225 int n2 = n >> 1, i;
1226 for (i=0; i < n2; ++i)
1227 window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
1228 }
1229
compute_bitreverse(int n,uint16 * rev)1230 static void compute_bitreverse(int n, uint16 *rev)
1231 {
1232 int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
1233 int i, n8 = n >> 3;
1234 for (i=0; i < n8; ++i)
1235 rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
1236 }
1237
init_blocksize(vorb * f,int b,int n)1238 static int init_blocksize(vorb *f, int b, int n)
1239 {
1240 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
1241 f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1242 f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1243 f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
1244 if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
1245 compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
1246 f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1247 if (!f->window[b]) return error(f, VORBIS_outofmem);
1248 compute_window(n, f->window[b]);
1249 f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
1250 if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
1251 compute_bitreverse(n, f->bit_reverse[b]);
1252 return TRUE;
1253 }
1254
neighbors(uint16 * x,int n,int * plow,int * phigh)1255 static void neighbors(uint16 *x, int n, int *plow, int *phigh)
1256 {
1257 int low = -1;
1258 int high = 65536;
1259 int i;
1260 for (i=0; i < n; ++i) {
1261 if (x[i] > low && x[i] < x[n]) { *plow = i; low = x[i]; }
1262 if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
1263 }
1264 }
1265
1266 // this has been repurposed so y is now the original index instead of y
1267 typedef struct
1268 {
1269 uint16 x,y;
1270 } Point;
1271
point_compare(const void * p,const void * q)1272 static int STBV_CDECL point_compare(const void *p, const void *q)
1273 {
1274 Point *a = (Point *) p;
1275 Point *b = (Point *) q;
1276 return a->x < b->x ? -1 : a->x > b->x;
1277 }
1278
1279 //
1280 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
1281
1282
1283 #if defined(STB_VORBIS_NO_STDIO)
1284 #define USE_MEMORY(z) TRUE
1285 #else
1286 #define USE_MEMORY(z) ((z)->stream)
1287 #endif
1288
get8(vorb * z)1289 static uint8 get8(vorb *z)
1290 {
1291 if (USE_MEMORY(z)) {
1292 if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
1293 return *z->stream++;
1294 }
1295
1296 #ifndef STB_VORBIS_NO_STDIO
1297 {
1298 int c = fgetc(z->f);
1299 if (c == EOF) { z->eof = TRUE; return 0; }
1300 return c;
1301 }
1302 #endif
1303 }
1304
get32(vorb * f)1305 static uint32 get32(vorb *f)
1306 {
1307 uint32 x;
1308 x = get8(f);
1309 x += get8(f) << 8;
1310 x += get8(f) << 16;
1311 x += (uint32) get8(f) << 24;
1312 return x;
1313 }
1314
getn(vorb * z,uint8 * data,int n)1315 static int getn(vorb *z, uint8 *data, int n)
1316 {
1317 if (USE_MEMORY(z)) {
1318 if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
1319 memcpy(data, z->stream, n);
1320 z->stream += n;
1321 return 1;
1322 }
1323
1324 #ifndef STB_VORBIS_NO_STDIO
1325 if (fread(data, n, 1, z->f) == 1)
1326 return 1;
1327 else {
1328 z->eof = 1;
1329 return 0;
1330 }
1331 #endif
1332 }
1333
skip(vorb * z,int n)1334 static void skip(vorb *z, int n)
1335 {
1336 if (USE_MEMORY(z)) {
1337 z->stream += n;
1338 if (z->stream >= z->stream_end) z->eof = 1;
1339 return;
1340 }
1341 #ifndef STB_VORBIS_NO_STDIO
1342 {
1343 long x = ftell(z->f);
1344 fseek(z->f, x+n, SEEK_SET);
1345 }
1346 #endif
1347 }
1348
set_file_offset(stb_vorbis * f,unsigned int loc)1349 static int set_file_offset(stb_vorbis *f, unsigned int loc)
1350 {
1351 #ifndef STB_VORBIS_NO_PUSHDATA_API
1352 if (f->push_mode) return 0;
1353 #endif
1354 f->eof = 0;
1355 if (USE_MEMORY(f)) {
1356 if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
1357 f->stream = f->stream_end;
1358 f->eof = 1;
1359 return 0;
1360 } else {
1361 f->stream = f->stream_start + loc;
1362 return 1;
1363 }
1364 }
1365 #ifndef STB_VORBIS_NO_STDIO
1366 if (loc + f->f_start < loc || loc >= 0x80000000) {
1367 loc = 0x7fffffff;
1368 f->eof = 1;
1369 } else {
1370 loc += f->f_start;
1371 }
1372 if (!fseek(f->f, loc, SEEK_SET))
1373 return 1;
1374 f->eof = 1;
1375 fseek(f->f, f->f_start, SEEK_END);
1376 return 0;
1377 #endif
1378 }
1379
1380
1381 static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
1382
capture_pattern(vorb * f)1383 static int capture_pattern(vorb *f)
1384 {
1385 if (0x4f != get8(f)) return FALSE;
1386 if (0x67 != get8(f)) return FALSE;
1387 if (0x67 != get8(f)) return FALSE;
1388 if (0x53 != get8(f)) return FALSE;
1389 return TRUE;
1390 }
1391
1392 #define PAGEFLAG_continued_packet 1
1393 #define PAGEFLAG_first_page 2
1394 #define PAGEFLAG_last_page 4
1395
start_page_no_capturepattern(vorb * f)1396 static int start_page_no_capturepattern(vorb *f)
1397 {
1398 uint32 loc0,loc1,n;
1399 // stream structure version
1400 if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
1401 // header flag
1402 f->page_flag = get8(f);
1403 // absolute granule position
1404 loc0 = get32(f);
1405 loc1 = get32(f);
1406 // @TODO: validate loc0,loc1 as valid positions?
1407 // stream serial number -- vorbis doesn't interleave, so discard
1408 get32(f);
1409 //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
1410 // page sequence number
1411 n = get32(f);
1412 f->last_page = n;
1413 // CRC32
1414 get32(f);
1415 // page_segments
1416 f->segment_count = get8(f);
1417 if (!getn(f, f->segments, f->segment_count))
1418 return error(f, VORBIS_unexpected_eof);
1419 // assume we _don't_ know any the sample position of any segments
1420 f->end_seg_with_known_loc = -2;
1421 if (loc0 != ~0U || loc1 != ~0U) {
1422 int i;
1423 // determine which packet is the last one that will complete
1424 for (i=f->segment_count-1; i >= 0; --i)
1425 if (f->segments[i] < 255)
1426 break;
1427 // 'i' is now the index of the _last_ segment of a packet that ends
1428 if (i >= 0) {
1429 f->end_seg_with_known_loc = i;
1430 f->known_loc_for_packet = loc0;
1431 }
1432 }
1433 if (f->first_decode) {
1434 int i,len;
1435 ProbedPage p;
1436 len = 0;
1437 for (i=0; i < f->segment_count; ++i)
1438 len += f->segments[i];
1439 len += 27 + f->segment_count;
1440 p.page_start = f->first_audio_page_offset;
1441 p.page_end = p.page_start + len;
1442 p.last_decoded_sample = loc0;
1443 f->p_first = p;
1444 }
1445 f->next_seg = 0;
1446 return TRUE;
1447 }
1448
start_page(vorb * f)1449 static int start_page(vorb *f)
1450 {
1451 if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
1452 return start_page_no_capturepattern(f);
1453 }
1454
start_packet(vorb * f)1455 static int start_packet(vorb *f)
1456 {
1457 while (f->next_seg == -1) {
1458 if (!start_page(f)) return FALSE;
1459 if (f->page_flag & PAGEFLAG_continued_packet)
1460 return error(f, VORBIS_continued_packet_flag_invalid);
1461 }
1462 f->last_seg = FALSE;
1463 f->valid_bits = 0;
1464 f->packet_bytes = 0;
1465 f->bytes_in_seg = 0;
1466 // f->next_seg is now valid
1467 return TRUE;
1468 }
1469
maybe_start_packet(vorb * f)1470 static int maybe_start_packet(vorb *f)
1471 {
1472 if (f->next_seg == -1) {
1473 int x = get8(f);
1474 if (f->eof) return FALSE; // EOF at page boundary is not an error!
1475 if (0x4f != x ) return error(f, VORBIS_missing_capture_pattern);
1476 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1477 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1478 if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1479 if (!start_page_no_capturepattern(f)) return FALSE;
1480 if (f->page_flag & PAGEFLAG_continued_packet) {
1481 // set up enough state that we can read this packet if we want,
1482 // e.g. during recovery
1483 f->last_seg = FALSE;
1484 f->bytes_in_seg = 0;
1485 return error(f, VORBIS_continued_packet_flag_invalid);
1486 }
1487 }
1488 return start_packet(f);
1489 }
1490
next_segment(vorb * f)1491 static int next_segment(vorb *f)
1492 {
1493 int len;
1494 if (f->last_seg) return 0;
1495 if (f->next_seg == -1) {
1496 f->last_seg_which = f->segment_count-1; // in case start_page fails
1497 if (!start_page(f)) { f->last_seg = 1; return 0; }
1498 if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1499 }
1500 len = f->segments[f->next_seg++];
1501 if (len < 255) {
1502 f->last_seg = TRUE;
1503 f->last_seg_which = f->next_seg-1;
1504 }
1505 if (f->next_seg >= f->segment_count)
1506 f->next_seg = -1;
1507 assert(f->bytes_in_seg == 0);
1508 f->bytes_in_seg = len;
1509 return len;
1510 }
1511
1512 #define EOP (-1)
1513 #define INVALID_BITS (-1)
1514
get8_packet_raw(vorb * f)1515 static int get8_packet_raw(vorb *f)
1516 {
1517 if (!f->bytes_in_seg) { // CLANG!
1518 if (f->last_seg) return EOP;
1519 else if (!next_segment(f)) return EOP;
1520 }
1521 assert(f->bytes_in_seg > 0);
1522 --f->bytes_in_seg;
1523 ++f->packet_bytes;
1524 return get8(f);
1525 }
1526
get8_packet(vorb * f)1527 static int get8_packet(vorb *f)
1528 {
1529 int x = get8_packet_raw(f);
1530 f->valid_bits = 0;
1531 return x;
1532 }
1533
flush_packet(vorb * f)1534 static void flush_packet(vorb *f)
1535 {
1536 while (get8_packet_raw(f) != EOP);
1537 }
1538
1539 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1540 // as the huffman decoder?
get_bits(vorb * f,int n)1541 static uint32 get_bits(vorb *f, int n)
1542 {
1543 uint32 z;
1544
1545 if (f->valid_bits < 0) return 0;
1546 if (f->valid_bits < n) {
1547 if (n > 24) {
1548 // the accumulator technique below would not work correctly in this case
1549 z = get_bits(f, 24);
1550 z += get_bits(f, n-24) << 24;
1551 return z;
1552 }
1553 if (f->valid_bits == 0) f->acc = 0;
1554 while (f->valid_bits < n) {
1555 int z = get8_packet_raw(f);
1556 if (z == EOP) {
1557 f->valid_bits = INVALID_BITS;
1558 return 0;
1559 }
1560 f->acc += z << f->valid_bits;
1561 f->valid_bits += 8;
1562 }
1563 }
1564 if (f->valid_bits < 0) return 0;
1565 z = f->acc & ((1 << n)-1);
1566 f->acc >>= n;
1567 f->valid_bits -= n;
1568 return z;
1569 }
1570
1571 // @OPTIMIZE: primary accumulator for huffman
1572 // expand the buffer to as many bits as possible without reading off end of packet
1573 // it might be nice to allow f->valid_bits and f->acc to be stored in registers,
1574 // e.g. cache them locally and decode locally
prep_huffman(vorb * f)1575 static __forceinline void prep_huffman(vorb *f)
1576 {
1577 if (f->valid_bits <= 24) {
1578 if (f->valid_bits == 0) f->acc = 0;
1579 do {
1580 int z;
1581 if (f->last_seg && !f->bytes_in_seg) return;
1582 z = get8_packet_raw(f);
1583 if (z == EOP) return;
1584 f->acc += (unsigned) z << f->valid_bits;
1585 f->valid_bits += 8;
1586 } while (f->valid_bits <= 24);
1587 }
1588 }
1589
1590 enum
1591 {
1592 VORBIS_packet_id = 1,
1593 VORBIS_packet_comment = 3,
1594 VORBIS_packet_setup = 5
1595 };
1596
codebook_decode_scalar_raw(vorb * f,Codebook * c)1597 static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
1598 {
1599 int i;
1600 prep_huffman(f);
1601
1602 if (c->codewords == NULL && c->sorted_codewords == NULL)
1603 return -1;
1604
1605 // cases to use binary search: sorted_codewords && !c->codewords
1606 // sorted_codewords && c->entries > 8
1607 if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
1608 // binary search
1609 uint32 code = bit_reverse(f->acc);
1610 int x=0, n=c->sorted_entries, len;
1611
1612 while (n > 1) {
1613 // invariant: sc[x] <= code < sc[x+n]
1614 int m = x + (n >> 1);
1615 if (c->sorted_codewords[m] <= code) {
1616 x = m;
1617 n -= (n>>1);
1618 } else {
1619 n >>= 1;
1620 }
1621 }
1622 // x is now the sorted index
1623 if (!c->sparse) x = c->sorted_values[x];
1624 // x is now sorted index if sparse, or symbol otherwise
1625 len = c->codeword_lengths[x];
1626 if (f->valid_bits >= len) {
1627 f->acc >>= len;
1628 f->valid_bits -= len;
1629 return x;
1630 }
1631
1632 f->valid_bits = 0;
1633 return -1;
1634 }
1635
1636 // if small, linear search
1637 assert(!c->sparse);
1638 for (i=0; i < c->entries; ++i) {
1639 if (c->codeword_lengths[i] == NO_CODE) continue;
1640 if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
1641 if (f->valid_bits >= c->codeword_lengths[i]) {
1642 f->acc >>= c->codeword_lengths[i];
1643 f->valid_bits -= c->codeword_lengths[i];
1644 return i;
1645 }
1646 f->valid_bits = 0;
1647 return -1;
1648 }
1649 }
1650
1651 error(f, VORBIS_invalid_stream);
1652 f->valid_bits = 0;
1653 return -1;
1654 }
1655
1656 #ifndef STB_VORBIS_NO_INLINE_DECODE
1657
1658 #define DECODE_RAW(var, f,c) \
1659 if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) \
1660 prep_huffman(f); \
1661 var = f->acc & FAST_HUFFMAN_TABLE_MASK; \
1662 var = c->fast_huffman[var]; \
1663 if (var >= 0) { \
1664 int n = c->codeword_lengths[var]; \
1665 f->acc >>= n; \
1666 f->valid_bits -= n; \
1667 if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
1668 } else { \
1669 var = codebook_decode_scalar_raw(f,c); \
1670 }
1671
1672 #else
1673
codebook_decode_scalar(vorb * f,Codebook * c)1674 static int codebook_decode_scalar(vorb *f, Codebook *c)
1675 {
1676 int i;
1677 if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
1678 prep_huffman(f);
1679 // fast huffman table lookup
1680 i = f->acc & FAST_HUFFMAN_TABLE_MASK;
1681 i = c->fast_huffman[i];
1682 if (i >= 0) {
1683 f->acc >>= c->codeword_lengths[i];
1684 f->valid_bits -= c->codeword_lengths[i];
1685 if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
1686 return i;
1687 }
1688 return codebook_decode_scalar_raw(f,c);
1689 }
1690
1691 #define DECODE_RAW(var,f,c) var = codebook_decode_scalar(f,c);
1692
1693 #endif
1694
1695 #define DECODE(var,f,c) \
1696 DECODE_RAW(var,f,c) \
1697 if (c->sparse) var = c->sorted_values[var];
1698
1699 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1700 #define DECODE_VQ(var,f,c) DECODE_RAW(var,f,c)
1701 #else
1702 #define DECODE_VQ(var,f,c) DECODE(var,f,c)
1703 #endif
1704
1705
1706
1707
1708
1709
1710 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
1711 // where we avoid one addition
1712 #define CODEBOOK_ELEMENT(c,off) (c->multiplicands[off])
1713 #define CODEBOOK_ELEMENT_FAST(c,off) (c->multiplicands[off])
1714 #define CODEBOOK_ELEMENT_BASE(c) (0)
1715
codebook_decode_start(vorb * f,Codebook * c)1716 static int codebook_decode_start(vorb *f, Codebook *c)
1717 {
1718 int z = -1;
1719
1720 // type 0 is only legal in a scalar context
1721 if (c->lookup_type == 0)
1722 error(f, VORBIS_invalid_stream);
1723 else {
1724 DECODE_VQ(z,f,c);
1725 if (c->sparse) assert(z < c->sorted_entries);
1726 if (z < 0) { // check for EOP
1727 if (!f->bytes_in_seg)
1728 if (f->last_seg)
1729 return z;
1730 error(f, VORBIS_invalid_stream);
1731 }
1732 }
1733 return z;
1734 }
1735
codebook_decode(vorb * f,Codebook * c,float * output,int len)1736 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1737 {
1738 int i,z = codebook_decode_start(f,c);
1739 if (z < 0) return FALSE;
1740 if (len > c->dimensions) len = c->dimensions;
1741
1742 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1743 if (c->lookup_type == 1) {
1744 float last = CODEBOOK_ELEMENT_BASE(c);
1745 int div = 1;
1746 for (i=0; i < len; ++i) {
1747 int off = (z / div) % c->lookup_values;
1748 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1749 output[i] += val;
1750 if (c->sequence_p) last = val + c->minimum_value;
1751 div *= c->lookup_values;
1752 }
1753 return TRUE;
1754 }
1755 #endif
1756
1757 z *= c->dimensions;
1758 if (c->sequence_p) {
1759 float last = CODEBOOK_ELEMENT_BASE(c);
1760 for (i=0; i < len; ++i) {
1761 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1762 output[i] += val;
1763 last = val + c->minimum_value;
1764 }
1765 } else {
1766 float last = CODEBOOK_ELEMENT_BASE(c);
1767 for (i=0; i < len; ++i) {
1768 output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1769 }
1770 }
1771
1772 return TRUE;
1773 }
1774
codebook_decode_step(vorb * f,Codebook * c,float * output,int len,int step)1775 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1776 {
1777 int i,z = codebook_decode_start(f,c);
1778 float last = CODEBOOK_ELEMENT_BASE(c);
1779 if (z < 0) return FALSE;
1780 if (len > c->dimensions) len = c->dimensions;
1781
1782 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1783 if (c->lookup_type == 1) {
1784 int div = 1;
1785 for (i=0; i < len; ++i) {
1786 int off = (z / div) % c->lookup_values;
1787 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1788 output[i*step] += val;
1789 if (c->sequence_p) last = val;
1790 div *= c->lookup_values;
1791 }
1792 return TRUE;
1793 }
1794 #endif
1795
1796 z *= c->dimensions;
1797 for (i=0; i < len; ++i) {
1798 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1799 output[i*step] += val;
1800 if (c->sequence_p) last = val;
1801 }
1802
1803 return TRUE;
1804 }
1805
codebook_decode_deinterleave_repeat(vorb * f,Codebook * c,float ** outputs,int ch,int * c_inter_p,int * p_inter_p,int len,int total_decode)1806 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1807 {
1808 int c_inter = *c_inter_p;
1809 int p_inter = *p_inter_p;
1810 int i,z, effective = c->dimensions;
1811
1812 // type 0 is only legal in a scalar context
1813 if (c->lookup_type == 0) return error(f, VORBIS_invalid_stream);
1814
1815 while (total_decode > 0) {
1816 float last = CODEBOOK_ELEMENT_BASE(c);
1817 DECODE_VQ(z,f,c);
1818 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1819 assert(!c->sparse || z < c->sorted_entries);
1820 #endif
1821 if (z < 0) {
1822 if (!f->bytes_in_seg)
1823 if (f->last_seg) return FALSE;
1824 return error(f, VORBIS_invalid_stream);
1825 }
1826
1827 // if this will take us off the end of the buffers, stop short!
1828 // we check by computing the length of the virtual interleaved
1829 // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1830 // and the length we'll be using (effective)
1831 if (c_inter + p_inter*ch + effective > len * ch) {
1832 effective = len*ch - (p_inter*ch - c_inter);
1833 }
1834
1835 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1836 if (c->lookup_type == 1) {
1837 int div = 1;
1838 for (i=0; i < effective; ++i) {
1839 int off = (z / div) % c->lookup_values;
1840 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1841 if (outputs[c_inter])
1842 outputs[c_inter][p_inter] += val;
1843 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1844 if (c->sequence_p) last = val;
1845 div *= c->lookup_values;
1846 }
1847 } else
1848 #endif
1849 {
1850 z *= c->dimensions;
1851 if (c->sequence_p) {
1852 for (i=0; i < effective; ++i) {
1853 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1854 if (outputs[c_inter])
1855 outputs[c_inter][p_inter] += val;
1856 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1857 last = val;
1858 }
1859 } else {
1860 for (i=0; i < effective; ++i) {
1861 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1862 if (outputs[c_inter])
1863 outputs[c_inter][p_inter] += val;
1864 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1865 }
1866 }
1867 }
1868
1869 total_decode -= effective;
1870 }
1871 *c_inter_p = c_inter;
1872 *p_inter_p = p_inter;
1873 return TRUE;
1874 }
1875
predict_point(int x,int x0,int x1,int y0,int y1)1876 static int predict_point(int x, int x0, int x1, int y0, int y1)
1877 {
1878 int dy = y1 - y0;
1879 int adx = x1 - x0;
1880 // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
1881 int err = abs(dy) * (x - x0);
1882 int off = err / adx;
1883 return dy < 0 ? y0 - off : y0 + off;
1884 }
1885
1886 // the following table is block-copied from the specification
1887 static float inverse_db_table[256] =
1888 {
1889 1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
1890 1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
1891 1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
1892 2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
1893 2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
1894 3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
1895 4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
1896 6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
1897 7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
1898 1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
1899 1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
1900 1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
1901 2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
1902 2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
1903 3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
1904 4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
1905 5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
1906 7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
1907 9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
1908 1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
1909 1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
1910 2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
1911 2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
1912 3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
1913 4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
1914 5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
1915 7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
1916 9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
1917 0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
1918 0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
1919 0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
1920 0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
1921 0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
1922 0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
1923 0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
1924 0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
1925 0.00092223983f, 0.00098217216f, 0.0010459992f, 0.0011139742f,
1926 0.0011863665f, 0.0012634633f, 0.0013455702f, 0.0014330129f,
1927 0.0015261382f, 0.0016253153f, 0.0017309374f, 0.0018434235f,
1928 0.0019632195f, 0.0020908006f, 0.0022266726f, 0.0023713743f,
1929 0.0025254795f, 0.0026895994f, 0.0028643847f, 0.0030505286f,
1930 0.0032487691f, 0.0034598925f, 0.0036847358f, 0.0039241906f,
1931 0.0041792066f, 0.0044507950f, 0.0047400328f, 0.0050480668f,
1932 0.0053761186f, 0.0057254891f, 0.0060975636f, 0.0064938176f,
1933 0.0069158225f, 0.0073652516f, 0.0078438871f, 0.0083536271f,
1934 0.0088964928f, 0.009474637f, 0.010090352f, 0.010746080f,
1935 0.011444421f, 0.012188144f, 0.012980198f, 0.013823725f,
1936 0.014722068f, 0.015678791f, 0.016697687f, 0.017782797f,
1937 0.018938423f, 0.020169149f, 0.021479854f, 0.022875735f,
1938 0.024362330f, 0.025945531f, 0.027631618f, 0.029427276f,
1939 0.031339626f, 0.033376252f, 0.035545228f, 0.037855157f,
1940 0.040315199f, 0.042935108f, 0.045725273f, 0.048696758f,
1941 0.051861348f, 0.055231591f, 0.058820850f, 0.062643361f,
1942 0.066714279f, 0.071049749f, 0.075666962f, 0.080584227f,
1943 0.085821044f, 0.091398179f, 0.097337747f, 0.10366330f,
1944 0.11039993f, 0.11757434f, 0.12521498f, 0.13335215f,
1945 0.14201813f, 0.15124727f, 0.16107617f, 0.17154380f,
1946 0.18269168f, 0.19456402f, 0.20720788f, 0.22067342f,
1947 0.23501402f, 0.25028656f, 0.26655159f, 0.28387361f,
1948 0.30232132f, 0.32196786f, 0.34289114f, 0.36517414f,
1949 0.38890521f, 0.41417847f, 0.44109412f, 0.46975890f,
1950 0.50028648f, 0.53279791f, 0.56742212f, 0.60429640f,
1951 0.64356699f, 0.68538959f, 0.72993007f, 0.77736504f,
1952 0.82788260f, 0.88168307f, 0.9389798f, 1.0f
1953 };
1954
1955
1956 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1957 // note that you must produce bit-identical output to decode correctly;
1958 // this specific sequence of operations is specified in the spec (it's
1959 // drawing integer-quantized frequency-space lines that the encoder
1960 // expects to be exactly the same)
1961 // ... also, isn't the whole point of Bresenham's algorithm to NOT
1962 // have to divide in the setup? sigh.
1963 #ifndef STB_VORBIS_NO_DEFER_FLOOR
1964 #define LINE_OP(a,b) a *= b
1965 #else
1966 #define LINE_OP(a,b) a = b
1967 #endif
1968
1969 #ifdef STB_VORBIS_DIVIDE_TABLE
1970 #define DIVTAB_NUMER 32
1971 #define DIVTAB_DENOM 64
1972 int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
1973 #endif
1974
draw_line(float * output,int x0,int y0,int x1,int y1,int n)1975 static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
1976 {
1977 int dy = y1 - y0;
1978 int adx = x1 - x0;
1979 int ady = abs(dy);
1980 int base;
1981 int x=x0,y=y0;
1982 int err = 0;
1983 int sy;
1984
1985 #ifdef STB_VORBIS_DIVIDE_TABLE
1986 if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
1987 if (dy < 0) {
1988 base = -integer_divide_table[ady][adx];
1989 sy = base-1;
1990 } else {
1991 base = integer_divide_table[ady][adx];
1992 sy = base+1;
1993 }
1994 } else {
1995 base = dy / adx;
1996 if (dy < 0)
1997 sy = base - 1;
1998 else
1999 sy = base+1;
2000 }
2001 #else
2002 base = dy / adx;
2003 if (dy < 0)
2004 sy = base - 1;
2005 else
2006 sy = base+1;
2007 #endif
2008 ady -= abs(base) * adx;
2009 if (x1 > n) x1 = n;
2010 if (x < x1) {
2011 LINE_OP(output[x], inverse_db_table[y]);
2012 for (++x; x < x1; ++x) {
2013 err += ady;
2014 if (err >= adx) {
2015 err -= adx;
2016 y += sy;
2017 } else
2018 y += base;
2019 LINE_OP(output[x], inverse_db_table[y]);
2020 }
2021 }
2022 }
2023
residue_decode(vorb * f,Codebook * book,float * target,int offset,int n,int rtype)2024 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
2025 {
2026 int k;
2027 if (rtype == 0) {
2028 int step = n / book->dimensions;
2029 for (k=0; k < step; ++k)
2030 if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
2031 return FALSE;
2032 } else {
2033 for (k=0; k < n; ) {
2034 if (!codebook_decode(f, book, target+offset, n-k))
2035 return FALSE;
2036 k += book->dimensions;
2037 offset += book->dimensions;
2038 }
2039 }
2040 return TRUE;
2041 }
2042
decode_residue(vorb * f,float * residue_buffers[],int ch,int n,int rn,uint8 * do_not_decode)2043 static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
2044 {
2045 int i,j,pass;
2046 Residue *r = f->residue_config + rn;
2047 int rtype = f->residue_types[rn];
2048 int c = r->classbook;
2049 int classwords = f->codebooks[c].dimensions;
2050 int n_read = r->end - r->begin;
2051 int part_read = n_read / r->part_size;
2052 int temp_alloc_point = temp_alloc_save(f);
2053 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2054 uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
2055 #else
2056 int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
2057 #endif
2058
2059 CHECK(f);
2060
2061 for (i=0; i < ch; ++i)
2062 if (!do_not_decode[i])
2063 memset(residue_buffers[i], 0, sizeof(float) * n);
2064
2065 if (rtype == 2 && ch != 1) {
2066 for (j=0; j < ch; ++j)
2067 if (!do_not_decode[j])
2068 break;
2069 if (j == ch)
2070 goto done;
2071
2072 for (pass=0; pass < 8; ++pass) {
2073 int pcount = 0, class_set = 0;
2074 if (ch == 2) {
2075 while (pcount < part_read) {
2076 int z = r->begin + pcount*r->part_size;
2077 int c_inter = (z & 1), p_inter = z>>1;
2078 if (pass == 0) {
2079 Codebook *c = f->codebooks+r->classbook;
2080 int q;
2081 DECODE(q,f,c);
2082 if (q == EOP) goto done;
2083 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2084 part_classdata[0][class_set] = r->classdata[q];
2085 #else
2086 for (i=classwords-1; i >= 0; --i) {
2087 classifications[0][i+pcount] = q % r->classifications;
2088 q /= r->classifications;
2089 }
2090 #endif
2091 }
2092 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2093 int z = r->begin + pcount*r->part_size;
2094 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2095 int c = part_classdata[0][class_set][i];
2096 #else
2097 int c = classifications[0][pcount];
2098 #endif
2099 int b = r->residue_books[c][pass];
2100 if (b >= 0) {
2101 Codebook *book = f->codebooks + b;
2102 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
2103 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2104 goto done;
2105 #else
2106 // saves 1%
2107 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2108 goto done;
2109 #endif
2110 } else {
2111 z += r->part_size;
2112 c_inter = z & 1;
2113 p_inter = z >> 1;
2114 }
2115 }
2116 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2117 ++class_set;
2118 #endif
2119 }
2120 } else if (ch == 1) {
2121 while (pcount < part_read) {
2122 int z = r->begin + pcount*r->part_size;
2123 int c_inter = 0, p_inter = z;
2124 if (pass == 0) {
2125 Codebook *c = f->codebooks+r->classbook;
2126 int q;
2127 DECODE(q,f,c);
2128 if (q == EOP) goto done;
2129 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2130 part_classdata[0][class_set] = r->classdata[q];
2131 #else
2132 for (i=classwords-1; i >= 0; --i) {
2133 classifications[0][i+pcount] = q % r->classifications;
2134 q /= r->classifications;
2135 }
2136 #endif
2137 }
2138 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2139 int z = r->begin + pcount*r->part_size;
2140 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2141 int c = part_classdata[0][class_set][i];
2142 #else
2143 int c = classifications[0][pcount];
2144 #endif
2145 int b = r->residue_books[c][pass];
2146 if (b >= 0) {
2147 Codebook *book = f->codebooks + b;
2148 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2149 goto done;
2150 } else {
2151 z += r->part_size;
2152 c_inter = 0;
2153 p_inter = z;
2154 }
2155 }
2156 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2157 ++class_set;
2158 #endif
2159 }
2160 } else {
2161 while (pcount < part_read) {
2162 int z = r->begin + pcount*r->part_size;
2163 int c_inter = z % ch, p_inter = z/ch;
2164 if (pass == 0) {
2165 Codebook *c = f->codebooks+r->classbook;
2166 int q;
2167 DECODE(q,f,c);
2168 if (q == EOP) goto done;
2169 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2170 part_classdata[0][class_set] = r->classdata[q];
2171 #else
2172 for (i=classwords-1; i >= 0; --i) {
2173 classifications[0][i+pcount] = q % r->classifications;
2174 q /= r->classifications;
2175 }
2176 #endif
2177 }
2178 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2179 int z = r->begin + pcount*r->part_size;
2180 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2181 int c = part_classdata[0][class_set][i];
2182 #else
2183 int c = classifications[0][pcount];
2184 #endif
2185 int b = r->residue_books[c][pass];
2186 if (b >= 0) {
2187 Codebook *book = f->codebooks + b;
2188 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2189 goto done;
2190 } else {
2191 z += r->part_size;
2192 c_inter = z % ch;
2193 p_inter = z / ch;
2194 }
2195 }
2196 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2197 ++class_set;
2198 #endif
2199 }
2200 }
2201 }
2202 goto done;
2203 }
2204 CHECK(f);
2205
2206 for (pass=0; pass < 8; ++pass) {
2207 int pcount = 0, class_set=0;
2208 while (pcount < part_read) {
2209 if (pass == 0) {
2210 for (j=0; j < ch; ++j) {
2211 if (!do_not_decode[j]) {
2212 Codebook *c = f->codebooks+r->classbook;
2213 int temp;
2214 DECODE(temp,f,c);
2215 if (temp == EOP) goto done;
2216 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2217 part_classdata[j][class_set] = r->classdata[temp];
2218 #else
2219 for (i=classwords-1; i >= 0; --i) {
2220 classifications[j][i+pcount] = temp % r->classifications;
2221 temp /= r->classifications;
2222 }
2223 #endif
2224 }
2225 }
2226 }
2227 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2228 for (j=0; j < ch; ++j) {
2229 if (!do_not_decode[j]) {
2230 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2231 int c = part_classdata[j][class_set][i];
2232 #else
2233 int c = classifications[j][pcount];
2234 #endif
2235 int b = r->residue_books[c][pass];
2236 if (b >= 0) {
2237 float *target = residue_buffers[j];
2238 int offset = r->begin + pcount * r->part_size;
2239 int n = r->part_size;
2240 Codebook *book = f->codebooks + b;
2241 if (!residue_decode(f, book, target, offset, n, rtype))
2242 goto done;
2243 }
2244 }
2245 }
2246 }
2247 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2248 ++class_set;
2249 #endif
2250 }
2251 }
2252 done:
2253 CHECK(f);
2254 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2255 temp_free(f,part_classdata);
2256 #else
2257 temp_free(f,classifications);
2258 #endif
2259 temp_alloc_restore(f,temp_alloc_point);
2260 }
2261
2262
2263 #if 0
2264 // slow way for debugging
2265 void inverse_mdct_slow(float *buffer, int n)
2266 {
2267 int i,j;
2268 int n2 = n >> 1;
2269 float *x = (float *) malloc(sizeof(*x) * n2);
2270 memcpy(x, buffer, sizeof(*x) * n2);
2271 for (i=0; i < n; ++i) {
2272 float acc = 0;
2273 for (j=0; j < n2; ++j)
2274 // formula from paper:
2275 //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2276 // formula from wikipedia
2277 //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2278 // these are equivalent, except the formula from the paper inverts the multiplier!
2279 // however, what actually works is NO MULTIPLIER!?!
2280 //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2281 acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2282 buffer[i] = acc;
2283 }
2284 free(x);
2285 }
2286 #elif 0
2287 // same as above, but just barely able to run in real time on modern machines
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2288 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2289 {
2290 float mcos[16384];
2291 int i,j;
2292 int n2 = n >> 1, nmask = (n << 2) -1;
2293 float *x = (float *) malloc(sizeof(*x) * n2);
2294 memcpy(x, buffer, sizeof(*x) * n2);
2295 for (i=0; i < 4*n; ++i)
2296 mcos[i] = (float) cos(M_PI / 2 * i / n);
2297
2298 for (i=0; i < n; ++i) {
2299 float acc = 0;
2300 for (j=0; j < n2; ++j)
2301 acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
2302 buffer[i] = acc;
2303 }
2304 free(x);
2305 }
2306 #elif 0
2307 // transform to use a slow dct-iv; this is STILL basically trivial,
2308 // but only requires half as many ops
dct_iv_slow(float * buffer,int n)2309 void dct_iv_slow(float *buffer, int n)
2310 {
2311 float mcos[16384];
2312 float x[2048];
2313 int i,j;
2314 int n2 = n >> 1, nmask = (n << 3) - 1;
2315 memcpy(x, buffer, sizeof(*x) * n);
2316 for (i=0; i < 8*n; ++i)
2317 mcos[i] = (float) cos(M_PI / 4 * i / n);
2318 for (i=0; i < n; ++i) {
2319 float acc = 0;
2320 for (j=0; j < n; ++j)
2321 acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
2322 buffer[i] = acc;
2323 }
2324 }
2325
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2326 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2327 {
2328 int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
2329 float temp[4096];
2330
2331 memcpy(temp, buffer, n2 * sizeof(float));
2332 dct_iv_slow(temp, n2); // returns -c'-d, a-b'
2333
2334 for (i=0; i < n4 ; ++i) buffer[i] = temp[i+n4]; // a-b'
2335 for ( ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1]; // b-a', c+d'
2336 for ( ; i < n ; ++i) buffer[i] = -temp[i - n3_4]; // c'+d
2337 }
2338 #endif
2339
2340 #ifndef LIBVORBIS_MDCT
2341 #define LIBVORBIS_MDCT 0
2342 #endif
2343
2344 #if LIBVORBIS_MDCT
2345 // directly call the vorbis MDCT using an interface documented
2346 // by Jeff Roberts... useful for performance comparison
2347 typedef struct
2348 {
2349 int n;
2350 int log2n;
2351
2352 float *trig;
2353 int *bitrev;
2354
2355 float scale;
2356 } mdct_lookup;
2357
2358 extern void mdct_init(mdct_lookup *lookup, int n);
2359 extern void mdct_clear(mdct_lookup *l);
2360 extern void mdct_backward(mdct_lookup *init, float *in, float *out);
2361
2362 mdct_lookup M1,M2;
2363
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2364 void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2365 {
2366 mdct_lookup *M;
2367 if (M1.n == n) M = &M1;
2368 else if (M2.n == n) M = &M2;
2369 else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
2370 else {
2371 if (M2.n) __asm int 3;
2372 mdct_init(&M2, n);
2373 M = &M2;
2374 }
2375
2376 mdct_backward(M, buffer, buffer);
2377 }
2378 #endif
2379
2380
2381 // the following were split out into separate functions while optimizing;
2382 // they could be pushed back up but eh. __forceinline showed no change;
2383 // they're probably already being inlined.
imdct_step3_iter0_loop(int n,float * e,int i_off,int k_off,float * A)2384 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
2385 {
2386 float *ee0 = e + i_off;
2387 float *ee2 = ee0 + k_off;
2388 int i;
2389
2390 assert((n & 3) == 0);
2391 for (i=(n>>2); i > 0; --i) {
2392 float k00_20, k01_21;
2393 k00_20 = ee0[ 0] - ee2[ 0];
2394 k01_21 = ee0[-1] - ee2[-1];
2395 ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
2396 ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
2397 ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
2398 ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
2399 A += 8;
2400
2401 k00_20 = ee0[-2] - ee2[-2];
2402 k01_21 = ee0[-3] - ee2[-3];
2403 ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
2404 ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
2405 ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
2406 ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
2407 A += 8;
2408
2409 k00_20 = ee0[-4] - ee2[-4];
2410 k01_21 = ee0[-5] - ee2[-5];
2411 ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
2412 ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
2413 ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
2414 ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
2415 A += 8;
2416
2417 k00_20 = ee0[-6] - ee2[-6];
2418 k01_21 = ee0[-7] - ee2[-7];
2419 ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
2420 ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
2421 ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
2422 ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
2423 A += 8;
2424 ee0 -= 8;
2425 ee2 -= 8;
2426 }
2427 }
2428
imdct_step3_inner_r_loop(int lim,float * e,int d0,int k_off,float * A,int k1)2429 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
2430 {
2431 int i;
2432 float k00_20, k01_21;
2433
2434 float *e0 = e + d0;
2435 float *e2 = e0 + k_off;
2436
2437 for (i=lim >> 2; i > 0; --i) {
2438 k00_20 = e0[-0] - e2[-0];
2439 k01_21 = e0[-1] - e2[-1];
2440 e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
2441 e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
2442 e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
2443 e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
2444
2445 A += k1;
2446
2447 k00_20 = e0[-2] - e2[-2];
2448 k01_21 = e0[-3] - e2[-3];
2449 e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
2450 e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
2451 e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
2452 e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
2453
2454 A += k1;
2455
2456 k00_20 = e0[-4] - e2[-4];
2457 k01_21 = e0[-5] - e2[-5];
2458 e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
2459 e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
2460 e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
2461 e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
2462
2463 A += k1;
2464
2465 k00_20 = e0[-6] - e2[-6];
2466 k01_21 = e0[-7] - e2[-7];
2467 e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
2468 e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
2469 e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
2470 e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
2471
2472 e0 -= 8;
2473 e2 -= 8;
2474
2475 A += k1;
2476 }
2477 }
2478
imdct_step3_inner_s_loop(int n,float * e,int i_off,int k_off,float * A,int a_off,int k0)2479 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
2480 {
2481 int i;
2482 float A0 = A[0];
2483 float A1 = A[0+1];
2484 float A2 = A[0+a_off];
2485 float A3 = A[0+a_off+1];
2486 float A4 = A[0+a_off*2+0];
2487 float A5 = A[0+a_off*2+1];
2488 float A6 = A[0+a_off*3+0];
2489 float A7 = A[0+a_off*3+1];
2490
2491 float k00,k11;
2492
2493 float *ee0 = e +i_off;
2494 float *ee2 = ee0+k_off;
2495
2496 for (i=n; i > 0; --i) {
2497 k00 = ee0[ 0] - ee2[ 0];
2498 k11 = ee0[-1] - ee2[-1];
2499 ee0[ 0] = ee0[ 0] + ee2[ 0];
2500 ee0[-1] = ee0[-1] + ee2[-1];
2501 ee2[ 0] = (k00) * A0 - (k11) * A1;
2502 ee2[-1] = (k11) * A0 + (k00) * A1;
2503
2504 k00 = ee0[-2] - ee2[-2];
2505 k11 = ee0[-3] - ee2[-3];
2506 ee0[-2] = ee0[-2] + ee2[-2];
2507 ee0[-3] = ee0[-3] + ee2[-3];
2508 ee2[-2] = (k00) * A2 - (k11) * A3;
2509 ee2[-3] = (k11) * A2 + (k00) * A3;
2510
2511 k00 = ee0[-4] - ee2[-4];
2512 k11 = ee0[-5] - ee2[-5];
2513 ee0[-4] = ee0[-4] + ee2[-4];
2514 ee0[-5] = ee0[-5] + ee2[-5];
2515 ee2[-4] = (k00) * A4 - (k11) * A5;
2516 ee2[-5] = (k11) * A4 + (k00) * A5;
2517
2518 k00 = ee0[-6] - ee2[-6];
2519 k11 = ee0[-7] - ee2[-7];
2520 ee0[-6] = ee0[-6] + ee2[-6];
2521 ee0[-7] = ee0[-7] + ee2[-7];
2522 ee2[-6] = (k00) * A6 - (k11) * A7;
2523 ee2[-7] = (k11) * A6 + (k00) * A7;
2524
2525 ee0 -= k0;
2526 ee2 -= k0;
2527 }
2528 }
2529
iter_54(float * z)2530 static __forceinline void iter_54(float *z)
2531 {
2532 float k00,k11,k22,k33;
2533 float y0,y1,y2,y3;
2534
2535 k00 = z[ 0] - z[-4];
2536 y0 = z[ 0] + z[-4];
2537 y2 = z[-2] + z[-6];
2538 k22 = z[-2] - z[-6];
2539
2540 z[-0] = y0 + y2; // z0 + z4 + z2 + z6
2541 z[-2] = y0 - y2; // z0 + z4 - z2 - z6
2542
2543 // done with y0,y2
2544
2545 k33 = z[-3] - z[-7];
2546
2547 z[-4] = k00 + k33; // z0 - z4 + z3 - z7
2548 z[-6] = k00 - k33; // z0 - z4 - z3 + z7
2549
2550 // done with k33
2551
2552 k11 = z[-1] - z[-5];
2553 y1 = z[-1] + z[-5];
2554 y3 = z[-3] + z[-7];
2555
2556 z[-1] = y1 + y3; // z1 + z5 + z3 + z7
2557 z[-3] = y1 - y3; // z1 + z5 - z3 - z7
2558 z[-5] = k11 - k22; // z1 - z5 + z2 - z6
2559 z[-7] = k11 + k22; // z1 - z5 - z2 + z6
2560 }
2561
imdct_step3_inner_s_loop_ld654(int n,float * e,int i_off,float * A,int base_n)2562 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
2563 {
2564 int a_off = base_n >> 3;
2565 float A2 = A[0+a_off];
2566 float *z = e + i_off;
2567 float *base = z - 16 * n;
2568
2569 while (z > base) {
2570 float k00,k11;
2571
2572 k00 = z[-0] - z[-8];
2573 k11 = z[-1] - z[-9];
2574 z[-0] = z[-0] + z[-8];
2575 z[-1] = z[-1] + z[-9];
2576 z[-8] = k00;
2577 z[-9] = k11 ;
2578
2579 k00 = z[ -2] - z[-10];
2580 k11 = z[ -3] - z[-11];
2581 z[ -2] = z[ -2] + z[-10];
2582 z[ -3] = z[ -3] + z[-11];
2583 z[-10] = (k00+k11) * A2;
2584 z[-11] = (k11-k00) * A2;
2585
2586 k00 = z[-12] - z[ -4]; // reverse to avoid a unary negation
2587 k11 = z[ -5] - z[-13];
2588 z[ -4] = z[ -4] + z[-12];
2589 z[ -5] = z[ -5] + z[-13];
2590 z[-12] = k11;
2591 z[-13] = k00;
2592
2593 k00 = z[-14] - z[ -6]; // reverse to avoid a unary negation
2594 k11 = z[ -7] - z[-15];
2595 z[ -6] = z[ -6] + z[-14];
2596 z[ -7] = z[ -7] + z[-15];
2597 z[-14] = (k00+k11) * A2;
2598 z[-15] = (k00-k11) * A2;
2599
2600 iter_54(z);
2601 iter_54(z-8);
2602 z -= 16;
2603 }
2604 }
2605
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2606 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2607 {
2608 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2609 int ld;
2610 // @OPTIMIZE: reduce register pressure by using fewer variables?
2611 int save_point = temp_alloc_save(f);
2612 float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
2613 float *u=NULL,*v=NULL;
2614 // twiddle factors
2615 float *A = f->A[blocktype];
2616
2617 // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2618 // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
2619
2620 // kernel from paper
2621
2622
2623 // merged:
2624 // copy and reflect spectral data
2625 // step 0
2626
2627 // note that it turns out that the items added together during
2628 // this step are, in fact, being added to themselves (as reflected
2629 // by step 0). inexplicable inefficiency! this became obvious
2630 // once I combined the passes.
2631
2632 // so there's a missing 'times 2' here (for adding X to itself).
2633 // this propogates through linearly to the end, where the numbers
2634 // are 1/2 too small, and need to be compensated for.
2635
2636 {
2637 float *d,*e, *AA, *e_stop;
2638 d = &buf2[n2-2];
2639 AA = A;
2640 e = &buffer[0];
2641 e_stop = &buffer[n2];
2642 while (e != e_stop) {
2643 d[1] = (e[0] * AA[0] - e[2]*AA[1]);
2644 d[0] = (e[0] * AA[1] + e[2]*AA[0]);
2645 d -= 2;
2646 AA += 2;
2647 e += 4;
2648 }
2649
2650 e = &buffer[n2-3];
2651 while (d >= buf2) {
2652 d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
2653 d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
2654 d -= 2;
2655 AA += 2;
2656 e -= 4;
2657 }
2658 }
2659
2660 // now we use symbolic names for these, so that we can
2661 // possibly swap their meaning as we change which operations
2662 // are in place
2663
2664 u = buffer;
2665 v = buf2;
2666
2667 // step 2 (paper output is w, now u)
2668 // this could be in place, but the data ends up in the wrong
2669 // place... _somebody_'s got to swap it, so this is nominated
2670 {
2671 float *AA = &A[n2-8];
2672 float *d0,*d1, *e0, *e1;
2673
2674 e0 = &v[n4];
2675 e1 = &v[0];
2676
2677 d0 = &u[n4];
2678 d1 = &u[0];
2679
2680 while (AA >= A) {
2681 float v40_20, v41_21;
2682
2683 v41_21 = e0[1] - e1[1];
2684 v40_20 = e0[0] - e1[0];
2685 d0[1] = e0[1] + e1[1];
2686 d0[0] = e0[0] + e1[0];
2687 d1[1] = v41_21*AA[4] - v40_20*AA[5];
2688 d1[0] = v40_20*AA[4] + v41_21*AA[5];
2689
2690 v41_21 = e0[3] - e1[3];
2691 v40_20 = e0[2] - e1[2];
2692 d0[3] = e0[3] + e1[3];
2693 d0[2] = e0[2] + e1[2];
2694 d1[3] = v41_21*AA[0] - v40_20*AA[1];
2695 d1[2] = v40_20*AA[0] + v41_21*AA[1];
2696
2697 AA -= 8;
2698
2699 d0 += 4;
2700 d1 += 4;
2701 e0 += 4;
2702 e1 += 4;
2703 }
2704 }
2705
2706 // step 3
2707 ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2708
2709 // optimized step 3:
2710
2711 // the original step3 loop can be nested r inside s or s inside r;
2712 // it's written originally as s inside r, but this is dumb when r
2713 // iterates many times, and s few. So I have two copies of it and
2714 // switch between them halfway.
2715
2716 // this is iteration 0 of step 3
2717 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2718 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2719
2720 // this is iteration 1 of step 3
2721 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2722 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2723 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2724 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2725
2726 l=2;
2727 for (; l < (ld-3)>>1; ++l) {
2728 int k0 = n >> (l+2), k0_2 = k0>>1;
2729 int lim = 1 << (l+1);
2730 int i;
2731 for (i=0; i < lim; ++i)
2732 imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2733 }
2734
2735 for (; l < ld-6; ++l) {
2736 int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2737 int rlim = n >> (l+6), r;
2738 int lim = 1 << (l+1);
2739 int i_off;
2740 float *A0 = A;
2741 i_off = n2-1;
2742 for (r=rlim; r > 0; --r) {
2743 imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2744 A0 += k1*4;
2745 i_off -= 8;
2746 }
2747 }
2748
2749 // iterations with count:
2750 // ld-6,-5,-4 all interleaved together
2751 // the big win comes from getting rid of needless flops
2752 // due to the constants on pass 5 & 4 being all 1 and 0;
2753 // combining them to be simultaneous to improve cache made little difference
2754 imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2755
2756 // output is u
2757
2758 // step 4, 5, and 6
2759 // cannot be in-place because of step 5
2760 {
2761 uint16 *bitrev = f->bit_reverse[blocktype];
2762 // weirdly, I'd have thought reading sequentially and writing
2763 // erratically would have been better than vice-versa, but in
2764 // fact that's not what my testing showed. (That is, with
2765 // j = bitreverse(i), do you read i and write j, or read j and write i.)
2766
2767 float *d0 = &v[n4-4];
2768 float *d1 = &v[n2-4];
2769 while (d0 >= v) {
2770 int k4;
2771
2772 k4 = bitrev[0];
2773 d1[3] = u[k4+0];
2774 d1[2] = u[k4+1];
2775 d0[3] = u[k4+2];
2776 d0[2] = u[k4+3];
2777
2778 k4 = bitrev[1];
2779 d1[1] = u[k4+0];
2780 d1[0] = u[k4+1];
2781 d0[1] = u[k4+2];
2782 d0[0] = u[k4+3];
2783
2784 d0 -= 4;
2785 d1 -= 4;
2786 bitrev += 2;
2787 }
2788 }
2789 // (paper output is u, now v)
2790
2791
2792 // data must be in buf2
2793 assert(v == buf2);
2794
2795 // step 7 (paper output is v, now v)
2796 // this is now in place
2797 {
2798 float *C = f->C[blocktype];
2799 float *d, *e;
2800
2801 d = v;
2802 e = v + n2 - 4;
2803
2804 while (d < e) {
2805 float a02,a11,b0,b1,b2,b3;
2806
2807 a02 = d[0] - e[2];
2808 a11 = d[1] + e[3];
2809
2810 b0 = C[1]*a02 + C[0]*a11;
2811 b1 = C[1]*a11 - C[0]*a02;
2812
2813 b2 = d[0] + e[ 2];
2814 b3 = d[1] - e[ 3];
2815
2816 d[0] = b2 + b0;
2817 d[1] = b3 + b1;
2818 e[2] = b2 - b0;
2819 e[3] = b1 - b3;
2820
2821 a02 = d[2] - e[0];
2822 a11 = d[3] + e[1];
2823
2824 b0 = C[3]*a02 + C[2]*a11;
2825 b1 = C[3]*a11 - C[2]*a02;
2826
2827 b2 = d[2] + e[ 0];
2828 b3 = d[3] - e[ 1];
2829
2830 d[2] = b2 + b0;
2831 d[3] = b3 + b1;
2832 e[0] = b2 - b0;
2833 e[1] = b1 - b3;
2834
2835 C += 4;
2836 d += 4;
2837 e -= 4;
2838 }
2839 }
2840
2841 // data must be in buf2
2842
2843
2844 // step 8+decode (paper output is X, now buffer)
2845 // this generates pairs of data a la 8 and pushes them directly through
2846 // the decode kernel (pushing rather than pulling) to avoid having
2847 // to make another pass later
2848
2849 // this cannot POSSIBLY be in place, so we refer to the buffers directly
2850
2851 {
2852 float *d0,*d1,*d2,*d3;
2853
2854 float *B = f->B[blocktype] + n2 - 8;
2855 float *e = buf2 + n2 - 8;
2856 d0 = &buffer[0];
2857 d1 = &buffer[n2-4];
2858 d2 = &buffer[n2];
2859 d3 = &buffer[n-4];
2860 while (e >= v) {
2861 float p0,p1,p2,p3;
2862
2863 p3 = e[6]*B[7] - e[7]*B[6];
2864 p2 = -e[6]*B[6] - e[7]*B[7];
2865
2866 d0[0] = p3;
2867 d1[3] = - p3;
2868 d2[0] = p2;
2869 d3[3] = p2;
2870
2871 p1 = e[4]*B[5] - e[5]*B[4];
2872 p0 = -e[4]*B[4] - e[5]*B[5];
2873
2874 d0[1] = p1;
2875 d1[2] = - p1;
2876 d2[1] = p0;
2877 d3[2] = p0;
2878
2879 p3 = e[2]*B[3] - e[3]*B[2];
2880 p2 = -e[2]*B[2] - e[3]*B[3];
2881
2882 d0[2] = p3;
2883 d1[1] = - p3;
2884 d2[2] = p2;
2885 d3[1] = p2;
2886
2887 p1 = e[0]*B[1] - e[1]*B[0];
2888 p0 = -e[0]*B[0] - e[1]*B[1];
2889
2890 d0[3] = p1;
2891 d1[0] = - p1;
2892 d2[3] = p0;
2893 d3[0] = p0;
2894
2895 B -= 8;
2896 e -= 8;
2897 d0 += 4;
2898 d2 += 4;
2899 d1 -= 4;
2900 d3 -= 4;
2901 }
2902 }
2903
2904 temp_free(f,buf2);
2905 temp_alloc_restore(f,save_point);
2906 }
2907
2908 #if 0
2909 // this is the original version of the above code, if you want to optimize it from scratch
2910 void inverse_mdct_naive(float *buffer, int n)
2911 {
2912 float s;
2913 float A[1 << 12], B[1 << 12], C[1 << 11];
2914 int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2915 int n3_4 = n - n4, ld;
2916 // how can they claim this only uses N words?!
2917 // oh, because they're only used sparsely, whoops
2918 float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];
2919 // set up twiddle factors
2920
2921 for (k=k2=0; k < n4; ++k,k2+=2) {
2922 A[k2 ] = (float) cos(4*k*M_PI/n);
2923 A[k2+1] = (float) -sin(4*k*M_PI/n);
2924 B[k2 ] = (float) cos((k2+1)*M_PI/n/2);
2925 B[k2+1] = (float) sin((k2+1)*M_PI/n/2);
2926 }
2927 for (k=k2=0; k < n8; ++k,k2+=2) {
2928 C[k2 ] = (float) cos(2*(k2+1)*M_PI/n);
2929 C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
2930 }
2931
2932 // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2933 // Note there are bugs in that pseudocode, presumably due to them attempting
2934 // to rename the arrays nicely rather than representing the way their actual
2935 // implementation bounces buffers back and forth. As a result, even in the
2936 // "some formulars corrected" version, a direct implementation fails. These
2937 // are noted below as "paper bug".
2938
2939 // copy and reflect spectral data
2940 for (k=0; k < n2; ++k) u[k] = buffer[k];
2941 for ( ; k < n ; ++k) u[k] = -buffer[n - k - 1];
2942 // kernel from paper
2943 // step 1
2944 for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
2945 v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2] - (u[k4+2] - u[n-k4-3])*A[k2+1];
2946 v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
2947 }
2948 // step 2
2949 for (k=k4=0; k < n8; k+=1, k4+=4) {
2950 w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
2951 w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
2952 w[k4+3] = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
2953 w[k4+1] = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
2954 }
2955 // step 3
2956 ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2957 for (l=0; l < ld-3; ++l) {
2958 int k0 = n >> (l+2), k1 = 1 << (l+3);
2959 int rlim = n >> (l+4), r4, r;
2960 int s2lim = 1 << (l+2), s2;
2961 for (r=r4=0; r < rlim; r4+=4,++r) {
2962 for (s2=0; s2 < s2lim; s2+=2) {
2963 u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
2964 u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
2965 u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
2966 - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
2967 u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
2968 + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
2969 }
2970 }
2971 if (l+1 < ld-3) {
2972 // paper bug: ping-ponging of u&w here is omitted
2973 memcpy(w, u, sizeof(u));
2974 }
2975 }
2976
2977 // step 4
2978 for (i=0; i < n8; ++i) {
2979 int j = bit_reverse(i) >> (32-ld+3);
2980 assert(j < n8);
2981 if (i == j) {
2982 // paper bug: original code probably swapped in place; if copying,
2983 // need to directly copy in this case
2984 int i8 = i << 3;
2985 v[i8+1] = u[i8+1];
2986 v[i8+3] = u[i8+3];
2987 v[i8+5] = u[i8+5];
2988 v[i8+7] = u[i8+7];
2989 } else if (i < j) {
2990 int i8 = i << 3, j8 = j << 3;
2991 v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
2992 v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
2993 v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
2994 v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
2995 }
2996 }
2997 // step 5
2998 for (k=0; k < n2; ++k) {
2999 w[k] = v[k*2+1];
3000 }
3001 // step 6
3002 for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
3003 u[n-1-k2] = w[k4];
3004 u[n-2-k2] = w[k4+1];
3005 u[n3_4 - 1 - k2] = w[k4+2];
3006 u[n3_4 - 2 - k2] = w[k4+3];
3007 }
3008 // step 7
3009 for (k=k2=0; k < n8; ++k, k2 += 2) {
3010 v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3011 v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3012 v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3013 v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3014 }
3015 // step 8
3016 for (k=k2=0; k < n4; ++k,k2 += 2) {
3017 X[k] = v[k2+n2]*B[k2 ] + v[k2+1+n2]*B[k2+1];
3018 X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2 ];
3019 }
3020
3021 // decode kernel to output
3022 // determined the following value experimentally
3023 // (by first figuring out what made inverse_mdct_slow work); then matching that here
3024 // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
3025 s = 0.5; // theoretically would be n4
3026
3027 // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
3028 // so it needs to use the "old" B values to behave correctly, or else
3029 // set s to 1.0 ]]]
3030 for (i=0; i < n4 ; ++i) buffer[i] = s * X[i+n4];
3031 for ( ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
3032 for ( ; i < n ; ++i) buffer[i] = -s * X[i - n3_4];
3033 }
3034 #endif
3035
get_window(vorb * f,int len)3036 static float *get_window(vorb *f, int len)
3037 {
3038 len <<= 1;
3039 if (len == f->blocksize_0) return f->window[0];
3040 if (len == f->blocksize_1) return f->window[1];
3041 assert(0);
3042 return NULL;
3043 }
3044
3045 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3046 typedef int16 YTYPE;
3047 #else
3048 typedef int YTYPE;
3049 #endif
do_floor(vorb * f,Mapping * map,int i,int n,float * target,YTYPE * finalY,uint8 * step2_flag)3050 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
3051 {
3052 int n2 = n >> 1;
3053 int s = map->chan[i].mux, floor;
3054 floor = map->submap_floor[s];
3055 if (f->floor_types[floor] == 0) {
3056 return error(f, VORBIS_invalid_stream);
3057 } else {
3058 Floor1 *g = &f->floor_config[floor].floor1;
3059 int j,q;
3060 int lx = 0, ly = finalY[0] * g->floor1_multiplier;
3061 for (q=1; q < g->values; ++q) {
3062 j = g->sorted_order[q];
3063 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3064 if (finalY[j] >= 0)
3065 #else
3066 if (step2_flag[j])
3067 #endif
3068 {
3069 int hy = finalY[j] * g->floor1_multiplier;
3070 int hx = g->Xlist[j];
3071 if (lx != hx)
3072 draw_line(target, lx,ly, hx,hy, n2);
3073 CHECK(f);
3074 lx = hx, ly = hy;
3075 }
3076 }
3077 if (lx < n2) {
3078 // optimization of: draw_line(target, lx,ly, n,ly, n2);
3079 for (j=lx; j < n2; ++j)
3080 LINE_OP(target[j], inverse_db_table[ly]);
3081 CHECK(f);
3082 }
3083 }
3084 return TRUE;
3085 }
3086
3087 // The meaning of "left" and "right"
3088 //
3089 // For a given frame:
3090 // we compute samples from 0..n
3091 // window_center is n/2
3092 // we'll window and mix the samples from left_start to left_end with data from the previous frame
3093 // all of the samples from left_end to right_start can be output without mixing; however,
3094 // this interval is 0-length except when transitioning between short and long frames
3095 // all of the samples from right_start to right_end need to be mixed with the next frame,
3096 // which we don't have, so those get saved in a buffer
3097 // frame N's right_end-right_start, the number of samples to mix with the next frame,
3098 // has to be the same as frame N+1's left_end-left_start (which they are by
3099 // construction)
3100
vorbis_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)3101 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
3102 {
3103 Mode *m;
3104 int i, n, prev, next, window_center;
3105 f->channel_buffer_start = f->channel_buffer_end = 0;
3106
3107 retry:
3108 if (f->eof) return FALSE;
3109 if (!maybe_start_packet(f))
3110 return FALSE;
3111 // check packet type
3112 if (get_bits(f,1) != 0) {
3113 if (IS_PUSH_MODE(f))
3114 return error(f,VORBIS_bad_packet_type);
3115 while (EOP != get8_packet(f));
3116 goto retry;
3117 }
3118
3119 if (f->alloc.alloc_buffer)
3120 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3121
3122 i = get_bits(f, ilog(f->mode_count-1));
3123 if (i == EOP) return FALSE;
3124 if (i >= f->mode_count) return FALSE;
3125 *mode = i;
3126 m = f->mode_config + i;
3127 if (m->blockflag) {
3128 n = f->blocksize_1;
3129 prev = get_bits(f,1);
3130 next = get_bits(f,1);
3131 } else {
3132 prev = next = 0;
3133 n = f->blocksize_0;
3134 }
3135
3136 // WINDOWING
3137
3138 window_center = n >> 1;
3139 if (m->blockflag && !prev) {
3140 *p_left_start = (n - f->blocksize_0) >> 2;
3141 *p_left_end = (n + f->blocksize_0) >> 2;
3142 } else {
3143 *p_left_start = 0;
3144 *p_left_end = window_center;
3145 }
3146 if (m->blockflag && !next) {
3147 *p_right_start = (n*3 - f->blocksize_0) >> 2;
3148 *p_right_end = (n*3 + f->blocksize_0) >> 2;
3149 } else {
3150 *p_right_start = window_center;
3151 *p_right_end = n;
3152 }
3153
3154 return TRUE;
3155 }
3156
vorbis_decode_packet_rest(vorb * f,int * len,Mode * m,int left_start,int left_end,int right_start,int right_end,int * p_left)3157 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
3158 {
3159 Mapping *map;
3160 int i,j,k,n,n2;
3161 int zero_channel[256];
3162 int really_zero_channel[256];
3163
3164 // WINDOWING
3165
3166 n = f->blocksize[m->blockflag];
3167 map = &f->mapping[m->mapping];
3168
3169 // FLOORS
3170 n2 = n >> 1;
3171
3172 CHECK(f);
3173
3174 for (i=0; i < f->channels; ++i) {
3175 int s = map->chan[i].mux, floor;
3176 zero_channel[i] = FALSE;
3177 floor = map->submap_floor[s];
3178 if (f->floor_types[floor] == 0) {
3179 return error(f, VORBIS_invalid_stream);
3180 } else {
3181 Floor1 *g = &f->floor_config[floor].floor1;
3182 if (get_bits(f, 1)) {
3183 short *finalY;
3184 uint8 step2_flag[256];
3185 static int range_list[4] = { 256, 128, 86, 64 };
3186 int range = range_list[g->floor1_multiplier-1];
3187 int offset = 2;
3188 finalY = f->finalY[i];
3189 finalY[0] = get_bits(f, ilog(range)-1);
3190 finalY[1] = get_bits(f, ilog(range)-1);
3191 for (j=0; j < g->partitions; ++j) {
3192 int pclass = g->partition_class_list[j];
3193 int cdim = g->class_dimensions[pclass];
3194 int cbits = g->class_subclasses[pclass];
3195 int csub = (1 << cbits)-1;
3196 int cval = 0;
3197 if (cbits) {
3198 Codebook *c = f->codebooks + g->class_masterbooks[pclass];
3199 DECODE(cval,f,c);
3200 }
3201 for (k=0; k < cdim; ++k) {
3202 int book = g->subclass_books[pclass][cval & csub];
3203 cval = cval >> cbits;
3204 if (book >= 0) {
3205 int temp;
3206 Codebook *c = f->codebooks + book;
3207 DECODE(temp,f,c);
3208 finalY[offset++] = temp;
3209 } else
3210 finalY[offset++] = 0;
3211 }
3212 }
3213 if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
3214 step2_flag[0] = step2_flag[1] = 1;
3215 for (j=2; j < g->values; ++j) {
3216 int low, high, pred, highroom, lowroom, room, val;
3217 low = g->neighbors[j][0];
3218 high = g->neighbors[j][1];
3219 //neighbors(g->Xlist, j, &low, &high);
3220 pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
3221 val = finalY[j];
3222 highroom = range - pred;
3223 lowroom = pred;
3224 if (highroom < lowroom)
3225 room = highroom * 2;
3226 else
3227 room = lowroom * 2;
3228 if (val) {
3229 step2_flag[low] = step2_flag[high] = 1;
3230 step2_flag[j] = 1;
3231 if (val >= room)
3232 if (highroom > lowroom)
3233 finalY[j] = val - lowroom + pred;
3234 else
3235 finalY[j] = pred - val + highroom - 1;
3236 else
3237 if (val & 1)
3238 finalY[j] = pred - ((val+1)>>1);
3239 else
3240 finalY[j] = pred + (val>>1);
3241 } else {
3242 step2_flag[j] = 0;
3243 finalY[j] = pred;
3244 }
3245 }
3246
3247 #ifdef STB_VORBIS_NO_DEFER_FLOOR
3248 do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
3249 #else
3250 // defer final floor computation until _after_ residue
3251 for (j=0; j < g->values; ++j) {
3252 if (!step2_flag[j])
3253 finalY[j] = -1;
3254 }
3255 #endif
3256 } else {
3257 error:
3258 zero_channel[i] = TRUE;
3259 }
3260 // So we just defer everything else to later
3261
3262 // at this point we've decoded the floor into buffer
3263 }
3264 }
3265 CHECK(f);
3266 // at this point we've decoded all floors
3267
3268 if (f->alloc.alloc_buffer)
3269 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3270
3271 // re-enable coupled channels if necessary
3272 memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
3273 for (i=0; i < map->coupling_steps; ++i)
3274 if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
3275 zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
3276 }
3277
3278 CHECK(f);
3279 // RESIDUE DECODE
3280 for (i=0; i < map->submaps; ++i) {
3281 float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
3282 int r;
3283 uint8 do_not_decode[256];
3284 int ch = 0;
3285 for (j=0; j < f->channels; ++j) {
3286 if (map->chan[j].mux == i) {
3287 if (zero_channel[j]) {
3288 do_not_decode[ch] = TRUE;
3289 residue_buffers[ch] = NULL;
3290 } else {
3291 do_not_decode[ch] = FALSE;
3292 residue_buffers[ch] = f->channel_buffers[j];
3293 }
3294 ++ch;
3295 }
3296 }
3297 r = map->submap_residue[i];
3298 decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
3299 }
3300
3301 if (f->alloc.alloc_buffer)
3302 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3303 CHECK(f);
3304
3305 // INVERSE COUPLING
3306 for (i = map->coupling_steps-1; i >= 0; --i) {
3307 int n2 = n >> 1;
3308 float *m = f->channel_buffers[map->chan[i].magnitude];
3309 float *a = f->channel_buffers[map->chan[i].angle ];
3310 for (j=0; j < n2; ++j) {
3311 float a2,m2;
3312 if (m[j] > 0)
3313 if (a[j] > 0)
3314 m2 = m[j], a2 = m[j] - a[j];
3315 else
3316 a2 = m[j], m2 = m[j] + a[j];
3317 else
3318 if (a[j] > 0)
3319 m2 = m[j], a2 = m[j] + a[j];
3320 else
3321 a2 = m[j], m2 = m[j] - a[j];
3322 m[j] = m2;
3323 a[j] = a2;
3324 }
3325 }
3326 CHECK(f);
3327
3328 // finish decoding the floors
3329 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3330 for (i=0; i < f->channels; ++i) {
3331 if (really_zero_channel[i]) {
3332 memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3333 } else {
3334 do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
3335 }
3336 }
3337 #else
3338 for (i=0; i < f->channels; ++i) {
3339 if (really_zero_channel[i]) {
3340 memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3341 } else {
3342 for (j=0; j < n2; ++j)
3343 f->channel_buffers[i][j] *= f->floor_buffers[i][j];
3344 }
3345 }
3346 #endif
3347
3348 // INVERSE MDCT
3349 CHECK(f);
3350 for (i=0; i < f->channels; ++i)
3351 inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
3352 CHECK(f);
3353
3354 // this shouldn't be necessary, unless we exited on an error
3355 // and want to flush to get to the next packet
3356 flush_packet(f);
3357
3358 if (f->first_decode) {
3359 // assume we start so first non-discarded sample is sample 0
3360 // this isn't to spec, but spec would require us to read ahead
3361 // and decode the size of all current frames--could be done,
3362 // but presumably it's not a commonly used feature
3363 f->current_loc = -n2; // start of first frame is positioned for discard
3364 // we might have to discard samples "from" the next frame too,
3365 // if we're lapping a large block then a small at the start?
3366 f->discard_samples_deferred = n - right_end;
3367 f->current_loc_valid = TRUE;
3368 f->first_decode = FALSE;
3369 } else if (f->discard_samples_deferred) {
3370 if (f->discard_samples_deferred >= right_start - left_start) {
3371 f->discard_samples_deferred -= (right_start - left_start);
3372 left_start = right_start;
3373 *p_left = left_start;
3374 } else {
3375 left_start += f->discard_samples_deferred;
3376 *p_left = left_start;
3377 f->discard_samples_deferred = 0;
3378 }
3379 } else if (f->previous_length == 0 && f->current_loc_valid) {
3380 // we're recovering from a seek... that means we're going to discard
3381 // the samples from this packet even though we know our position from
3382 // the last page header, so we need to update the position based on
3383 // the discarded samples here
3384 // but wait, the code below is going to add this in itself even
3385 // on a discard, so we don't need to do it here...
3386 }
3387
3388 // check if we have ogg information about the sample # for this packet
3389 if (f->last_seg_which == f->end_seg_with_known_loc) {
3390 // if we have a valid current loc, and this is final:
3391 if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
3392 uint32 current_end = f->known_loc_for_packet - (n-right_end);
3393 // then let's infer the size of the (probably) short final frame
3394 if (current_end < f->current_loc + (right_end-left_start)) {
3395 if (current_end < f->current_loc) {
3396 // negative truncation, that's impossible!
3397 *len = 0;
3398 } else {
3399 *len = current_end - f->current_loc;
3400 }
3401 *len += left_start;
3402 if (*len > right_end) *len = right_end; // this should never happen
3403 f->current_loc += *len;
3404 return TRUE;
3405 }
3406 }
3407 // otherwise, just set our sample loc
3408 // guess that the ogg granule pos refers to the _middle_ of the
3409 // last frame?
3410 // set f->current_loc to the position of left_start
3411 f->current_loc = f->known_loc_for_packet - (n2-left_start);
3412 f->current_loc_valid = TRUE;
3413 }
3414 if (f->current_loc_valid)
3415 f->current_loc += (right_start - left_start);
3416
3417 if (f->alloc.alloc_buffer)
3418 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3419 *len = right_end; // ignore samples after the window goes to 0
3420 CHECK(f);
3421
3422 return TRUE;
3423 }
3424
vorbis_decode_packet(vorb * f,int * len,int * p_left,int * p_right)3425 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
3426 {
3427 int mode, left_end, right_end;
3428 if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
3429 return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
3430 }
3431
vorbis_finish_frame(stb_vorbis * f,int len,int left,int right)3432 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
3433 {
3434 int prev,i,j;
3435 // we use right&left (the start of the right- and left-window sin()-regions)
3436 // to determine how much to return, rather than inferring from the rules
3437 // (same result, clearer code); 'left' indicates where our sin() window
3438 // starts, therefore where the previous window's right edge starts, and
3439 // therefore where to start mixing from the previous buffer. 'right'
3440 // indicates where our sin() ending-window starts, therefore that's where
3441 // we start saving, and where our returned-data ends.
3442
3443 // mixin from previous window
3444 if (f->previous_length) {
3445 int i,j, n = f->previous_length;
3446 float *w = get_window(f, n);
3447 for (i=0; i < f->channels; ++i) {
3448 for (j=0; j < n; ++j)
3449 f->channel_buffers[i][left+j] =
3450 f->channel_buffers[i][left+j]*w[ j] +
3451 f->previous_window[i][ j]*w[n-1-j];
3452 }
3453 }
3454
3455 prev = f->previous_length;
3456
3457 // last half of this data becomes previous window
3458 f->previous_length = len - right;
3459
3460 // @OPTIMIZE: could avoid this copy by double-buffering the
3461 // output (flipping previous_window with channel_buffers), but
3462 // then previous_window would have to be 2x as large, and
3463 // channel_buffers couldn't be temp mem (although they're NOT
3464 // currently temp mem, they could be (unless we want to level
3465 // performance by spreading out the computation))
3466 for (i=0; i < f->channels; ++i)
3467 for (j=0; right+j < len; ++j)
3468 f->previous_window[i][j] = f->channel_buffers[i][right+j];
3469
3470 if (!prev)
3471 // there was no previous packet, so this data isn't valid...
3472 // this isn't entirely true, only the would-have-overlapped data
3473 // isn't valid, but this seems to be what the spec requires
3474 return 0;
3475
3476 // truncate a short frame
3477 if (len < right) right = len;
3478
3479 f->samples_output += right-left;
3480
3481 return right - left;
3482 }
3483
vorbis_pump_first_frame(stb_vorbis * f)3484 static void vorbis_pump_first_frame(stb_vorbis *f)
3485 {
3486 int len, right, left;
3487 if (vorbis_decode_packet(f, &len, &left, &right))
3488 vorbis_finish_frame(f, len, left, right);
3489 }
3490
3491 #ifndef STB_VORBIS_NO_PUSHDATA_API
is_whole_packet_present(stb_vorbis * f,int end_page)3492 static int is_whole_packet_present(stb_vorbis *f, int end_page)
3493 {
3494 // make sure that we have the packet available before continuing...
3495 // this requires a full ogg parse, but we know we can fetch from f->stream
3496
3497 // instead of coding this out explicitly, we could save the current read state,
3498 // read the next packet with get8() until end-of-packet, check f->eof, then
3499 // reset the state? but that would be slower, esp. since we'd have over 256 bytes
3500 // of state to restore (primarily the page segment table)
3501
3502 int s = f->next_seg, first = TRUE;
3503 uint8 *p = f->stream;
3504
3505 if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
3506 for (; s < f->segment_count; ++s) {
3507 p += f->segments[s];
3508 if (f->segments[s] < 255) // stop at first short segment
3509 break;
3510 }
3511 // either this continues, or it ends it...
3512 if (end_page)
3513 if (s < f->segment_count-1) return error(f, VORBIS_invalid_stream);
3514 if (s == f->segment_count)
3515 s = -1; // set 'crosses page' flag
3516 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3517 first = FALSE;
3518 }
3519 for (; s == -1;) {
3520 uint8 *q;
3521 int n;
3522
3523 // check that we have the page header ready
3524 if (p + 26 >= f->stream_end) return error(f, VORBIS_need_more_data);
3525 // validate the page
3526 if (memcmp(p, ogg_page_header, 4)) return error(f, VORBIS_invalid_stream);
3527 if (p[4] != 0) return error(f, VORBIS_invalid_stream);
3528 if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
3529 if (f->previous_length)
3530 if ((p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3531 // if no previous length, we're resynching, so we can come in on a continued-packet,
3532 // which we'll just drop
3533 } else {
3534 if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3535 }
3536 n = p[26]; // segment counts
3537 q = p+27; // q points to segment table
3538 p = q + n; // advance past header
3539 // make sure we've read the segment table
3540 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3541 for (s=0; s < n; ++s) {
3542 p += q[s];
3543 if (q[s] < 255)
3544 break;
3545 }
3546 if (end_page)
3547 if (s < n-1) return error(f, VORBIS_invalid_stream);
3548 if (s == n)
3549 s = -1; // set 'crosses page' flag
3550 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3551 first = FALSE;
3552 }
3553 return TRUE;
3554 }
3555 #endif // !STB_VORBIS_NO_PUSHDATA_API
3556
start_decoder(vorb * f)3557 static int start_decoder(vorb *f)
3558 {
3559 uint8 header[6], x,y;
3560 int len,i,j,k, max_submaps = 0;
3561 int longest_floorlist=0;
3562
3563 // first page, first packet
3564
3565 if (!start_page(f)) return FALSE;
3566 // validate page flag
3567 if (!(f->page_flag & PAGEFLAG_first_page)) return error(f, VORBIS_invalid_first_page);
3568 if (f->page_flag & PAGEFLAG_last_page) return error(f, VORBIS_invalid_first_page);
3569 if (f->page_flag & PAGEFLAG_continued_packet) return error(f, VORBIS_invalid_first_page);
3570 // check for expected packet length
3571 if (f->segment_count != 1) return error(f, VORBIS_invalid_first_page);
3572 if (f->segments[0] != 30) return error(f, VORBIS_invalid_first_page);
3573 // read packet
3574 // check packet header
3575 if (get8(f) != VORBIS_packet_id) return error(f, VORBIS_invalid_first_page);
3576 if (!getn(f, header, 6)) return error(f, VORBIS_unexpected_eof);
3577 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_first_page);
3578 // vorbis_version
3579 if (get32(f) != 0) return error(f, VORBIS_invalid_first_page);
3580 f->channels = get8(f); if (!f->channels) return error(f, VORBIS_invalid_first_page);
3581 if (f->channels > STB_VORBIS_MAX_CHANNELS) return error(f, VORBIS_too_many_channels);
3582 f->sample_rate = get32(f); if (!f->sample_rate) return error(f, VORBIS_invalid_first_page);
3583 get32(f); // bitrate_maximum
3584 get32(f); // bitrate_nominal
3585 get32(f); // bitrate_minimum
3586 x = get8(f);
3587 {
3588 int log0,log1;
3589 log0 = x & 15;
3590 log1 = x >> 4;
3591 f->blocksize_0 = 1 << log0;
3592 f->blocksize_1 = 1 << log1;
3593 if (log0 < 6 || log0 > 13) return error(f, VORBIS_invalid_setup);
3594 if (log1 < 6 || log1 > 13) return error(f, VORBIS_invalid_setup);
3595 if (log0 > log1) return error(f, VORBIS_invalid_setup);
3596 }
3597
3598 // framing_flag
3599 x = get8(f);
3600 if (!(x & 1)) return error(f, VORBIS_invalid_first_page);
3601
3602 // second packet!
3603 if (!start_page(f)) return FALSE;
3604
3605 if (!start_packet(f)) return FALSE;
3606 do {
3607 len = next_segment(f);
3608 skip(f, len);
3609 f->bytes_in_seg = 0;
3610 } while (len);
3611
3612 // third packet!
3613 if (!start_packet(f)) return FALSE;
3614
3615 #ifndef STB_VORBIS_NO_PUSHDATA_API
3616 if (IS_PUSH_MODE(f)) {
3617 if (!is_whole_packet_present(f, TRUE)) {
3618 // convert error in ogg header to write type
3619 if (f->error == VORBIS_invalid_stream)
3620 f->error = VORBIS_invalid_setup;
3621 return FALSE;
3622 }
3623 }
3624 #endif
3625
3626 crc32_init(); // always init it, to avoid multithread race conditions
3627
3628 if (get8_packet(f) != VORBIS_packet_setup) return error(f, VORBIS_invalid_setup);
3629 for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3630 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_setup);
3631
3632 // codebooks
3633
3634 f->codebook_count = get_bits(f,8) + 1;
3635 f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
3636 if (f->codebooks == NULL) return error(f, VORBIS_outofmem);
3637 memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
3638 for (i=0; i < f->codebook_count; ++i) {
3639 uint32 *values;
3640 int ordered, sorted_count;
3641 int total=0;
3642 uint8 *lengths;
3643 Codebook *c = f->codebooks+i;
3644 CHECK(f);
3645 x = get_bits(f, 8); if (x != 0x42) return error(f, VORBIS_invalid_setup);
3646 x = get_bits(f, 8); if (x != 0x43) return error(f, VORBIS_invalid_setup);
3647 x = get_bits(f, 8); if (x != 0x56) return error(f, VORBIS_invalid_setup);
3648 x = get_bits(f, 8);
3649 c->dimensions = (get_bits(f, 8)<<8) + x;
3650 x = get_bits(f, 8);
3651 y = get_bits(f, 8);
3652 c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
3653 ordered = get_bits(f,1);
3654 c->sparse = ordered ? 0 : get_bits(f,1);
3655
3656 if (c->dimensions == 0 && c->entries != 0) return error(f, VORBIS_invalid_setup);
3657
3658 if (c->sparse)
3659 lengths = (uint8 *) setup_temp_malloc(f, c->entries);
3660 else
3661 lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3662
3663 if (!lengths) return error(f, VORBIS_outofmem);
3664
3665 if (ordered) {
3666 int current_entry = 0;
3667 int current_length = get_bits(f,5) + 1;
3668 while (current_entry < c->entries) {
3669 int limit = c->entries - current_entry;
3670 int n = get_bits(f, ilog(limit));
3671 if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
3672 memset(lengths + current_entry, current_length, n);
3673 current_entry += n;
3674 ++current_length;
3675 }
3676 } else {
3677 for (j=0; j < c->entries; ++j) {
3678 int present = c->sparse ? get_bits(f,1) : 1;
3679 if (present) {
3680 lengths[j] = get_bits(f, 5) + 1;
3681 ++total;
3682 if (lengths[j] == 32)
3683 return error(f, VORBIS_invalid_setup);
3684 } else {
3685 lengths[j] = NO_CODE;
3686 }
3687 }
3688 }
3689
3690 if (c->sparse && total >= c->entries >> 2) {
3691 // convert sparse items to non-sparse!
3692 if (c->entries > (int) f->setup_temp_memory_required)
3693 f->setup_temp_memory_required = c->entries;
3694
3695 c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3696 if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem);
3697 memcpy(c->codeword_lengths, lengths, c->entries);
3698 setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
3699 lengths = c->codeword_lengths;
3700 c->sparse = 0;
3701 }
3702
3703 // compute the size of the sorted tables
3704 if (c->sparse) {
3705 sorted_count = total;
3706 } else {
3707 sorted_count = 0;
3708 #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
3709 for (j=0; j < c->entries; ++j)
3710 if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
3711 ++sorted_count;
3712 #endif
3713 }
3714
3715 c->sorted_entries = sorted_count;
3716 values = NULL;
3717
3718 CHECK(f);
3719 if (!c->sparse) {
3720 c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
3721 if (!c->codewords) return error(f, VORBIS_outofmem);
3722 } else {
3723 unsigned int size;
3724 if (c->sorted_entries) {
3725 c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries);
3726 if (!c->codeword_lengths) return error(f, VORBIS_outofmem);
3727 c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
3728 if (!c->codewords) return error(f, VORBIS_outofmem);
3729 values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
3730 if (!values) return error(f, VORBIS_outofmem);
3731 }
3732 size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
3733 if (size > f->setup_temp_memory_required)
3734 f->setup_temp_memory_required = size;
3735 }
3736
3737 if (!compute_codewords(c, lengths, c->entries, values)) {
3738 if (c->sparse) setup_temp_free(f, values, 0);
3739 return error(f, VORBIS_invalid_setup);
3740 }
3741
3742 if (c->sorted_entries) {
3743 // allocate an extra slot for sentinels
3744 c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
3745 if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem);
3746 // allocate an extra slot at the front so that c->sorted_values[-1] is defined
3747 // so that we can catch that case without an extra if
3748 c->sorted_values = ( int *) setup_malloc(f, sizeof(*c->sorted_values ) * (c->sorted_entries+1));
3749 if (c->sorted_values == NULL) return error(f, VORBIS_outofmem);
3750 ++c->sorted_values;
3751 c->sorted_values[-1] = -1;
3752 compute_sorted_huffman(c, lengths, values);
3753 }
3754
3755 if (c->sparse) {
3756 setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
3757 setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
3758 setup_temp_free(f, lengths, c->entries);
3759 c->codewords = NULL;
3760 }
3761
3762 compute_accelerated_huffman(c);
3763
3764 CHECK(f);
3765 c->lookup_type = get_bits(f, 4);
3766 if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
3767 if (c->lookup_type > 0) {
3768 uint16 *mults;
3769 c->minimum_value = float32_unpack(get_bits(f, 32));
3770 c->delta_value = float32_unpack(get_bits(f, 32));
3771 c->value_bits = get_bits(f, 4)+1;
3772 c->sequence_p = get_bits(f,1);
3773 if (c->lookup_type == 1) {
3774 c->lookup_values = lookup1_values(c->entries, c->dimensions);
3775 } else {
3776 c->lookup_values = c->entries * c->dimensions;
3777 }
3778 if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup);
3779 mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
3780 if (mults == NULL) return error(f, VORBIS_outofmem);
3781 for (j=0; j < (int) c->lookup_values; ++j) {
3782 int q = get_bits(f, c->value_bits);
3783 if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
3784 mults[j] = q;
3785 }
3786
3787 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3788 if (c->lookup_type == 1) {
3789 int len, sparse = c->sparse;
3790 float last=0;
3791 // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3792 if (sparse) {
3793 if (c->sorted_entries == 0) goto skip;
3794 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
3795 } else
3796 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries * c->dimensions);
3797 if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3798 len = sparse ? c->sorted_entries : c->entries;
3799 for (j=0; j < len; ++j) {
3800 unsigned int z = sparse ? c->sorted_values[j] : j;
3801 unsigned int div=1;
3802 for (k=0; k < c->dimensions; ++k) {
3803 int off = (z / div) % c->lookup_values;
3804 float val = mults[off];
3805 val = mults[off]*c->delta_value + c->minimum_value + last;
3806 c->multiplicands[j*c->dimensions + k] = val;
3807 if (c->sequence_p)
3808 last = val;
3809 if (k+1 < c->dimensions) {
3810 if (div > UINT_MAX / (unsigned int) c->lookup_values) {
3811 setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
3812 return error(f, VORBIS_invalid_setup);
3813 }
3814 div *= c->lookup_values;
3815 }
3816 }
3817 }
3818 c->lookup_type = 2;
3819 }
3820 else
3821 #endif
3822 {
3823 float last=0;
3824 CHECK(f);
3825 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
3826 if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3827 for (j=0; j < (int) c->lookup_values; ++j) {
3828 float val = mults[j] * c->delta_value + c->minimum_value + last;
3829 c->multiplicands[j] = val;
3830 if (c->sequence_p)
3831 last = val;
3832 }
3833 }
3834 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3835 skip:;
3836 #endif
3837 setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values);
3838
3839 CHECK(f);
3840 }
3841 CHECK(f);
3842 }
3843
3844 // time domain transfers (notused)
3845
3846 x = get_bits(f, 6) + 1;
3847 for (i=0; i < x; ++i) {
3848 uint32 z = get_bits(f, 16);
3849 if (z != 0) return error(f, VORBIS_invalid_setup);
3850 }
3851
3852 // Floors
3853 f->floor_count = get_bits(f, 6)+1;
3854 f->floor_config = (Floor *) setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
3855 if (f->floor_config == NULL) return error(f, VORBIS_outofmem);
3856 for (i=0; i < f->floor_count; ++i) {
3857 f->floor_types[i] = get_bits(f, 16);
3858 if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
3859 if (f->floor_types[i] == 0) {
3860 Floor0 *g = &f->floor_config[i].floor0;
3861 g->order = get_bits(f,8);
3862 g->rate = get_bits(f,16);
3863 g->bark_map_size = get_bits(f,16);
3864 g->amplitude_bits = get_bits(f,6);
3865 g->amplitude_offset = get_bits(f,8);
3866 g->number_of_books = get_bits(f,4) + 1;
3867 for (j=0; j < g->number_of_books; ++j)
3868 g->book_list[j] = get_bits(f,8);
3869 return error(f, VORBIS_feature_not_supported);
3870 } else {
3871 Point p[31*8+2];
3872 Floor1 *g = &f->floor_config[i].floor1;
3873 int max_class = -1;
3874 g->partitions = get_bits(f, 5);
3875 for (j=0; j < g->partitions; ++j) {
3876 g->partition_class_list[j] = get_bits(f, 4);
3877 if (g->partition_class_list[j] > max_class)
3878 max_class = g->partition_class_list[j];
3879 }
3880 for (j=0; j <= max_class; ++j) {
3881 g->class_dimensions[j] = get_bits(f, 3)+1;
3882 g->class_subclasses[j] = get_bits(f, 2);
3883 if (g->class_subclasses[j]) {
3884 g->class_masterbooks[j] = get_bits(f, 8);
3885 if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3886 }
3887 for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
3888 g->subclass_books[j][k] = get_bits(f,8)-1;
3889 if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3890 }
3891 }
3892 g->floor1_multiplier = get_bits(f,2)+1;
3893 g->rangebits = get_bits(f,4);
3894 g->Xlist[0] = 0;
3895 g->Xlist[1] = 1 << g->rangebits;
3896 g->values = 2;
3897 for (j=0; j < g->partitions; ++j) {
3898 int c = g->partition_class_list[j];
3899 for (k=0; k < g->class_dimensions[c]; ++k) {
3900 g->Xlist[g->values] = get_bits(f, g->rangebits);
3901 ++g->values;
3902 }
3903 }
3904 // precompute the sorting
3905 for (j=0; j < g->values; ++j) {
3906 p[j].x = g->Xlist[j];
3907 p[j].y = j;
3908 }
3909 qsort(p, g->values, sizeof(p[0]), point_compare);
3910 for (j=0; j < g->values; ++j)
3911 g->sorted_order[j] = (uint8) p[j].y;
3912 // precompute the neighbors
3913 for (j=2; j < g->values; ++j) {
3914 int low,hi;
3915 neighbors(g->Xlist, j, &low,&hi);
3916 g->neighbors[j][0] = low;
3917 g->neighbors[j][1] = hi;
3918 }
3919
3920 if (g->values > longest_floorlist)
3921 longest_floorlist = g->values;
3922 }
3923 }
3924
3925 // Residue
3926 f->residue_count = get_bits(f, 6)+1;
3927 f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0]));
3928 if (f->residue_config == NULL) return error(f, VORBIS_outofmem);
3929 memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0]));
3930 for (i=0; i < f->residue_count; ++i) {
3931 uint8 residue_cascade[64];
3932 Residue *r = f->residue_config+i;
3933 f->residue_types[i] = get_bits(f, 16);
3934 if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
3935 r->begin = get_bits(f, 24);
3936 r->end = get_bits(f, 24);
3937 if (r->end < r->begin) return error(f, VORBIS_invalid_setup);
3938 r->part_size = get_bits(f,24)+1;
3939 r->classifications = get_bits(f,6)+1;
3940 r->classbook = get_bits(f,8);
3941 if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3942 for (j=0; j < r->classifications; ++j) {
3943 uint8 high_bits=0;
3944 uint8 low_bits=get_bits(f,3);
3945 if (get_bits(f,1))
3946 high_bits = get_bits(f,5);
3947 residue_cascade[j] = high_bits*8 + low_bits;
3948 }
3949 r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
3950 if (r->residue_books == NULL) return error(f, VORBIS_outofmem);
3951 for (j=0; j < r->classifications; ++j) {
3952 for (k=0; k < 8; ++k) {
3953 if (residue_cascade[j] & (1 << k)) {
3954 r->residue_books[j][k] = get_bits(f, 8);
3955 if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3956 } else {
3957 r->residue_books[j][k] = -1;
3958 }
3959 }
3960 }
3961 // precompute the classifications[] array to avoid inner-loop mod/divide
3962 // call it 'classdata' since we already have r->classifications
3963 r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3964 if (!r->classdata) return error(f, VORBIS_outofmem);
3965 memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3966 for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
3967 int classwords = f->codebooks[r->classbook].dimensions;
3968 int temp = j;
3969 r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
3970 if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem);
3971 for (k=classwords-1; k >= 0; --k) {
3972 r->classdata[j][k] = temp % r->classifications;
3973 temp /= r->classifications;
3974 }
3975 }
3976 }
3977
3978 f->mapping_count = get_bits(f,6)+1;
3979 f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
3980 if (f->mapping == NULL) return error(f, VORBIS_outofmem);
3981 memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping));
3982 for (i=0; i < f->mapping_count; ++i) {
3983 Mapping *m = f->mapping + i;
3984 int mapping_type = get_bits(f,16);
3985 if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
3986 m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
3987 if (m->chan == NULL) return error(f, VORBIS_outofmem);
3988 if (get_bits(f,1))
3989 m->submaps = get_bits(f,4)+1;
3990 else
3991 m->submaps = 1;
3992 if (m->submaps > max_submaps)
3993 max_submaps = m->submaps;
3994 if (get_bits(f,1)) {
3995 m->coupling_steps = get_bits(f,8)+1;
3996 for (k=0; k < m->coupling_steps; ++k) {
3997 m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
3998 m->chan[k].angle = get_bits(f, ilog(f->channels-1));
3999 if (m->chan[k].magnitude >= f->channels) return error(f, VORBIS_invalid_setup);
4000 if (m->chan[k].angle >= f->channels) return error(f, VORBIS_invalid_setup);
4001 if (m->chan[k].magnitude == m->chan[k].angle) return error(f, VORBIS_invalid_setup);
4002 }
4003 } else
4004 m->coupling_steps = 0;
4005
4006 // reserved field
4007 if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
4008 if (m->submaps > 1) {
4009 for (j=0; j < f->channels; ++j) {
4010 m->chan[j].mux = get_bits(f, 4);
4011 if (m->chan[j].mux >= m->submaps) return error(f, VORBIS_invalid_setup);
4012 }
4013 } else
4014 // @SPECIFICATION: this case is missing from the spec
4015 for (j=0; j < f->channels; ++j)
4016 m->chan[j].mux = 0;
4017
4018 for (j=0; j < m->submaps; ++j) {
4019 get_bits(f,8); // discard
4020 m->submap_floor[j] = get_bits(f,8);
4021 m->submap_residue[j] = get_bits(f,8);
4022 if (m->submap_floor[j] >= f->floor_count) return error(f, VORBIS_invalid_setup);
4023 if (m->submap_residue[j] >= f->residue_count) return error(f, VORBIS_invalid_setup);
4024 }
4025 }
4026
4027 // Modes
4028 f->mode_count = get_bits(f, 6)+1;
4029 for (i=0; i < f->mode_count; ++i) {
4030 Mode *m = f->mode_config+i;
4031 m->blockflag = get_bits(f,1);
4032 m->windowtype = get_bits(f,16);
4033 m->transformtype = get_bits(f,16);
4034 m->mapping = get_bits(f,8);
4035 if (m->windowtype != 0) return error(f, VORBIS_invalid_setup);
4036 if (m->transformtype != 0) return error(f, VORBIS_invalid_setup);
4037 if (m->mapping >= f->mapping_count) return error(f, VORBIS_invalid_setup);
4038 }
4039
4040 flush_packet(f);
4041
4042 f->previous_length = 0;
4043
4044 for (i=0; i < f->channels; ++i) {
4045 f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
4046 f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4047 f->finalY[i] = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
4048 if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem);
4049 #ifdef STB_VORBIS_NO_DEFER_FLOOR
4050 f->floor_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4051 if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem);
4052 #endif
4053 }
4054
4055 if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
4056 if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
4057 f->blocksize[0] = f->blocksize_0;
4058 f->blocksize[1] = f->blocksize_1;
4059
4060 #ifdef STB_VORBIS_DIVIDE_TABLE
4061 if (integer_divide_table[1][1]==0)
4062 for (i=0; i < DIVTAB_NUMER; ++i)
4063 for (j=1; j < DIVTAB_DENOM; ++j)
4064 integer_divide_table[i][j] = i / j;
4065 #endif
4066
4067 // compute how much temporary memory is needed
4068
4069 // 1.
4070 {
4071 uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
4072 uint32 classify_mem;
4073 int i,max_part_read=0;
4074 for (i=0; i < f->residue_count; ++i) {
4075 Residue *r = f->residue_config + i;
4076 int n_read = r->end - r->begin;
4077 int part_read = n_read / r->part_size;
4078 if (part_read > max_part_read)
4079 max_part_read = part_read;
4080 }
4081 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
4082 classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
4083 #else
4084 classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
4085 #endif
4086
4087 f->temp_memory_required = classify_mem;
4088 if (imdct_mem > f->temp_memory_required)
4089 f->temp_memory_required = imdct_mem;
4090 }
4091
4092 f->first_decode = TRUE;
4093
4094 if (f->alloc.alloc_buffer) {
4095 assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
4096 // check if there's enough temp memory so we don't error later
4097 if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
4098 return error(f, VORBIS_outofmem);
4099 }
4100
4101 f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
4102
4103 return TRUE;
4104 }
4105
vorbis_deinit(stb_vorbis * p)4106 static void vorbis_deinit(stb_vorbis *p)
4107 {
4108 int i,j;
4109 if (p->residue_config) {
4110 for (i=0; i < p->residue_count; ++i) {
4111 Residue *r = p->residue_config+i;
4112 if (r->classdata) {
4113 for (j=0; j < p->codebooks[r->classbook].entries; ++j)
4114 setup_free(p, r->classdata[j]);
4115 setup_free(p, r->classdata);
4116 }
4117 setup_free(p, r->residue_books);
4118 }
4119 }
4120
4121 if (p->codebooks) {
4122 CHECK(p);
4123 for (i=0; i < p->codebook_count; ++i) {
4124 Codebook *c = p->codebooks + i;
4125 setup_free(p, c->codeword_lengths);
4126 setup_free(p, c->multiplicands);
4127 setup_free(p, c->codewords);
4128 setup_free(p, c->sorted_codewords);
4129 // c->sorted_values[-1] is the first entry in the array
4130 setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
4131 }
4132 setup_free(p, p->codebooks);
4133 }
4134 setup_free(p, p->floor_config);
4135 setup_free(p, p->residue_config);
4136 if (p->mapping) {
4137 for (i=0; i < p->mapping_count; ++i)
4138 setup_free(p, p->mapping[i].chan);
4139 setup_free(p, p->mapping);
4140 }
4141 CHECK(p);
4142 for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
4143 setup_free(p, p->channel_buffers[i]);
4144 setup_free(p, p->previous_window[i]);
4145 #ifdef STB_VORBIS_NO_DEFER_FLOOR
4146 setup_free(p, p->floor_buffers[i]);
4147 #endif
4148 setup_free(p, p->finalY[i]);
4149 }
4150 for (i=0; i < 2; ++i) {
4151 setup_free(p, p->A[i]);
4152 setup_free(p, p->B[i]);
4153 setup_free(p, p->C[i]);
4154 setup_free(p, p->window[i]);
4155 setup_free(p, p->bit_reverse[i]);
4156 }
4157 #ifndef STB_VORBIS_NO_STDIO
4158 if (p->close_on_free) fclose(p->f);
4159 #endif
4160 }
4161
stb_vorbis_close(stb_vorbis * p)4162 void stb_vorbis_close(stb_vorbis *p)
4163 {
4164 if (p == NULL) return;
4165 vorbis_deinit(p);
4166 setup_free(p,p);
4167 }
4168
vorbis_init(stb_vorbis * p,const stb_vorbis_alloc * z)4169 static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
4170 {
4171 memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start
4172 if (z) {
4173 p->alloc = *z;
4174 p->alloc.alloc_buffer_length_in_bytes = (p->alloc.alloc_buffer_length_in_bytes+3) & ~3;
4175 p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
4176 }
4177 p->eof = 0;
4178 p->error = VORBIS__no_error;
4179 p->stream = NULL;
4180 p->codebooks = NULL;
4181 p->page_crc_tests = -1;
4182 #ifndef STB_VORBIS_NO_STDIO
4183 p->close_on_free = FALSE;
4184 p->f = NULL;
4185 #endif
4186 }
4187
stb_vorbis_get_sample_offset(stb_vorbis * f)4188 int stb_vorbis_get_sample_offset(stb_vorbis *f)
4189 {
4190 if (f->current_loc_valid)
4191 return f->current_loc;
4192 else
4193 return -1;
4194 }
4195
stb_vorbis_get_info(stb_vorbis * f)4196 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
4197 {
4198 stb_vorbis_info d;
4199 d.channels = f->channels;
4200 d.sample_rate = f->sample_rate;
4201 d.setup_memory_required = f->setup_memory_required;
4202 d.setup_temp_memory_required = f->setup_temp_memory_required;
4203 d.temp_memory_required = f->temp_memory_required;
4204 d.max_frame_size = f->blocksize_1 >> 1;
4205 return d;
4206 }
4207
stb_vorbis_get_error(stb_vorbis * f)4208 int stb_vorbis_get_error(stb_vorbis *f)
4209 {
4210 int e = f->error;
4211 f->error = VORBIS__no_error;
4212 return e;
4213 }
4214
vorbis_alloc(stb_vorbis * f)4215 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
4216 {
4217 stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
4218 return p;
4219 }
4220
4221 #ifndef STB_VORBIS_NO_PUSHDATA_API
4222
stb_vorbis_flush_pushdata(stb_vorbis * f)4223 void stb_vorbis_flush_pushdata(stb_vorbis *f)
4224 {
4225 f->previous_length = 0;
4226 f->page_crc_tests = 0;
4227 f->discard_samples_deferred = 0;
4228 f->current_loc_valid = FALSE;
4229 f->first_decode = FALSE;
4230 f->samples_output = 0;
4231 f->channel_buffer_start = 0;
4232 f->channel_buffer_end = 0;
4233 }
4234
vorbis_search_for_page_pushdata(vorb * f,uint8 * data,int data_len)4235 static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
4236 {
4237 int i,n;
4238 for (i=0; i < f->page_crc_tests; ++i)
4239 f->scan[i].bytes_done = 0;
4240
4241 // if we have room for more scans, search for them first, because
4242 // they may cause us to stop early if their header is incomplete
4243 if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
4244 if (data_len < 4) return 0;
4245 data_len -= 3; // need to look for 4-byte sequence, so don't miss
4246 // one that straddles a boundary
4247 for (i=0; i < data_len; ++i) {
4248 if (data[i] == 0x4f) {
4249 if (0==memcmp(data+i, ogg_page_header, 4)) {
4250 int j,len;
4251 uint32 crc;
4252 // make sure we have the whole page header
4253 if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
4254 // only read up to this page start, so hopefully we'll
4255 // have the whole page header start next time
4256 data_len = i;
4257 break;
4258 }
4259 // ok, we have it all; compute the length of the page
4260 len = 27 + data[i+26];
4261 for (j=0; j < data[i+26]; ++j)
4262 len += data[i+27+j];
4263 // scan everything up to the embedded crc (which we must 0)
4264 crc = 0;
4265 for (j=0; j < 22; ++j)
4266 crc = crc32_update(crc, data[i+j]);
4267 // now process 4 0-bytes
4268 for ( ; j < 26; ++j)
4269 crc = crc32_update(crc, 0);
4270 // len is the total number of bytes we need to scan
4271 n = f->page_crc_tests++;
4272 f->scan[n].bytes_left = len-j;
4273 f->scan[n].crc_so_far = crc;
4274 f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24);
4275 // if the last frame on a page is continued to the next, then
4276 // we can't recover the sample_loc immediately
4277 if (data[i+27+data[i+26]-1] == 255)
4278 f->scan[n].sample_loc = ~0;
4279 else
4280 f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24);
4281 f->scan[n].bytes_done = i+j;
4282 if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT)
4283 break;
4284 // keep going if we still have room for more
4285 }
4286 }
4287 }
4288 }
4289
4290 for (i=0; i < f->page_crc_tests;) {
4291 uint32 crc;
4292 int j;
4293 int n = f->scan[i].bytes_done;
4294 int m = f->scan[i].bytes_left;
4295 if (m > data_len - n) m = data_len - n;
4296 // m is the bytes to scan in the current chunk
4297 crc = f->scan[i].crc_so_far;
4298 for (j=0; j < m; ++j)
4299 crc = crc32_update(crc, data[n+j]);
4300 f->scan[i].bytes_left -= m;
4301 f->scan[i].crc_so_far = crc;
4302 if (f->scan[i].bytes_left == 0) {
4303 // does it match?
4304 if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
4305 // Houston, we have page
4306 data_len = n+m; // consumption amount is wherever that scan ended
4307 f->page_crc_tests = -1; // drop out of page scan mode
4308 f->previous_length = 0; // decode-but-don't-output one frame
4309 f->next_seg = -1; // start a new page
4310 f->current_loc = f->scan[i].sample_loc; // set the current sample location
4311 // to the amount we'd have decoded had we decoded this page
4312 f->current_loc_valid = f->current_loc != ~0U;
4313 return data_len;
4314 }
4315 // delete entry
4316 f->scan[i] = f->scan[--f->page_crc_tests];
4317 } else {
4318 ++i;
4319 }
4320 }
4321
4322 return data_len;
4323 }
4324
4325 // return value: number of bytes we used
stb_vorbis_decode_frame_pushdata(stb_vorbis * f,const uint8 * data,int data_len,int * channels,float *** output,int * samples)4326 int stb_vorbis_decode_frame_pushdata(
4327 stb_vorbis *f, // the file we're decoding
4328 const uint8 *data, int data_len, // the memory available for decoding
4329 int *channels, // place to write number of float * buffers
4330 float ***output, // place to write float ** array of float * buffers
4331 int *samples // place to write number of output samples
4332 )
4333 {
4334 int i;
4335 int len,right,left;
4336
4337 if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4338
4339 if (f->page_crc_tests >= 0) {
4340 *samples = 0;
4341 return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len);
4342 }
4343
4344 f->stream = (uint8 *) data;
4345 f->stream_end = (uint8 *) data + data_len;
4346 f->error = VORBIS__no_error;
4347
4348 // check that we have the entire packet in memory
4349 if (!is_whole_packet_present(f, FALSE)) {
4350 *samples = 0;
4351 return 0;
4352 }
4353
4354 if (!vorbis_decode_packet(f, &len, &left, &right)) {
4355 // save the actual error we encountered
4356 enum STBVorbisError error = f->error;
4357 if (error == VORBIS_bad_packet_type) {
4358 // flush and resynch
4359 f->error = VORBIS__no_error;
4360 while (get8_packet(f) != EOP)
4361 if (f->eof) break;
4362 *samples = 0;
4363 return (int) (f->stream - data);
4364 }
4365 if (error == VORBIS_continued_packet_flag_invalid) {
4366 if (f->previous_length == 0) {
4367 // we may be resynching, in which case it's ok to hit one
4368 // of these; just discard the packet
4369 f->error = VORBIS__no_error;
4370 while (get8_packet(f) != EOP)
4371 if (f->eof) break;
4372 *samples = 0;
4373 return (int) (f->stream - data);
4374 }
4375 }
4376 // if we get an error while parsing, what to do?
4377 // well, it DEFINITELY won't work to continue from where we are!
4378 stb_vorbis_flush_pushdata(f);
4379 // restore the error that actually made us bail
4380 f->error = error;
4381 *samples = 0;
4382 return 1;
4383 }
4384
4385 // success!
4386 len = vorbis_finish_frame(f, len, left, right);
4387 for (i=0; i < f->channels; ++i)
4388 f->outputs[i] = f->channel_buffers[i] + left;
4389
4390 if (channels) *channels = f->channels;
4391 *samples = len;
4392 *output = f->outputs;
4393 return (int) (f->stream - data);
4394 }
4395
stb_vorbis_open_pushdata(const unsigned char * data,int data_len,int * data_used,int * error,const stb_vorbis_alloc * alloc)4396 stb_vorbis *stb_vorbis_open_pushdata(
4397 const unsigned char *data, int data_len, // the memory available for decoding
4398 int *data_used, // only defined if result is not NULL
4399 int *error, const stb_vorbis_alloc *alloc)
4400 {
4401 stb_vorbis *f, p;
4402 vorbis_init(&p, alloc);
4403 p.stream = (uint8 *) data;
4404 p.stream_end = (uint8 *) data + data_len;
4405 p.push_mode = TRUE;
4406 if (!start_decoder(&p)) {
4407 if (p.eof)
4408 *error = VORBIS_need_more_data;
4409 else
4410 *error = p.error;
4411 return NULL;
4412 }
4413 f = vorbis_alloc(&p);
4414 if (f) {
4415 *f = p;
4416 *data_used = (int) (f->stream - data);
4417 *error = 0;
4418 return f;
4419 } else {
4420 vorbis_deinit(&p);
4421 return NULL;
4422 }
4423 }
4424 #endif // STB_VORBIS_NO_PUSHDATA_API
4425
stb_vorbis_get_file_offset(stb_vorbis * f)4426 unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
4427 {
4428 #ifndef STB_VORBIS_NO_PUSHDATA_API
4429 if (f->push_mode) return 0;
4430 #endif
4431 if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start);
4432 #ifndef STB_VORBIS_NO_STDIO
4433 return (unsigned int) (ftell(f->f) - f->f_start);
4434 #endif
4435 }
4436
4437 #ifndef STB_VORBIS_NO_PULLDATA_API
4438 //
4439 // DATA-PULLING API
4440 //
4441
vorbis_find_page(stb_vorbis * f,uint32 * end,uint32 * last)4442 static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
4443 {
4444 for(;;) {
4445 int n;
4446 if (f->eof) return 0;
4447 n = get8(f);
4448 if (n == 0x4f) { // page header candidate
4449 unsigned int retry_loc = stb_vorbis_get_file_offset(f);
4450 int i;
4451 // check if we're off the end of a file_section stream
4452 if (retry_loc - 25 > f->stream_len)
4453 return 0;
4454 // check the rest of the header
4455 for (i=1; i < 4; ++i)
4456 if (get8(f) != ogg_page_header[i])
4457 break;
4458 if (f->eof) return 0;
4459 if (i == 4) {
4460 uint8 header[27];
4461 uint32 i, crc, goal, len;
4462 for (i=0; i < 4; ++i)
4463 header[i] = ogg_page_header[i];
4464 for (; i < 27; ++i)
4465 header[i] = get8(f);
4466 if (f->eof) return 0;
4467 if (header[4] != 0) goto invalid;
4468 goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24);
4469 for (i=22; i < 26; ++i)
4470 header[i] = 0;
4471 crc = 0;
4472 for (i=0; i < 27; ++i)
4473 crc = crc32_update(crc, header[i]);
4474 len = 0;
4475 for (i=0; i < header[26]; ++i) {
4476 int s = get8(f);
4477 crc = crc32_update(crc, s);
4478 len += s;
4479 }
4480 if (len && f->eof) return 0;
4481 for (i=0; i < len; ++i)
4482 crc = crc32_update(crc, get8(f));
4483 // finished parsing probable page
4484 if (crc == goal) {
4485 // we could now check that it's either got the last
4486 // page flag set, OR it's followed by the capture
4487 // pattern, but I guess TECHNICALLY you could have
4488 // a file with garbage between each ogg page and recover
4489 // from it automatically? So even though that paranoia
4490 // might decrease the chance of an invalid decode by
4491 // another 2^32, not worth it since it would hose those
4492 // invalid-but-useful files?
4493 if (end)
4494 *end = stb_vorbis_get_file_offset(f);
4495 if (last) {
4496 if (header[5] & 0x04)
4497 *last = 1;
4498 else
4499 *last = 0;
4500 }
4501 set_file_offset(f, retry_loc-1);
4502 return 1;
4503 }
4504 }
4505 invalid:
4506 // not a valid page, so rewind and look for next one
4507 set_file_offset(f, retry_loc);
4508 }
4509 }
4510 }
4511
4512
4513 #define SAMPLE_unknown 0xffffffff
4514
4515 // seeking is implemented with a binary search, which narrows down the range to
4516 // 64K, before using a linear search (because finding the synchronization
4517 // pattern can be expensive, and the chance we'd find the end page again is
4518 // relatively high for small ranges)
4519 //
4520 // two initial interpolation-style probes are used at the start of the search
4521 // to try to bound either side of the binary search sensibly, while still
4522 // working in O(log n) time if they fail.
4523
get_seek_page_info(stb_vorbis * f,ProbedPage * z)4524 static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
4525 {
4526 uint8 header[27], lacing[255];
4527 int i,len;
4528
4529 // record where the page starts
4530 z->page_start = stb_vorbis_get_file_offset(f);
4531
4532 // parse the header
4533 getn(f, header, 27);
4534 if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
4535 return 0;
4536 getn(f, lacing, header[26]);
4537
4538 // determine the length of the payload
4539 len = 0;
4540 for (i=0; i < header[26]; ++i)
4541 len += lacing[i];
4542
4543 // this implies where the page ends
4544 z->page_end = z->page_start + 27 + header[26] + len;
4545
4546 // read the last-decoded sample out of the data
4547 z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
4548
4549 // restore file state to where we were
4550 set_file_offset(f, z->page_start);
4551 return 1;
4552 }
4553
4554 // rarely used function to seek back to the preceeding page while finding the
4555 // start of a packet
go_to_page_before(stb_vorbis * f,unsigned int limit_offset)4556 static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
4557 {
4558 unsigned int previous_safe, end;
4559
4560 // now we want to seek back 64K from the limit
4561 if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset)
4562 previous_safe = limit_offset - 65536;
4563 else
4564 previous_safe = f->first_audio_page_offset;
4565
4566 set_file_offset(f, previous_safe);
4567
4568 while (vorbis_find_page(f, &end, NULL)) {
4569 if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
4570 return 1;
4571 set_file_offset(f, end);
4572 }
4573
4574 return 0;
4575 }
4576
4577 // implements the search logic for finding a page and starting decoding. if
4578 // the function succeeds, current_loc_valid will be true and current_loc will
4579 // be less than or equal to the provided sample number (the closer the
4580 // better).
seek_to_sample_coarse(stb_vorbis * f,uint32 sample_number)4581 static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
4582 {
4583 ProbedPage left, right, mid;
4584 int i, start_seg_with_known_loc, end_pos, page_start;
4585 uint32 delta, stream_length, padding;
4586 double offset, bytes_per_sample;
4587 int probe = 0;
4588
4589 // find the last page and validate the target sample
4590 stream_length = stb_vorbis_stream_length_in_samples(f);
4591 if (stream_length == 0) return error(f, VORBIS_seek_without_length);
4592 if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
4593
4594 // this is the maximum difference between the window-center (which is the
4595 // actual granule position value), and the right-start (which the spec
4596 // indicates should be the granule position (give or take one)).
4597 padding = ((f->blocksize_1 - f->blocksize_0) >> 2);
4598 if (sample_number < padding)
4599 sample_number = 0;
4600 else
4601 sample_number -= padding;
4602
4603 left = f->p_first;
4604 while (left.last_decoded_sample == ~0U) {
4605 // (untested) the first page does not have a 'last_decoded_sample'
4606 set_file_offset(f, left.page_end);
4607 if (!get_seek_page_info(f, &left)) goto error;
4608 }
4609
4610 right = f->p_last;
4611 assert(right.last_decoded_sample != ~0U);
4612
4613 // starting from the start is handled differently
4614 if (sample_number <= left.last_decoded_sample) {
4615 stb_vorbis_seek_start(f);
4616 return 1;
4617 }
4618
4619 while (left.page_end != right.page_start) {
4620 assert(left.page_end < right.page_start);
4621 // search range in bytes
4622 delta = right.page_start - left.page_end;
4623 if (delta <= 65536) {
4624 // there's only 64K left to search - handle it linearly
4625 set_file_offset(f, left.page_end);
4626 } else {
4627 if (probe < 2) {
4628 if (probe == 0) {
4629 // first probe (interpolate)
4630 double data_bytes = right.page_end - left.page_start;
4631 bytes_per_sample = data_bytes / right.last_decoded_sample;
4632 offset = left.page_start + bytes_per_sample * (sample_number - left.last_decoded_sample);
4633 } else {
4634 // second probe (try to bound the other side)
4635 double error = ((double) sample_number - mid.last_decoded_sample) * bytes_per_sample;
4636 if (error >= 0 && error < 8000) error = 8000;
4637 if (error < 0 && error > -8000) error = -8000;
4638 offset += error * 2;
4639 }
4640
4641 // ensure the offset is valid
4642 if (offset < left.page_end)
4643 offset = left.page_end;
4644 if (offset > right.page_start - 65536)
4645 offset = right.page_start - 65536;
4646
4647 set_file_offset(f, (unsigned int) offset);
4648 } else {
4649 // binary search for large ranges (offset by 32K to ensure
4650 // we don't hit the right page)
4651 set_file_offset(f, left.page_end + (delta / 2) - 32768);
4652 }
4653
4654 if (!vorbis_find_page(f, NULL, NULL)) goto error;
4655 }
4656
4657 for (;;) {
4658 if (!get_seek_page_info(f, &mid)) goto error;
4659 if (mid.last_decoded_sample != ~0U) break;
4660 // (untested) no frames end on this page
4661 set_file_offset(f, mid.page_end);
4662 assert(mid.page_start < right.page_start);
4663 }
4664
4665 // if we've just found the last page again then we're in a tricky file,
4666 // and we're close enough.
4667 if (mid.page_start == right.page_start)
4668 break;
4669
4670 if (sample_number < mid.last_decoded_sample)
4671 right = mid;
4672 else
4673 left = mid;
4674
4675 ++probe;
4676 }
4677
4678 // seek back to start of the last packet
4679 page_start = left.page_start;
4680 set_file_offset(f, page_start);
4681 if (!start_page(f)) return error(f, VORBIS_seek_failed);
4682 end_pos = f->end_seg_with_known_loc;
4683 assert(end_pos >= 0);
4684
4685 for (;;) {
4686 for (i = end_pos; i > 0; --i)
4687 if (f->segments[i-1] != 255)
4688 break;
4689
4690 start_seg_with_known_loc = i;
4691
4692 if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet))
4693 break;
4694
4695 // (untested) the final packet begins on an earlier page
4696 if (!go_to_page_before(f, page_start))
4697 goto error;
4698
4699 page_start = stb_vorbis_get_file_offset(f);
4700 if (!start_page(f)) goto error;
4701 end_pos = f->segment_count - 1;
4702 }
4703
4704 // prepare to start decoding
4705 f->current_loc_valid = FALSE;
4706 f->last_seg = FALSE;
4707 f->valid_bits = 0;
4708 f->packet_bytes = 0;
4709 f->bytes_in_seg = 0;
4710 f->previous_length = 0;
4711 f->next_seg = start_seg_with_known_loc;
4712
4713 for (i = 0; i < start_seg_with_known_loc; i++)
4714 skip(f, f->segments[i]);
4715
4716 // start decoding (optimizable - this frame is generally discarded)
4717 vorbis_pump_first_frame(f);
4718 return 1;
4719
4720 error:
4721 // try to restore the file to a valid state
4722 stb_vorbis_seek_start(f);
4723 return error(f, VORBIS_seek_failed);
4724 }
4725
4726 // the same as vorbis_decode_initial, but without advancing
peek_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)4727 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
4728 {
4729 int bits_read, bytes_read;
4730
4731 if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
4732 return 0;
4733
4734 // either 1 or 2 bytes were read, figure out which so we can rewind
4735 bits_read = 1 + ilog(f->mode_count-1);
4736 if (f->mode_config[*mode].blockflag)
4737 bits_read += 2;
4738 bytes_read = (bits_read + 7) / 8;
4739
4740 f->bytes_in_seg += bytes_read;
4741 f->packet_bytes -= bytes_read;
4742 skip(f, -bytes_read);
4743 if (f->next_seg == -1)
4744 f->next_seg = f->segment_count - 1;
4745 else
4746 f->next_seg--;
4747 f->valid_bits = 0;
4748
4749 return 1;
4750 }
4751
stb_vorbis_seek_frame(stb_vorbis * f,unsigned int sample_number)4752 int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
4753 {
4754 uint32 max_frame_samples;
4755
4756 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4757
4758 // fast page-level search
4759 if (!seek_to_sample_coarse(f, sample_number))
4760 return 0;
4761
4762 assert(f->current_loc_valid);
4763 assert(f->current_loc <= sample_number);
4764
4765 // linear search for the relevant packet
4766 max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2;
4767 while (f->current_loc < sample_number) {
4768 int left_start, left_end, right_start, right_end, mode, frame_samples;
4769 if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
4770 return error(f, VORBIS_seek_failed);
4771 // calculate the number of samples returned by the next frame
4772 frame_samples = right_start - left_start;
4773 if (f->current_loc + frame_samples > sample_number) {
4774 return 1; // the next frame will contain the sample
4775 } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) {
4776 // there's a chance the frame after this could contain the sample
4777 vorbis_pump_first_frame(f);
4778 } else {
4779 // this frame is too early to be relevant
4780 f->current_loc += frame_samples;
4781 f->previous_length = 0;
4782 maybe_start_packet(f);
4783 flush_packet(f);
4784 }
4785 }
4786 // the next frame will start with the sample
4787 assert(f->current_loc == sample_number);
4788 return 1;
4789 }
4790
stb_vorbis_seek(stb_vorbis * f,unsigned int sample_number)4791 int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
4792 {
4793 if (!stb_vorbis_seek_frame(f, sample_number))
4794 return 0;
4795
4796 if (sample_number != f->current_loc) {
4797 int n;
4798 uint32 frame_start = f->current_loc;
4799 stb_vorbis_get_frame_float(f, &n, NULL);
4800 assert(sample_number > frame_start);
4801 assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end);
4802 f->channel_buffer_start += (sample_number - frame_start);
4803 }
4804
4805 return 1;
4806 }
4807
stb_vorbis_seek_start(stb_vorbis * f)4808 void stb_vorbis_seek_start(stb_vorbis *f)
4809 {
4810 if (IS_PUSH_MODE(f)) { error(f, VORBIS_invalid_api_mixing); return; }
4811 set_file_offset(f, f->first_audio_page_offset);
4812 f->previous_length = 0;
4813 f->first_decode = TRUE;
4814 f->next_seg = -1;
4815 vorbis_pump_first_frame(f);
4816 }
4817
stb_vorbis_stream_length_in_samples(stb_vorbis * f)4818 unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
4819 {
4820 unsigned int restore_offset, previous_safe;
4821 unsigned int end, last_page_loc;
4822
4823 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4824 if (!f->total_samples) {
4825 unsigned int last;
4826 uint32 lo,hi;
4827 char header[6];
4828
4829 // first, store the current decode position so we can restore it
4830 restore_offset = stb_vorbis_get_file_offset(f);
4831
4832 // now we want to seek back 64K from the end (the last page must
4833 // be at most a little less than 64K, but let's allow a little slop)
4834 if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
4835 previous_safe = f->stream_len - 65536;
4836 else
4837 previous_safe = f->first_audio_page_offset;
4838
4839 set_file_offset(f, previous_safe);
4840 // previous_safe is now our candidate 'earliest known place that seeking
4841 // to will lead to the final page'
4842
4843 if (!vorbis_find_page(f, &end, &last)) {
4844 // if we can't find a page, we're hosed!
4845 f->error = VORBIS_cant_find_last_page;
4846 f->total_samples = 0xffffffff;
4847 goto done;
4848 }
4849
4850 // check if there are more pages
4851 last_page_loc = stb_vorbis_get_file_offset(f);
4852
4853 // stop when the last_page flag is set, not when we reach eof;
4854 // this allows us to stop short of a 'file_section' end without
4855 // explicitly checking the length of the section
4856 while (!last) {
4857 set_file_offset(f, end);
4858 if (!vorbis_find_page(f, &end, &last)) {
4859 // the last page we found didn't have the 'last page' flag
4860 // set. whoops!
4861 break;
4862 }
4863 previous_safe = last_page_loc+1;
4864 last_page_loc = stb_vorbis_get_file_offset(f);
4865 }
4866
4867 set_file_offset(f, last_page_loc);
4868
4869 // parse the header
4870 getn(f, (unsigned char *)header, 6);
4871 // extract the absolute granule position
4872 lo = get32(f);
4873 hi = get32(f);
4874 if (lo == 0xffffffff && hi == 0xffffffff) {
4875 f->error = VORBIS_cant_find_last_page;
4876 f->total_samples = SAMPLE_unknown;
4877 goto done;
4878 }
4879 if (hi)
4880 lo = 0xfffffffe; // saturate
4881 f->total_samples = lo;
4882
4883 f->p_last.page_start = last_page_loc;
4884 f->p_last.page_end = end;
4885 f->p_last.last_decoded_sample = lo;
4886
4887 done:
4888 set_file_offset(f, restore_offset);
4889 }
4890 return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
4891 }
4892
stb_vorbis_stream_length_in_seconds(stb_vorbis * f)4893 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
4894 {
4895 return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
4896 }
4897
4898
4899
stb_vorbis_get_frame_float(stb_vorbis * f,int * channels,float *** output)4900 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
4901 {
4902 int len, right,left,i;
4903 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4904
4905 if (!vorbis_decode_packet(f, &len, &left, &right)) {
4906 f->channel_buffer_start = f->channel_buffer_end = 0;
4907 return 0;
4908 }
4909
4910 len = vorbis_finish_frame(f, len, left, right);
4911 for (i=0; i < f->channels; ++i)
4912 f->outputs[i] = f->channel_buffers[i] + left;
4913
4914 f->channel_buffer_start = left;
4915 f->channel_buffer_end = left+len;
4916
4917 if (channels) *channels = f->channels;
4918 if (output) *output = f->outputs;
4919 return len;
4920 }
4921
4922 #ifndef STB_VORBIS_NO_STDIO
4923
stb_vorbis_open_file_section(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc,unsigned int length)4924 stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length)
4925 {
4926 stb_vorbis *f, p;
4927 vorbis_init(&p, alloc);
4928 p.f = file;
4929 p.f_start = (uint32) ftell(file);
4930 p.stream_len = length;
4931 p.close_on_free = close_on_free;
4932 if (start_decoder(&p)) {
4933 f = vorbis_alloc(&p);
4934 if (f) {
4935 *f = p;
4936 vorbis_pump_first_frame(f);
4937 return f;
4938 }
4939 }
4940 if (error) *error = p.error;
4941 vorbis_deinit(&p);
4942 return NULL;
4943 }
4944
stb_vorbis_open_file(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc)4945 stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc)
4946 {
4947 unsigned int len, start;
4948 start = (unsigned int) ftell(file);
4949 fseek(file, 0, SEEK_END);
4950 len = (unsigned int) (ftell(file) - start);
4951 fseek(file, start, SEEK_SET);
4952 return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
4953 }
4954
stb_vorbis_open_filename(const char * filename,int * error,const stb_vorbis_alloc * alloc)4955 stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
4956 {
4957 FILE *f = fopen(filename, "rb");
4958 if (f)
4959 return stb_vorbis_open_file(f, TRUE, error, alloc);
4960 if (error) *error = VORBIS_file_open_failure;
4961 return NULL;
4962 }
4963 #endif // STB_VORBIS_NO_STDIO
4964
stb_vorbis_open_memory(const unsigned char * data,int len,int * error,const stb_vorbis_alloc * alloc)4965 stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc)
4966 {
4967 stb_vorbis *f, p;
4968 if (data == NULL) return NULL;
4969 vorbis_init(&p, alloc);
4970 p.stream = (uint8 *) data;
4971 p.stream_end = (uint8 *) data + len;
4972 p.stream_start = (uint8 *) p.stream;
4973 p.stream_len = len;
4974 p.push_mode = FALSE;
4975 if (start_decoder(&p)) {
4976 f = vorbis_alloc(&p);
4977 if (f) {
4978 *f = p;
4979 vorbis_pump_first_frame(f);
4980 return f;
4981 }
4982 }
4983 if (error) *error = p.error;
4984 vorbis_deinit(&p);
4985 return NULL;
4986 }
4987
4988 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
4989 #define PLAYBACK_MONO 1
4990 #define PLAYBACK_LEFT 2
4991 #define PLAYBACK_RIGHT 4
4992
4993 #define L (PLAYBACK_LEFT | PLAYBACK_MONO)
4994 #define C (PLAYBACK_LEFT | PLAYBACK_RIGHT | PLAYBACK_MONO)
4995 #define R (PLAYBACK_RIGHT | PLAYBACK_MONO)
4996
4997 static int8 channel_position[7][6] =
4998 {
4999 { 0 },
5000 { C },
5001 { L, R },
5002 { L, C, R },
5003 { L, R, L, R },
5004 { L, C, R, L, R },
5005 { L, C, R, L, R, C },
5006 };
5007
5008
5009 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
5010 typedef union {
5011 float f;
5012 int i;
5013 } float_conv;
5014 typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4];
5015 #define FASTDEF(x) float_conv x
5016 // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
5017 #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT))
5018 #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22))
5019 #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s))
5020 #define check_endianness()
5021 #else
5022 #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s))))
5023 #define check_endianness()
5024 #define FASTDEF(x)
5025 #endif
5026
copy_samples(short * dest,float * src,int len)5027 static void copy_samples(short *dest, float *src, int len)
5028 {
5029 int i;
5030 check_endianness();
5031 for (i=0; i < len; ++i) {
5032 FASTDEF(temp);
5033 int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i],15);
5034 if ((unsigned int) (v + 32768) > 65535)
5035 v = v < 0 ? -32768 : 32767;
5036 dest[i] = v;
5037 }
5038 }
5039
compute_samples(int mask,short * output,int num_c,float ** data,int d_offset,int len)5040 static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
5041 {
5042 #define BUFFER_SIZE 32
5043 float buffer[BUFFER_SIZE];
5044 int i,j,o,n = BUFFER_SIZE;
5045 check_endianness();
5046 for (o = 0; o < len; o += BUFFER_SIZE) {
5047 memset(buffer, 0, sizeof(buffer));
5048 if (o + n > len) n = len - o;
5049 for (j=0; j < num_c; ++j) {
5050 if (channel_position[num_c][j] & mask) {
5051 for (i=0; i < n; ++i)
5052 buffer[i] += data[j][d_offset+o+i];
5053 }
5054 }
5055 for (i=0; i < n; ++i) {
5056 FASTDEF(temp);
5057 int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5058 if ((unsigned int) (v + 32768) > 65535)
5059 v = v < 0 ? -32768 : 32767;
5060 output[o+i] = v;
5061 }
5062 }
5063 }
5064
compute_stereo_samples(short * output,int num_c,float ** data,int d_offset,int len)5065 static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
5066 {
5067 #define BUFFER_SIZE 32
5068 float buffer[BUFFER_SIZE];
5069 int i,j,o,n = BUFFER_SIZE >> 1;
5070 // o is the offset in the source data
5071 check_endianness();
5072 for (o = 0; o < len; o += BUFFER_SIZE >> 1) {
5073 // o2 is the offset in the output data
5074 int o2 = o << 1;
5075 memset(buffer, 0, sizeof(buffer));
5076 if (o + n > len) n = len - o;
5077 for (j=0; j < num_c; ++j) {
5078 int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);
5079 if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) {
5080 for (i=0; i < n; ++i) {
5081 buffer[i*2+0] += data[j][d_offset+o+i];
5082 buffer[i*2+1] += data[j][d_offset+o+i];
5083 }
5084 } else if (m == PLAYBACK_LEFT) {
5085 for (i=0; i < n; ++i) {
5086 buffer[i*2+0] += data[j][d_offset+o+i];
5087 }
5088 } else if (m == PLAYBACK_RIGHT) {
5089 for (i=0; i < n; ++i) {
5090 buffer[i*2+1] += data[j][d_offset+o+i];
5091 }
5092 }
5093 }
5094 for (i=0; i < (n<<1); ++i) {
5095 FASTDEF(temp);
5096 int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5097 if ((unsigned int) (v + 32768) > 65535)
5098 v = v < 0 ? -32768 : 32767;
5099 output[o2+i] = v;
5100 }
5101 }
5102 }
5103
convert_samples_short(int buf_c,short ** buffer,int b_offset,int data_c,float ** data,int d_offset,int samples)5104 static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
5105 {
5106 int i;
5107 if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5108 static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
5109 for (i=0; i < buf_c; ++i)
5110 compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples);
5111 } else {
5112 int limit = buf_c < data_c ? buf_c : data_c;
5113 for (i=0; i < limit; ++i)
5114 copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
5115 for ( ; i < buf_c; ++i)
5116 memset(buffer[i]+b_offset, 0, sizeof(short) * samples);
5117 }
5118 }
5119
stb_vorbis_get_frame_short(stb_vorbis * f,int num_c,short ** buffer,int num_samples)5120 int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
5121 {
5122 float **output;
5123 int len = stb_vorbis_get_frame_float(f, NULL, &output);
5124 if (len > num_samples) len = num_samples;
5125 if (len)
5126 convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len);
5127 return len;
5128 }
5129
convert_channels_short_interleaved(int buf_c,short * buffer,int data_c,float ** data,int d_offset,int len)5130 static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
5131 {
5132 int i;
5133 check_endianness();
5134 if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5135 assert(buf_c == 2);
5136 for (i=0; i < buf_c; ++i)
5137 compute_stereo_samples(buffer, data_c, data, d_offset, len);
5138 } else {
5139 int limit = buf_c < data_c ? buf_c : data_c;
5140 int j;
5141 for (j=0; j < len; ++j) {
5142 for (i=0; i < limit; ++i) {
5143 FASTDEF(temp);
5144 float f = data[i][d_offset+j];
5145 int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15);
5146 if ((unsigned int) (v + 32768) > 65535)
5147 v = v < 0 ? -32768 : 32767;
5148 *buffer++ = v;
5149 }
5150 for ( ; i < buf_c; ++i)
5151 *buffer++ = 0;
5152 }
5153 }
5154 }
5155
stb_vorbis_get_frame_short_interleaved(stb_vorbis * f,int num_c,short * buffer,int num_shorts)5156 int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
5157 {
5158 float **output;
5159 int len;
5160 if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
5161 len = stb_vorbis_get_frame_float(f, NULL, &output);
5162 if (len) {
5163 if (len*num_c > num_shorts) len = num_shorts / num_c;
5164 convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len);
5165 }
5166 return len;
5167 }
5168
stb_vorbis_get_samples_short_interleaved(stb_vorbis * f,int channels,short * buffer,int num_shorts)5169 int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
5170 {
5171 float **outputs;
5172 int len = num_shorts / channels;
5173 int n=0;
5174 int z = f->channels;
5175 if (z > channels) z = channels;
5176 while (n < len) {
5177 int k = f->channel_buffer_end - f->channel_buffer_start;
5178 if (n+k >= len) k = len - n;
5179 if (k)
5180 convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5181 buffer += k*channels;
5182 n += k;
5183 f->channel_buffer_start += k;
5184 if (n == len) break;
5185 if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5186 }
5187 return n;
5188 }
5189
stb_vorbis_get_samples_short(stb_vorbis * f,int channels,short ** buffer,int len)5190 int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
5191 {
5192 float **outputs;
5193 int n=0;
5194 int z = f->channels;
5195 if (z > channels) z = channels;
5196 while (n < len) {
5197 int k = f->channel_buffer_end - f->channel_buffer_start;
5198 if (n+k >= len) k = len - n;
5199 if (k)
5200 convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5201 n += k;
5202 f->channel_buffer_start += k;
5203 if (n == len) break;
5204 if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5205 }
5206 return n;
5207 }
5208
5209 #ifndef STB_VORBIS_NO_STDIO
stb_vorbis_decode_filename(const char * filename,int * channels,int * sample_rate,short ** output)5210 int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output)
5211 {
5212 int data_len, offset, total, limit, error;
5213 short *data;
5214 stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
5215 if (v == NULL) return -1;
5216 limit = v->channels * 4096;
5217 *channels = v->channels;
5218 if (sample_rate)
5219 *sample_rate = v->sample_rate;
5220 offset = data_len = 0;
5221 total = limit;
5222 data = (short *) malloc(total * sizeof(*data));
5223 if (data == NULL) {
5224 stb_vorbis_close(v);
5225 return -2;
5226 }
5227 for (;;) {
5228 int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5229 if (n == 0) break;
5230 data_len += n;
5231 offset += n * v->channels;
5232 if (offset + limit > total) {
5233 short *data2;
5234 total *= 2;
5235 data2 = (short *) realloc(data, total * sizeof(*data));
5236 if (data2 == NULL) {
5237 free(data);
5238 stb_vorbis_close(v);
5239 return -2;
5240 }
5241 data = data2;
5242 }
5243 }
5244 *output = data;
5245 stb_vorbis_close(v);
5246 return data_len;
5247 }
5248 #endif // NO_STDIO
5249
stb_vorbis_decode_memory(const uint8 * mem,int len,int * channels,int * sample_rate,short ** output)5250 int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output)
5251 {
5252 int data_len, offset, total, limit, error;
5253 short *data;
5254 stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
5255 if (v == NULL) return -1;
5256 limit = v->channels * 4096;
5257 *channels = v->channels;
5258 if (sample_rate)
5259 *sample_rate = v->sample_rate;
5260 offset = data_len = 0;
5261 total = limit;
5262 data = (short *) malloc(total * sizeof(*data));
5263 if (data == NULL) {
5264 stb_vorbis_close(v);
5265 return -2;
5266 }
5267 for (;;) {
5268 int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5269 if (n == 0) break;
5270 data_len += n;
5271 offset += n * v->channels;
5272 if (offset + limit > total) {
5273 short *data2;
5274 total *= 2;
5275 data2 = (short *) realloc(data, total * sizeof(*data));
5276 if (data2 == NULL) {
5277 free(data);
5278 stb_vorbis_close(v);
5279 return -2;
5280 }
5281 data = data2;
5282 }
5283 }
5284 *output = data;
5285 stb_vorbis_close(v);
5286 return data_len;
5287 }
5288 #endif // STB_VORBIS_NO_INTEGER_CONVERSION
5289
stb_vorbis_get_samples_float_interleaved(stb_vorbis * f,int channels,float * buffer,int num_floats)5290 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
5291 {
5292 float **outputs;
5293 int len = num_floats / channels;
5294 int n=0;
5295 int z = f->channels;
5296 if (z > channels) z = channels;
5297 while (n < len) {
5298 int i,j;
5299 int k = f->channel_buffer_end - f->channel_buffer_start;
5300 if (n+k >= len) k = len - n;
5301 for (j=0; j < k; ++j) {
5302 for (i=0; i < z; ++i)
5303 *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
5304 for ( ; i < channels; ++i)
5305 *buffer++ = 0;
5306 }
5307 n += k;
5308 f->channel_buffer_start += k;
5309 if (n == len)
5310 break;
5311 if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5312 break;
5313 }
5314 return n;
5315 }
5316
stb_vorbis_get_samples_float(stb_vorbis * f,int channels,float ** buffer,int num_samples)5317 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
5318 {
5319 float **outputs;
5320 int n=0;
5321 int z = f->channels;
5322 if (z > channels) z = channels;
5323 while (n < num_samples) {
5324 int i;
5325 int k = f->channel_buffer_end - f->channel_buffer_start;
5326 if (n+k >= num_samples) k = num_samples - n;
5327 if (k) {
5328 for (i=0; i < z; ++i)
5329 memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k);
5330 for ( ; i < channels; ++i)
5331 memset(buffer[i]+n, 0, sizeof(float) * k);
5332 }
5333 n += k;
5334 f->channel_buffer_start += k;
5335 if (n == num_samples)
5336 break;
5337 if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5338 break;
5339 }
5340 return n;
5341 }
5342 #endif // STB_VORBIS_NO_PULLDATA_API
5343
5344 /* Version history
5345 1.09 - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
5346 1.08 - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
5347 avoid discarding last frame of audio data
5348 1.07 - 2015/01/16 - fixed some warnings, fix mingw, const-correct API
5349 some more crash fixes when out of memory or with corrupt files
5350 1.06 - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
5351 some crash fixes when out of memory or with corrupt files
5352 1.05 - 2015/04/19 - don't define __forceinline if it's redundant
5353 1.04 - 2014/08/27 - fix missing const-correct case in API
5354 1.03 - 2014/08/07 - Warning fixes
5355 1.02 - 2014/07/09 - Declare qsort compare function _cdecl on windows
5356 1.01 - 2014/06/18 - fix stb_vorbis_get_samples_float
5357 1.0 - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in multichannel
5358 (API change) report sample rate for decode-full-file funcs
5359 0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5360 0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5361 0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5362 0.99993 - remove assert that fired on legal files with empty tables
5363 0.99992 - rewind-to-start
5364 0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5365 0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5366 0.9998 - add a full-decode function with a memory source
5367 0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5368 0.9996 - query length of vorbis stream in samples/seconds
5369 0.9995 - bugfix to another optimization that only happened in certain files
5370 0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5371 0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5372 0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5373 0.9991 - performance improvement of IMDCT
5374 0.999 - (should have been 0.9990) performance improvement of IMDCT
5375 0.998 - no-CRT support from Casey Muratori
5376 0.997 - bugfixes for bugs found by Terje Mathisen
5377 0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5378 0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5379 0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5380 0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5381 0.992 - fixes for MinGW warning
5382 0.991 - turn fast-float-conversion on by default
5383 0.990 - fix push-mode seek recovery if you seek into the headers
5384 0.98b - fix to bad release of 0.98
5385 0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5386 0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5387 0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5388 0.95 - clamping code for 16-bit functions
5389 0.94 - not publically released
5390 0.93 - fixed all-zero-floor case (was decoding garbage)
5391 0.92 - fixed a memory leak
5392 0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5393 0.90 - first public release
5394 */
5395
5396 #endif // STB_VORBIS_HEADER_ONLY
5397