1 // Ogg Vorbis audio decoder - v1.14 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking implementation
7 // sponsored by Phillip Bennefall, Marc Andersen, Aaron Baker,
8 // Elias Software, Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 // See end of file for license information.
13 //
14 // Limitations:
15 //
16 // - floor 0 not supported (used in old ogg vorbis files pre-2004)
17 // - lossless sample-truncation at beginning ignored
18 // - cannot concatenate multiple vorbis streams
19 // - sample positions are 32-bit, limiting seekable 192Khz
20 // files to around 6 hours (Ogg supports 64-bit)
21 //
22 // Feature contributors:
23 // Dougall Johnson (sample-exact seeking)
24 //
25 // Bugfix/warning contributors:
26 // Terje Mathisen Niklas Frykholm Andy Hill
27 // Casey Muratori John Bolton Gargaj
28 // Laurent Gomila Marc LeBlanc Ronny Chevalier
29 // Bernhard Wodo Evan Balster alxprd@github
30 // Tom Beaumont Ingo Leitgeb Nicolas Guillemot
31 // Phillip Bennefall Rohit Thiago Goulart
32 // manxorist@github saga musix github:infatum
33 // Timur Gagiev
34 //
35 // Partial history:
36 // 1.14 - 2018-02-11 - delete bogus dealloca usage
37 // 1.13 - 2018-01-29 - fix truncation of last frame (hopefully)
38 // 1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
39 // 1.11 - 2017-07-23 - fix MinGW compilation
40 // 1.10 - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
41 // 1.09 - 2016-04-04 - back out 'truncation of last frame' fix from previous version
42 // 1.08 - 2016-04-02 - warnings; setup memory leaks; truncation of last frame
43 // 1.07 - 2015-01-16 - fixes for crashes on invalid files; warning fixes; const
44 // 1.06 - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
45 // some crash fixes when out of memory or with corrupt files
46 // fix some inappropriately signed shifts
47 // 1.05 - 2015-04-19 - don't define __forceinline if it's redundant
48 // 1.04 - 2014-08-27 - fix missing const-correct case in API
49 // 1.03 - 2014-08-07 - warning fixes
50 // 1.02 - 2014-07-09 - declare qsort comparison as explicitly _cdecl in Windows
51 // 1.01 - 2014-06-18 - fix stb_vorbis_get_samples_float (interleaved was correct)
52 // 1.0 - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
53 // (API change) report sample rate for decode-full-file funcs
54 //
55 // See end of file for full version history.
56
57
58 //////////////////////////////////////////////////////////////////////////////
59 //
60 // HEADER BEGINS HERE
61 //
62
63 #ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
64 #define STB_VORBIS_INCLUDE_STB_VORBIS_H
65
66 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
67 #define STB_VORBIS_NO_STDIO 1
68 #endif
69
70 #ifndef STB_VORBIS_NO_STDIO
71 #include <stdio.h>
72 #endif
73
74 #ifdef __cplusplus
75 extern "C" {
76 #endif
77
78 /////////// THREAD SAFETY
79
80 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
81 // them from multiple threads at the same time. However, you can have multiple
82 // stb_vorbis* handles and decode from them independently in multiple thrads.
83
84
85 /////////// MEMORY ALLOCATION
86
87 // normally stb_vorbis uses malloc() to allocate memory at startup,
88 // and alloca() to allocate temporary memory during a frame on the
89 // stack. (Memory consumption will depend on the amount of setup
90 // data in the file and how you set the compile flags for speed
91 // vs. size. In my test files the maximal-size usage is ~150KB.)
92 //
93 // You can modify the wrapper functions in the source (setup_malloc,
94 // setup_temp_malloc, temp_malloc) to change this behavior, or you
95 // can use a simpler allocation model: you pass in a buffer from
96 // which stb_vorbis will allocate _all_ its memory (including the
97 // temp memory). "open" may fail with a VORBIS_outofmem if you
98 // do not pass in enough data; there is no way to determine how
99 // much you do need except to succeed (at which point you can
100 // query get_info to find the exact amount required. yes I know
101 // this is lame).
102 //
103 // If you pass in a non-NULL buffer of the type below, allocation
104 // will occur from it as described above. Otherwise just pass NULL
105 // to use malloc()/alloca()
106
107 typedef struct
108 {
109 char *alloc_buffer;
110 int alloc_buffer_length_in_bytes;
111 } stb_vorbis_alloc;
112
113
114 /////////// FUNCTIONS USEABLE WITH ALL INPUT MODES
115
116 typedef struct stb_vorbis stb_vorbis;
117
118 typedef struct
119 {
120 unsigned int sample_rate;
121 int channels;
122
123 unsigned int setup_memory_required;
124 unsigned int setup_temp_memory_required;
125 unsigned int temp_memory_required;
126
127 int max_frame_size;
128 } stb_vorbis_info;
129
130 // get general information about the file
131 extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
132
133 // get the last error detected (clears it, too)
134 extern int stb_vorbis_get_error(stb_vorbis *f);
135
136 // close an ogg vorbis file and free all memory in use
137 extern void stb_vorbis_close(stb_vorbis *f);
138
139 // this function returns the offset (in samples) from the beginning of the
140 // file that will be returned by the next decode, if it is known, or -1
141 // otherwise. after a flush_pushdata() call, this may take a while before
142 // it becomes valid again.
143 // NOT WORKING YET after a seek with PULLDATA API
144 extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
145
146 // returns the current seek point within the file, or offset from the beginning
147 // of the memory buffer. In pushdata mode it returns 0.
148 extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
149
150 /////////// PUSHDATA API
151
152 #ifndef STB_VORBIS_NO_PUSHDATA_API
153
154 // this API allows you to get blocks of data from any source and hand
155 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
156 // you how much it used, and you have to give it the rest next time;
157 // and stb_vorbis may not have enough data to work with and you will
158 // need to give it the same data again PLUS more. Note that the Vorbis
159 // specification does not bound the size of an individual frame.
160
161 extern stb_vorbis *stb_vorbis_open_pushdata(
162 const unsigned char * datablock, int datablock_length_in_bytes,
163 int *datablock_memory_consumed_in_bytes,
164 int *error,
165 const stb_vorbis_alloc *alloc_buffer);
166 // create a vorbis decoder by passing in the initial data block containing
167 // the ogg&vorbis headers (you don't need to do parse them, just provide
168 // the first N bytes of the file--you're told if it's not enough, see below)
169 // on success, returns an stb_vorbis *, does not set error, returns the amount of
170 // data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
171 // on failure, returns NULL on error and sets *error, does not change *datablock_memory_consumed
172 // if returns NULL and *error is VORBIS_need_more_data, then the input block was
173 // incomplete and you need to pass in a larger block from the start of the file
174
175 extern int stb_vorbis_decode_frame_pushdata(
176 stb_vorbis *f,
177 const unsigned char *datablock, int datablock_length_in_bytes,
178 int *channels, // place to write number of float * buffers
179 float ***output, // place to write float ** array of float * buffers
180 int *samples // place to write number of output samples
181 );
182 // decode a frame of audio sample data if possible from the passed-in data block
183 //
184 // return value: number of bytes we used from datablock
185 //
186 // possible cases:
187 // 0 bytes used, 0 samples output (need more data)
188 // N bytes used, 0 samples output (resynching the stream, keep going)
189 // N bytes used, M samples output (one frame of data)
190 // note that after opening a file, you will ALWAYS get one N-bytes,0-sample
191 // frame, because Vorbis always "discards" the first frame.
192 //
193 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
194 // instead only datablock_length_in_bytes-3 or less. This is because it wants
195 // to avoid missing parts of a page header if they cross a datablock boundary,
196 // without writing state-machiney code to record a partial detection.
197 //
198 // The number of channels returned are stored in *channels (which can be
199 // NULL--it is always the same as the number of channels reported by
200 // get_info). *output will contain an array of float* buffers, one per
201 // channel. In other words, (*output)[0][0] contains the first sample from
202 // the first channel, and (*output)[1][0] contains the first sample from
203 // the second channel.
204
205 extern void stb_vorbis_flush_pushdata(stb_vorbis *f);
206 // inform stb_vorbis that your next datablock will not be contiguous with
207 // previous ones (e.g. you've seeked in the data); future attempts to decode
208 // frames will cause stb_vorbis to resynchronize (as noted above), and
209 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
210 // will begin decoding the _next_ frame.
211 //
212 // if you want to seek using pushdata, you need to seek in your file, then
213 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
214 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
215 // if you don't like the result, seek your file again and repeat.
216 #endif
217
218
219 ////////// PULLING INPUT API
220
221 #ifndef STB_VORBIS_NO_PULLDATA_API
222 // This API assumes stb_vorbis is allowed to pull data from a source--
223 // either a block of memory containing the _entire_ vorbis stream, or a
224 // FILE * that you or it create, or possibly some other reading mechanism
225 // if you go modify the source to replace the FILE * case with some kind
226 // of callback to your code. (But if you don't support seeking, you may
227 // just want to go ahead and use pushdata.)
228
229 #if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
230 extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
231 #endif
232 #if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
233 extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
234 #endif
235 // decode an entire file and output the data interleaved into a malloc()ed
236 // buffer stored in *output. The return value is the number of samples
237 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
238 // When you're done with it, just free() the pointer returned in *output.
239
240 extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
241 int *error, const stb_vorbis_alloc *alloc_buffer);
242 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
243 // this must be the entire stream!). on failure, returns NULL and sets *error
244
245 #ifndef STB_VORBIS_NO_STDIO
246 extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
247 int *error, const stb_vorbis_alloc *alloc_buffer);
248 // create an ogg vorbis decoder from a filename via fopen(). on failure,
249 // returns NULL and sets *error (possibly to VORBIS_file_open_failure).
250
251 extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
252 int *error, const stb_vorbis_alloc *alloc_buffer);
253 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
254 // the _current_ seek point (ftell). on failure, returns NULL and sets *error.
255 // note that stb_vorbis must "own" this stream; if you seek it in between
256 // calls to stb_vorbis, it will become confused. Morever, if you attempt to
257 // perform stb_vorbis_seek_*() operations on this file, it will assume it
258 // owns the _entire_ rest of the file after the start point. Use the next
259 // function, stb_vorbis_open_file_section(), to limit it.
260
261 extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
262 int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
263 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
264 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
265 // on failure, returns NULL and sets *error. note that stb_vorbis must "own"
266 // this stream; if you seek it in between calls to stb_vorbis, it will become
267 // confused.
268 #endif
269
270 extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
271 extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
272 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
273 // after calling seek_frame(), the next call to get_frame_*() will include
274 // the specified sample. after calling stb_vorbis_seek(), the next call to
275 // stb_vorbis_get_samples_* will start with the specified sample. If you
276 // do not need to seek to EXACTLY the target sample when using get_samples_*,
277 // you can also use seek_frame().
278
279 extern int stb_vorbis_seek_start(stb_vorbis *f);
280 // this function is equivalent to stb_vorbis_seek(f,0)
281
282 extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
283 extern float stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
284 // these functions return the total length of the vorbis stream
285
286 extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
287 // decode the next frame and return the number of samples. the number of
288 // channels returned are stored in *channels (which can be NULL--it is always
289 // the same as the number of channels reported by get_info). *output will
290 // contain an array of float* buffers, one per channel. These outputs will
291 // be overwritten on the next call to stb_vorbis_get_frame_*.
292 //
293 // You generally should not intermix calls to stb_vorbis_get_frame_*()
294 // and stb_vorbis_get_samples_*(), since the latter calls the former.
295
296 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
297 extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
298 extern int stb_vorbis_get_frame_short (stb_vorbis *f, int num_c, short **buffer, int num_samples);
299 #endif
300 // decode the next frame and return the number of *samples* per channel.
301 // Note that for interleaved data, you pass in the number of shorts (the
302 // size of your array), but the return value is the number of samples per
303 // channel, not the total number of samples.
304 //
305 // The data is coerced to the number of channels you request according to the
306 // channel coercion rules (see below). You must pass in the size of your
307 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
308 // The maximum buffer size needed can be gotten from get_info(); however,
309 // the Vorbis I specification implies an absolute maximum of 4096 samples
310 // per channel.
311
312 // Channel coercion rules:
313 // Let M be the number of channels requested, and N the number of channels present,
314 // and Cn be the nth channel; let stereo L be the sum of all L and center channels,
315 // and stereo R be the sum of all R and center channels (channel assignment from the
316 // vorbis spec).
317 // M N output
318 // 1 k sum(Ck) for all k
319 // 2 * stereo L, stereo R
320 // k l k > l, the first l channels, then 0s
321 // k l k <= l, the first k channels
322 // Note that this is not _good_ surround etc. mixing at all! It's just so
323 // you get something useful.
324
325 extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
326 extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
327 // gets num_samples samples, not necessarily on a frame boundary--this requires
328 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
329 // Returns the number of samples stored per channel; it may be less than requested
330 // at the end of the file. If there are no more samples in the file, returns 0.
331
332 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
333 extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
334 extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
335 #endif
336 // gets num_samples samples, not necessarily on a frame boundary--this requires
337 // buffering so you have to supply the buffers. Applies the coercion rules above
338 // to produce 'channels' channels. Returns the number of samples stored per channel;
339 // it may be less than requested at the end of the file. If there are no more
340 // samples in the file, returns 0.
341
342 #endif
343
344 //////// ERROR CODES
345
346 enum STBVorbisError
347 {
348 VORBIS__no_error,
349
350 VORBIS_need_more_data=1, // not a real error
351
352 VORBIS_invalid_api_mixing, // can't mix API modes
353 VORBIS_outofmem, // not enough memory
354 VORBIS_feature_not_supported, // uses floor 0
355 VORBIS_too_many_channels, // STB_VORBIS_MAX_CHANNELS is too small
356 VORBIS_file_open_failure, // fopen() failed
357 VORBIS_seek_without_length, // can't seek in unknown-length file
358
359 VORBIS_unexpected_eof=10, // file is truncated?
360 VORBIS_seek_invalid, // seek past EOF
361
362 // decoding errors (corrupt/invalid stream) -- you probably
363 // don't care about the exact details of these
364
365 // vorbis errors:
366 VORBIS_invalid_setup=20,
367 VORBIS_invalid_stream,
368
369 // ogg errors:
370 VORBIS_missing_capture_pattern=30,
371 VORBIS_invalid_stream_structure_version,
372 VORBIS_continued_packet_flag_invalid,
373 VORBIS_incorrect_stream_serial_number,
374 VORBIS_invalid_first_page,
375 VORBIS_bad_packet_type,
376 VORBIS_cant_find_last_page,
377 VORBIS_seek_failed
378 };
379
380
381 #ifdef __cplusplus
382 }
383 #endif
384
385 #endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
386 //
387 // HEADER ENDS HERE
388 //
389 //////////////////////////////////////////////////////////////////////////////
390
391 #ifndef STB_VORBIS_HEADER_ONLY
392
393 // global configuration settings (e.g. set these in the project/makefile),
394 // or just set them in this file at the top (although ideally the first few
395 // should be visible when the header file is compiled too, although it's not
396 // crucial)
397
398 // STB_VORBIS_NO_PUSHDATA_API
399 // does not compile the code for the various stb_vorbis_*_pushdata()
400 // functions
401 // #define STB_VORBIS_NO_PUSHDATA_API
402
403 // STB_VORBIS_NO_PULLDATA_API
404 // does not compile the code for the non-pushdata APIs
405 // #define STB_VORBIS_NO_PULLDATA_API
406
407 // STB_VORBIS_NO_STDIO
408 // does not compile the code for the APIs that use FILE *s internally
409 // or externally (implied by STB_VORBIS_NO_PULLDATA_API)
410 // #define STB_VORBIS_NO_STDIO
411
412 // STB_VORBIS_NO_INTEGER_CONVERSION
413 // does not compile the code for converting audio sample data from
414 // float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
415 // #define STB_VORBIS_NO_INTEGER_CONVERSION
416
417 // STB_VORBIS_NO_FAST_SCALED_FLOAT
418 // does not use a fast float-to-int trick to accelerate float-to-int on
419 // most platforms which requires endianness be defined correctly.
420 //#define STB_VORBIS_NO_FAST_SCALED_FLOAT
421
422
423 // STB_VORBIS_MAX_CHANNELS [number]
424 // globally define this to the maximum number of channels you need.
425 // The spec does not put a restriction on channels except that
426 // the count is stored in a byte, so 255 is the hard limit.
427 // Reducing this saves about 16 bytes per value, so using 16 saves
428 // (255-16)*16 or around 4KB. Plus anything other memory usage
429 // I forgot to account for. Can probably go as low as 8 (7.1 audio),
430 // 6 (5.1 audio), or 2 (stereo only).
431 #ifndef STB_VORBIS_MAX_CHANNELS
432 #define STB_VORBIS_MAX_CHANNELS 16 // enough for anyone?
433 #endif
434
435 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
436 // after a flush_pushdata(), stb_vorbis begins scanning for the
437 // next valid page, without backtracking. when it finds something
438 // that looks like a page, it streams through it and verifies its
439 // CRC32. Should that validation fail, it keeps scanning. But it's
440 // possible that _while_ streaming through to check the CRC32 of
441 // one candidate page, it sees another candidate page. This #define
442 // determines how many "overlapping" candidate pages it can search
443 // at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
444 // garbage pages could be as big as 64KB, but probably average ~16KB.
445 // So don't hose ourselves by scanning an apparent 64KB page and
446 // missing a ton of real ones in the interim; so minimum of 2
447 #ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
448 #define STB_VORBIS_PUSHDATA_CRC_COUNT 4
449 #endif
450
451 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
452 // sets the log size of the huffman-acceleration table. Maximum
453 // supported value is 24. with larger numbers, more decodings are O(1),
454 // but the table size is larger so worse cache missing, so you'll have
455 // to probe (and try multiple ogg vorbis files) to find the sweet spot.
456 #ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
457 #define STB_VORBIS_FAST_HUFFMAN_LENGTH 10
458 #endif
459
460 // STB_VORBIS_FAST_BINARY_LENGTH [number]
461 // sets the log size of the binary-search acceleration table. this
462 // is used in similar fashion to the fast-huffman size to set initial
463 // parameters for the binary search
464
465 // STB_VORBIS_FAST_HUFFMAN_INT
466 // The fast huffman tables are much more efficient if they can be
467 // stored as 16-bit results instead of 32-bit results. This restricts
468 // the codebooks to having only 65535 possible outcomes, though.
469 // (At least, accelerated by the huffman table.)
470 #ifndef STB_VORBIS_FAST_HUFFMAN_INT
471 #define STB_VORBIS_FAST_HUFFMAN_SHORT
472 #endif
473
474 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
475 // If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
476 // back on binary searching for the correct one. This requires storing
477 // extra tables with the huffman codes in sorted order. Defining this
478 // symbol trades off space for speed by forcing a linear search in the
479 // non-fast case, except for "sparse" codebooks.
480 // #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
481
482 // STB_VORBIS_DIVIDES_IN_RESIDUE
483 // stb_vorbis precomputes the result of the scalar residue decoding
484 // that would otherwise require a divide per chunk. you can trade off
485 // space for time by defining this symbol.
486 // #define STB_VORBIS_DIVIDES_IN_RESIDUE
487
488 // STB_VORBIS_DIVIDES_IN_CODEBOOK
489 // vorbis VQ codebooks can be encoded two ways: with every case explicitly
490 // stored, or with all elements being chosen from a small range of values,
491 // and all values possible in all elements. By default, stb_vorbis expands
492 // this latter kind out to look like the former kind for ease of decoding,
493 // because otherwise an integer divide-per-vector-element is required to
494 // unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
495 // trade off storage for speed.
496 //#define STB_VORBIS_DIVIDES_IN_CODEBOOK
497
498 #ifdef STB_VORBIS_CODEBOOK_SHORTS
499 #error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
500 #endif
501
502 // STB_VORBIS_DIVIDE_TABLE
503 // this replaces small integer divides in the floor decode loop with
504 // table lookups. made less than 1% difference, so disabled by default.
505
506 // STB_VORBIS_NO_INLINE_DECODE
507 // disables the inlining of the scalar codebook fast-huffman decode.
508 // might save a little codespace; useful for debugging
509 // #define STB_VORBIS_NO_INLINE_DECODE
510
511 // STB_VORBIS_NO_DEFER_FLOOR
512 // Normally we only decode the floor without synthesizing the actual
513 // full curve. We can instead synthesize the curve immediately. This
514 // requires more memory and is very likely slower, so I don't think
515 // you'd ever want to do it except for debugging.
516 // #define STB_VORBIS_NO_DEFER_FLOOR
517
518
519
520
521 //////////////////////////////////////////////////////////////////////////////
522
523 #ifdef STB_VORBIS_NO_PULLDATA_API
524 #define STB_VORBIS_NO_INTEGER_CONVERSION
525 #define STB_VORBIS_NO_STDIO
526 #endif
527
528 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
529 #define STB_VORBIS_NO_STDIO 1
530 #endif
531
532 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
533 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
534
535 // only need endianness for fast-float-to-int, which we don't
536 // use for pushdata
537
538 #ifndef STB_VORBIS_BIG_ENDIAN
539 #define STB_VORBIS_ENDIAN 0
540 #else
541 #define STB_VORBIS_ENDIAN 1
542 #endif
543
544 #endif
545 #endif
546
547
548 #ifndef STB_VORBIS_NO_STDIO
549 #include <stdio.h>
550 #endif
551
552 #ifndef STB_VORBIS_NO_CRT
553 #include <stdlib.h>
554 #include <string.h>
555 #include <assert.h>
556 #include <math.h>
557
558 // find definition of alloca if it's not in stdlib.h:
559 #if defined(_MSC_VER) || defined(__MINGW32__)
560 #include <malloc.h>
561 #endif
562 #if defined(__linux__) || defined(__linux) || defined(__EMSCRIPTEN__)
563 #include <alloca.h>
564 #endif
565 #else // STB_VORBIS_NO_CRT
566 #define NULL 0
567 #define malloc(s) 0
568 #define free(s) ((void) 0)
569 #define realloc(s) 0
570 #endif // STB_VORBIS_NO_CRT
571
572 #include <limits.h>
573
574 #ifdef __MINGW32__
575 // eff you mingw:
576 // "fixed":
577 // http://sourceforge.net/p/mingw-w64/mailman/message/32882927/
578 // "no that broke the build, reverted, who cares about C":
579 // http://sourceforge.net/p/mingw-w64/mailman/message/32890381/
580 #ifdef __forceinline
581 #undef __forceinline
582 #endif
583 #define __forceinline
584 #ifndef alloca
585 #define alloca __builtin_alloca
586 #endif
587 #elif !defined(_MSC_VER)
588 #if __GNUC__
589 #define __forceinline inline
590 #else
591 #define __forceinline
592 #endif
593 #endif
594
595 #if STB_VORBIS_MAX_CHANNELS > 256
596 #error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
597 #endif
598
599 #if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
600 #error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
601 #endif
602
603
604 #if 0
605 #include <crtdbg.h>
606 #define CHECK(f) _CrtIsValidHeapPointer(f->channel_buffers[1])
607 #else
608 #define CHECK(f) ((void) 0)
609 #endif
610
611 #define MAX_BLOCKSIZE_LOG 13 // from specification
612 #define MAX_BLOCKSIZE (1 << MAX_BLOCKSIZE_LOG)
613
614
615 typedef unsigned char uint8;
616 typedef signed char int8;
617 typedef unsigned short uint16;
618 typedef signed short int16;
619 typedef unsigned int uint32;
620 typedef signed int int32;
621
622 #ifndef TRUE
623 #define TRUE 1
624 #define FALSE 0
625 #endif
626
627 typedef float codetype;
628
629 // @NOTE
630 //
631 // Some arrays below are tagged "//varies", which means it's actually
632 // a variable-sized piece of data, but rather than malloc I assume it's
633 // small enough it's better to just allocate it all together with the
634 // main thing
635 //
636 // Most of the variables are specified with the smallest size I could pack
637 // them into. It might give better performance to make them all full-sized
638 // integers. It should be safe to freely rearrange the structures or change
639 // the sizes larger--nothing relies on silently truncating etc., nor the
640 // order of variables.
641
642 #define FAST_HUFFMAN_TABLE_SIZE (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
643 #define FAST_HUFFMAN_TABLE_MASK (FAST_HUFFMAN_TABLE_SIZE - 1)
644
645 typedef struct
646 {
647 int dimensions, entries;
648 uint8 *codeword_lengths;
649 float minimum_value;
650 float delta_value;
651 uint8 value_bits;
652 uint8 lookup_type;
653 uint8 sequence_p;
654 uint8 sparse;
655 uint32 lookup_values;
656 codetype *multiplicands;
657 uint32 *codewords;
658 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
659 int16 fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
660 #else
661 int32 fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
662 #endif
663 uint32 *sorted_codewords;
664 int *sorted_values;
665 int sorted_entries;
666 } Codebook;
667
668 typedef struct
669 {
670 uint8 order;
671 uint16 rate;
672 uint16 bark_map_size;
673 uint8 amplitude_bits;
674 uint8 amplitude_offset;
675 uint8 number_of_books;
676 uint8 book_list[16]; // varies
677 } Floor0;
678
679 typedef struct
680 {
681 uint8 partitions;
682 uint8 partition_class_list[32]; // varies
683 uint8 class_dimensions[16]; // varies
684 uint8 class_subclasses[16]; // varies
685 uint8 class_masterbooks[16]; // varies
686 int16 subclass_books[16][8]; // varies
687 uint16 Xlist[31*8+2]; // varies
688 uint8 sorted_order[31*8+2];
689 uint8 neighbors[31*8+2][2];
690 uint8 floor1_multiplier;
691 uint8 rangebits;
692 int values;
693 } Floor1;
694
695 typedef union
696 {
697 Floor0 floor0;
698 Floor1 floor1;
699 } Floor;
700
701 typedef struct
702 {
703 uint32 begin, end;
704 uint32 part_size;
705 uint8 classifications;
706 uint8 classbook;
707 uint8 **classdata;
708 int16 (*residue_books)[8];
709 } Residue;
710
711 typedef struct
712 {
713 uint8 magnitude;
714 uint8 angle;
715 uint8 mux;
716 } MappingChannel;
717
718 typedef struct
719 {
720 uint16 coupling_steps;
721 MappingChannel *chan;
722 uint8 submaps;
723 uint8 submap_floor[15]; // varies
724 uint8 submap_residue[15]; // varies
725 } Mapping;
726
727 typedef struct
728 {
729 uint8 blockflag;
730 uint8 mapping;
731 uint16 windowtype;
732 uint16 transformtype;
733 } Mode;
734
735 typedef struct
736 {
737 uint32 goal_crc; // expected crc if match
738 int bytes_left; // bytes left in packet
739 uint32 crc_so_far; // running crc
740 int bytes_done; // bytes processed in _current_ chunk
741 uint32 sample_loc; // granule pos encoded in page
742 } CRCscan;
743
744 typedef struct
745 {
746 uint32 page_start, page_end;
747 uint32 last_decoded_sample;
748 } ProbedPage;
749
750 struct stb_vorbis
751 {
752 // user-accessible info
753 unsigned int sample_rate;
754 int channels;
755
756 unsigned int setup_memory_required;
757 unsigned int temp_memory_required;
758 unsigned int setup_temp_memory_required;
759
760 // input config
761 #ifndef STB_VORBIS_NO_STDIO
762 FILE *f;
763 uint32 f_start;
764 int close_on_free;
765 #endif
766
767 uint8 *stream;
768 uint8 *stream_start;
769 uint8 *stream_end;
770
771 uint32 stream_len;
772
773 uint8 push_mode;
774
775 uint32 first_audio_page_offset;
776
777 ProbedPage p_first, p_last;
778
779 // memory management
780 stb_vorbis_alloc alloc;
781 int setup_offset;
782 int temp_offset;
783
784 // run-time results
785 int eof;
786 enum STBVorbisError error;
787
788 // user-useful data
789
790 // header info
791 int blocksize[2];
792 int blocksize_0, blocksize_1;
793 int codebook_count;
794 Codebook *codebooks;
795 int floor_count;
796 uint16 floor_types[64]; // varies
797 Floor *floor_config;
798 int residue_count;
799 uint16 residue_types[64]; // varies
800 Residue *residue_config;
801 int mapping_count;
802 Mapping *mapping;
803 int mode_count;
804 Mode mode_config[64]; // varies
805
806 uint32 total_samples;
807
808 // decode buffer
809 float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
810 float *outputs [STB_VORBIS_MAX_CHANNELS];
811
812 float *previous_window[STB_VORBIS_MAX_CHANNELS];
813 int previous_length;
814
815 #ifndef STB_VORBIS_NO_DEFER_FLOOR
816 int16 *finalY[STB_VORBIS_MAX_CHANNELS];
817 #else
818 float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
819 #endif
820
821 uint32 current_loc; // sample location of next frame to decode
822 int current_loc_valid;
823
824 // per-blocksize precomputed data
825
826 // twiddle factors
827 float *A[2],*B[2],*C[2];
828 float *window[2];
829 uint16 *bit_reverse[2];
830
831 // current page/packet/segment streaming info
832 uint32 serial; // stream serial number for verification
833 int last_page;
834 int segment_count;
835 uint8 segments[255];
836 uint8 page_flag;
837 uint8 bytes_in_seg;
838 uint8 first_decode;
839 int next_seg;
840 int last_seg; // flag that we're on the last segment
841 int last_seg_which; // what was the segment number of the last seg?
842 uint32 acc;
843 int valid_bits;
844 int packet_bytes;
845 int end_seg_with_known_loc;
846 uint32 known_loc_for_packet;
847 int discard_samples_deferred;
848 uint32 samples_output;
849
850 // push mode scanning
851 int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
852 #ifndef STB_VORBIS_NO_PUSHDATA_API
853 CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
854 #endif
855
856 // sample-access
857 int channel_buffer_start;
858 int channel_buffer_end;
859 };
860
861 #if defined(STB_VORBIS_NO_PUSHDATA_API)
862 #define IS_PUSH_MODE(f) FALSE
863 #elif defined(STB_VORBIS_NO_PULLDATA_API)
864 #define IS_PUSH_MODE(f) TRUE
865 #else
866 #define IS_PUSH_MODE(f) ((f)->push_mode)
867 #endif
868
869 typedef struct stb_vorbis vorb;
870
error(vorb * f,enum STBVorbisError e)871 static int error(vorb *f, enum STBVorbisError e)
872 {
873 f->error = e;
874 if (!f->eof && e != VORBIS_need_more_data) {
875 f->error=e; // breakpoint for debugging
876 }
877 return 0;
878 }
879
880
881 // these functions are used for allocating temporary memory
882 // while decoding. if you can afford the stack space, use
883 // alloca(); otherwise, provide a temp buffer and it will
884 // allocate out of those.
885
886 #define array_size_required(count,size) (count*(sizeof(void *)+(size)))
887
888 #define temp_alloc(f,size) setup_temp_malloc(f,size)//(f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
889 #define temp_free(f,p) 0
890 #define temp_alloc_save(f) ((f)->temp_offset)
891 #define temp_alloc_restore(f,p) ((f)->temp_offset = (p))
892
893 #define temp_block_array(f,count,size) make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
894
895 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
make_block_array(void * mem,int count,int size)896 static void *make_block_array(void *mem, int count, int size)
897 {
898 int i;
899 void ** p = (void **) mem;
900 char *q = (char *) (p + count);
901 for (i=0; i < count; ++i) {
902 p[i] = q;
903 q += size;
904 }
905 return p;
906 }
907
setup_malloc(vorb * f,int sz)908 static void *setup_malloc(vorb *f, int sz)
909 {
910 sz = (sz+3) & ~3;
911 f->setup_memory_required += sz;
912 if (f->alloc.alloc_buffer) {
913 void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
914 if (f->setup_offset + sz > f->temp_offset) return NULL;
915 f->setup_offset += sz;
916 return p;
917 }
918 return sz ? malloc(sz) : NULL;
919 }
920
setup_free(vorb * f,void * p)921 static void setup_free(vorb *f, void *p)
922 {
923 if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack
924 free(p);
925 }
926
setup_temp_malloc(vorb * f,int sz)927 static void *setup_temp_malloc(vorb *f, int sz)
928 {
929 sz = (sz+3) & ~3;
930 if (f->alloc.alloc_buffer) {
931 if (f->temp_offset - sz < f->setup_offset) return NULL;
932 f->temp_offset -= sz;
933 return (char *) f->alloc.alloc_buffer + f->temp_offset;
934 }
935 return malloc(sz);
936 }
937
setup_temp_free(vorb * f,void * p,int sz)938 static void setup_temp_free(vorb *f, void *p, int sz)
939 {
940 if (f->alloc.alloc_buffer) {
941 f->temp_offset += (sz+3)&~3;
942 return;
943 }
944 free(p);
945 }
946
947 #define CRC32_POLY 0x04c11db7 // from spec
948
949 static uint32 crc_table[256];
crc32_init(void)950 static void crc32_init(void)
951 {
952 int i,j;
953 uint32 s;
954 for(i=0; i < 256; i++) {
955 for (s=(uint32) i << 24, j=0; j < 8; ++j)
956 s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
957 crc_table[i] = s;
958 }
959 }
960
crc32_update(uint32 crc,uint8 byte)961 static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
962 {
963 return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
964 }
965
966
967 // used in setup, and for huffman that doesn't go fast path
bit_reverse(unsigned int n)968 static unsigned int bit_reverse(unsigned int n)
969 {
970 n = ((n & 0xAAAAAAAA) >> 1) | ((n & 0x55555555) << 1);
971 n = ((n & 0xCCCCCCCC) >> 2) | ((n & 0x33333333) << 2);
972 n = ((n & 0xF0F0F0F0) >> 4) | ((n & 0x0F0F0F0F) << 4);
973 n = ((n & 0xFF00FF00) >> 8) | ((n & 0x00FF00FF) << 8);
974 return (n >> 16) | (n << 16);
975 }
976
977 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
978 // as required by the specification. fast(?) implementation from stb.h
979 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
ilog(int32 n)980 static int ilog(int32 n)
981 {
982 static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
983
984 if (n < 0) return 0; // signed n returns 0
985
986 // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
987 if (n < (1 << 14))
988 if (n < (1 << 4)) return 0 + log2_4[n ];
989 else if (n < (1 << 9)) return 5 + log2_4[n >> 5];
990 else return 10 + log2_4[n >> 10];
991 else if (n < (1 << 24))
992 if (n < (1 << 19)) return 15 + log2_4[n >> 15];
993 else return 20 + log2_4[n >> 20];
994 else if (n < (1 << 29)) return 25 + log2_4[n >> 25];
995 else return 30 + log2_4[n >> 30];
996 }
997
998 #ifndef M_PI
999 #define M_PI 3.14159265358979323846264f // from CRC
1000 #endif
1001
1002 // code length assigned to a value with no huffman encoding
1003 #define NO_CODE 255
1004
1005 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
1006 //
1007 // these functions are only called at setup, and only a few times
1008 // per file
1009 /*
1010 static float float32_unpack(uint32 x)
1011 {
1012 // from the specification
1013 uint32 mantissa = x & 0x1fffff;
1014 uint32 sign = x & 0x80000000;
1015 uint32 exp = (x & 0x7fe00000) >> 21;
1016 double res = sign ? -(double)mantissa : (double)mantissa;
1017 return (float) ldexp((float)res, exp-788);
1018 }
1019 */
ldexpi(int m,int e)1020 float ldexpi(int m, int e) {
1021 return (float)(m * pow(2, e));
1022 }
1023
float32_unpack(uint32 x)1024 static float float32_unpack(uint32 x) {
1025 // from the specification
1026 uint32 s = x & 0x80000000;
1027 int32 m = x & 0x1fffff;
1028 int32 e = (x & 0x7fe00000) >> 21;
1029 return ldexpi(s ? -m : m, e - 788);
1030 }
1031
1032 // zlib & jpeg huffman tables assume that the output symbols
1033 // can either be arbitrarily arranged, or have monotonically
1034 // increasing frequencies--they rely on the lengths being sorted;
1035 // this makes for a very simple generation algorithm.
1036 // vorbis allows a huffman table with non-sorted lengths. This
1037 // requires a more sophisticated construction, since symbols in
1038 // order do not map to huffman codes "in order".
add_entry(Codebook * c,uint32 huff_code,int symbol,int count,int len,uint32 * values)1039 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
1040 {
1041 if (!c->sparse) {
1042 c->codewords [symbol] = huff_code;
1043 } else {
1044 c->codewords [count] = huff_code;
1045 c->codeword_lengths[count] = len;
1046 values [count] = symbol;
1047 }
1048 }
1049
compute_codewords(Codebook * c,uint8 * len,int n,uint32 * values)1050 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
1051 {
1052 int i,k,m=0;
1053 uint32 available[32];
1054
1055 memset(available, 0, sizeof(available));
1056 // find the first entry
1057 for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
1058 if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
1059 // add to the list
1060 add_entry(c, 0, k, m++, len[k], values);
1061 // add all available leaves
1062 for (i=1; i <= len[k]; ++i)
1063 available[i] = 1U << (32-i);
1064 // note that the above code treats the first case specially,
1065 // but it's really the same as the following code, so they
1066 // could probably be combined (except the initial code is 0,
1067 // and I use 0 in available[] to mean 'empty')
1068 for (i=k+1; i < n; ++i) {
1069 uint32 res;
1070 int z = len[i], y;
1071 if (z == NO_CODE) continue;
1072 // find lowest available leaf (should always be earliest,
1073 // which is what the specification calls for)
1074 // note that this property, and the fact we can never have
1075 // more than one free leaf at a given level, isn't totally
1076 // trivial to prove, but it seems true and the assert never
1077 // fires, so!
1078 while (z > 0 && !available[z]) --z;
1079 if (z == 0) { return FALSE; }
1080 res = available[z];
1081 assert(z >= 0 && z < 32);
1082 available[z] = 0;
1083 add_entry(c, bit_reverse(res), i, m++, len[i], values);
1084 // propogate availability up the tree
1085 if (z != len[i]) {
1086 assert(len[i] >= 0 && len[i] < 32);
1087 for (y=len[i]; y > z; --y) {
1088 assert(available[y] == 0);
1089 available[y] = res + (1 << (32-y));
1090 }
1091 }
1092 }
1093 return TRUE;
1094 }
1095
1096 // accelerated huffman table allows fast O(1) match of all symbols
1097 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
compute_accelerated_huffman(Codebook * c)1098 static void compute_accelerated_huffman(Codebook *c)
1099 {
1100 int i, len;
1101 memset(c->fast_huffman, 0xFF, sizeof(c->fast_huffman)); // fill by -1
1102
1103 len = c->sparse ? c->sorted_entries : c->entries;
1104 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
1105 if (len > 32767) len = 32767; // largest possible value we can encode!
1106 #endif
1107 for (i=0; i < len; ++i) {
1108 if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
1109 uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
1110 // set table entries for all bit combinations in the higher bits
1111 while (z < FAST_HUFFMAN_TABLE_SIZE) {
1112 c->fast_huffman[z] = i;
1113 z += 1 << c->codeword_lengths[i];
1114 }
1115 }
1116 }
1117 }
1118
1119 #ifdef _MSC_VER
1120 #define STBV_CDECL __cdecl
1121 #else
1122 #define STBV_CDECL
1123 #endif
1124
uint32_compare(const void * p,const void * q)1125 static int STBV_CDECL uint32_compare(const void *p, const void *q)
1126 {
1127 uint32 x = * (uint32 *) p;
1128 uint32 y = * (uint32 *) q;
1129 return x < y ? -1 : x > y;
1130 }
1131
include_in_sort(Codebook * c,uint8 len)1132 static int include_in_sort(Codebook *c, uint8 len)
1133 {
1134 if (c->sparse) { assert(len != NO_CODE); return TRUE; }
1135 if (len == NO_CODE) return FALSE;
1136 if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
1137 return FALSE;
1138 }
1139
1140 // if the fast table above doesn't work, we want to binary
1141 // search them... need to reverse the bits
compute_sorted_huffman(Codebook * c,uint8 * lengths,uint32 * values)1142 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
1143 {
1144 int i, len;
1145 // build a list of all the entries
1146 // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
1147 // this is kind of a frivolous optimization--I don't see any performance improvement,
1148 // but it's like 4 extra lines of code, so.
1149 if (!c->sparse) {
1150 int k = 0;
1151 for (i=0; i < c->entries; ++i)
1152 if (include_in_sort(c, lengths[i]))
1153 c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
1154 assert(k == c->sorted_entries);
1155 } else {
1156 for (i=0; i < c->sorted_entries; ++i)
1157 c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
1158 }
1159
1160 qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
1161 c->sorted_codewords[c->sorted_entries] = 0xffffffff;
1162
1163 len = c->sparse ? c->sorted_entries : c->entries;
1164 // now we need to indicate how they correspond; we could either
1165 // #1: sort a different data structure that says who they correspond to
1166 // #2: for each sorted entry, search the original list to find who corresponds
1167 // #3: for each original entry, find the sorted entry
1168 // #1 requires extra storage, #2 is slow, #3 can use binary search!
1169 for (i=0; i < len; ++i) {
1170 int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
1171 if (include_in_sort(c,huff_len)) {
1172 uint32 code = bit_reverse(c->codewords[i]);
1173 int x=0, n=c->sorted_entries;
1174 while (n > 1) {
1175 // invariant: sc[x] <= code < sc[x+n]
1176 int m = x + (n >> 1);
1177 if (c->sorted_codewords[m] <= code) {
1178 x = m;
1179 n -= (n>>1);
1180 } else {
1181 n >>= 1;
1182 }
1183 }
1184 assert(c->sorted_codewords[x] == code);
1185 if (c->sparse) {
1186 c->sorted_values[x] = values[i];
1187 c->codeword_lengths[x] = huff_len;
1188 } else {
1189 c->sorted_values[x] = i;
1190 }
1191 }
1192 }
1193 }
1194
1195 // only run while parsing the header (3 times)
vorbis_validate(uint8 * data)1196 static int vorbis_validate(uint8 *data)
1197 {
1198 static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
1199 return memcmp(data, vorbis, 6) == 0;
1200 }
1201
1202 // called from setup only, once per code book
1203 // (formula implied by specification)
lookup1_values(int entries,int dim)1204 static int lookup1_values(int entries, int dim)
1205 {
1206 int r = (int) floor(exp((float) log((float) entries) / dim));
1207 if ((int) floor(pow((float) r+1, dim)) <= entries) // (int) cast for MinGW warning;
1208 ++r; // floor() to avoid _ftol() when non-CRT
1209 assert(pow((float) r+1, dim) > entries);
1210 assert((int) floor(pow((float) r, dim)) <= entries); // (int),floor() as above
1211 return r;
1212 }
1213
1214 #ifdef _PSP
1215 // pspmath.h
1216 extern float vfpu_sinf(float x);
1217 extern float vfpu_cosf(float x);
1218 extern void vfpu_sincos(float r, float *s, float *c);
1219
1220 #define sinf(x) vfpu_sinf(x)
1221 #define cosf(x) vfpu_cosf(x)
1222
sincosd(double x,double * s,double * c)1223 void sincosd(double x, double *s, double *c) {
1224 float fs, fc;
1225 vfpu_sincos((float)x, &fs, &fc);
1226 *s = (double)fs;
1227 *c = (double)fc;
1228 }
1229 #else
sincosd(double x,double * s,double * c)1230 void sincosd(double x, double *s, double *c) {
1231 *s = sin(x);
1232 *c = cos(x);
1233 }
1234 #endif
1235
1236 #define rotate(x, y, s, c) {\
1237 double t = x*c - y*s;\
1238 y = x*s + y*c;\
1239 x = t;\
1240 }
1241
1242 // called twice per file
compute_twiddle_factors(int n,float * A,float * B,float * C)1243 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
1244 {
1245 int n4 = n >> 2, n8 = n >> 3;
1246 int k, k2;
1247
1248 double sA, cA, sB, cB, sC, cC;
1249 double xA, yA, xB, yB, xC, yC;
1250
1251 sincosd(4*M_PI/n, &sA, &cA);
1252 sincosd(M_PI/n/2, &sB, &cB);
1253 sincosd(2*M_PI/n, &sC, &cC);
1254
1255 xA = 1.0;
1256 yA = 0.0;
1257 A[0] = 1.0f;
1258 A[1] = 0.0f;
1259 xB = cB;
1260 yB = sB;
1261 B[0] = (float)(xB * 0.5);
1262 B[1] = (float)(yB * 0.5);
1263 rotate(cB, sB, sB, cB);
1264
1265 for (k=1, k2=2; k < n4; ++k,k2+=2) {
1266 rotate(xA, yA, sA, cA);
1267 A[k2 ] = (float) xA;
1268 A[k2+1] = (float)-yA;
1269 rotate(xB, yB, sB, cB);
1270 B[k2 ] = (float)(xB * 0.5);
1271 B[k2+1] = (float)(yB * 0.5);
1272 }
1273
1274 xC = cC;
1275 yC = sC;
1276 C[0] = (float) xC;
1277 C[1] = (float)-yC;
1278
1279 rotate(cC, sC, sC, cC);
1280
1281 for (k=1, k2=2; k < n8; ++k,k2+=2) {
1282 rotate(xC, yC, sC, cC);
1283 C[k2 ] = (float) xC;
1284 C[k2+1] = (float)-yC;
1285 }
1286 }
1287
compute_window(int n,float * window)1288 static void compute_window(int n, float *window)
1289 {
1290 int n2 = n >> 1, i;
1291 double s, c, x, y;
1292
1293 sincosd(0.5f / n2 * 0.5f * M_PI, &s, &c);
1294 x = c;
1295 y = s;
1296 rotate(c, s, s, c);
1297
1298 for (i=0; i < n2; ++i) {
1299 window[i] = (float)sin(0.5 * M_PI * (y * y));
1300 rotate(x, y, s, c);
1301 }
1302 }
1303
compute_bitreverse(int n,uint16 * rev)1304 static void compute_bitreverse(int n, uint16 *rev)
1305 {
1306 int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
1307 int i, n8 = n >> 3;
1308 for (i=0; i < n8; ++i)
1309 rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
1310 }
1311
init_blocksize(vorb * f,int b,int n)1312 static int init_blocksize(vorb *f, int b, int n)
1313 {
1314 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
1315 f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1316 f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1317 f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
1318 if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
1319 compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
1320 f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1321 if (!f->window[b]) return error(f, VORBIS_outofmem);
1322 compute_window(n, f->window[b]);
1323 f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
1324 if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
1325 compute_bitreverse(n, f->bit_reverse[b]);
1326 return TRUE;
1327 }
1328
neighbors(uint16 * x,int n,int * plow,int * phigh)1329 static void neighbors(uint16 *x, int n, int *plow, int *phigh)
1330 {
1331 int low = -1;
1332 int high = 65536;
1333 int i;
1334 for (i=0; i < n; ++i) {
1335 if (x[i] > low && x[i] < x[n]) { *plow = i; low = x[i]; }
1336 if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
1337 }
1338 }
1339
1340 // this has been repurposed so y is now the original index instead of y
1341 typedef struct
1342 {
1343 uint16 x,id;
1344 } stbv__floor_ordering;
1345
point_compare(const void * p,const void * q)1346 static int STBV_CDECL point_compare(const void *p, const void *q)
1347 {
1348 stbv__floor_ordering *a = (stbv__floor_ordering *) p;
1349 stbv__floor_ordering *b = (stbv__floor_ordering *) q;
1350 return a->x < b->x ? -1 : a->x > b->x;
1351 }
1352
1353 //
1354 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
1355
1356
1357 #if defined(STB_VORBIS_NO_STDIO)
1358 #define USE_MEMORY(z) TRUE
1359 #else
1360 #define USE_MEMORY(z) ((z)->stream)
1361 #endif
1362
get8(vorb * z)1363 static uint8 get8(vorb *z)
1364 {
1365 if (USE_MEMORY(z)) {
1366 if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
1367 return *z->stream++;
1368 }
1369
1370 #ifndef STB_VORBIS_NO_STDIO
1371 {
1372 int c = fgetc(z->f);
1373 if (c == EOF) { z->eof = TRUE; return 0; }
1374 return c;
1375 }
1376 #endif
1377 }
1378
get32(vorb * f)1379 static uint32 get32(vorb *f)
1380 {
1381 uint32 x;
1382 x = get8(f);
1383 x += get8(f) << 8;
1384 x += get8(f) << 16;
1385 x += (uint32) get8(f) << 24;
1386 return x;
1387 }
1388
getn(vorb * z,uint8 * data,int n)1389 static int getn(vorb *z, uint8 *data, int n)
1390 {
1391 if (USE_MEMORY(z)) {
1392 if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
1393 memcpy(data, z->stream, n);
1394 z->stream += n;
1395 return 1;
1396 }
1397
1398 #ifndef STB_VORBIS_NO_STDIO
1399 if (fread(data, n, 1, z->f) == 1)
1400 return 1;
1401 else {
1402 z->eof = 1;
1403 return 0;
1404 }
1405 #endif
1406 }
1407
skip(vorb * z,int n)1408 static void skip(vorb *z, int n)
1409 {
1410 if (USE_MEMORY(z)) {
1411 z->stream += n;
1412 if (z->stream >= z->stream_end) z->eof = 1;
1413 return;
1414 }
1415 #ifndef STB_VORBIS_NO_STDIO
1416 {
1417 long x = ftell(z->f);
1418 fseek(z->f, x+n, SEEK_SET);
1419 }
1420 #endif
1421 }
1422
set_file_offset(stb_vorbis * f,unsigned int loc)1423 static int set_file_offset(stb_vorbis *f, unsigned int loc)
1424 {
1425 #ifndef STB_VORBIS_NO_PUSHDATA_API
1426 if (f->push_mode) return 0;
1427 #endif
1428 f->eof = 0;
1429 if (USE_MEMORY(f)) {
1430 if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
1431 f->stream = f->stream_end;
1432 f->eof = 1;
1433 return 0;
1434 } else {
1435 f->stream = f->stream_start + loc;
1436 return 1;
1437 }
1438 }
1439 #ifndef STB_VORBIS_NO_STDIO
1440 if (loc + f->f_start < loc || loc >= 0x80000000) {
1441 loc = 0x7fffffff;
1442 f->eof = 1;
1443 } else {
1444 loc += f->f_start;
1445 }
1446 if (!fseek(f->f, loc, SEEK_SET))
1447 return 1;
1448 f->eof = 1;
1449 fseek(f->f, f->f_start, SEEK_END);
1450 return 0;
1451 #endif
1452 }
1453
1454
1455 static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
1456
capture_pattern(vorb * f)1457 static int capture_pattern(vorb *f)
1458 {
1459 if (0x4f != get8(f)) return FALSE;
1460 if (0x67 != get8(f)) return FALSE;
1461 if (0x67 != get8(f)) return FALSE;
1462 if (0x53 != get8(f)) return FALSE;
1463 return TRUE;
1464 }
1465
1466 #define PAGEFLAG_continued_packet 1
1467 #define PAGEFLAG_first_page 2
1468 #define PAGEFLAG_last_page 4
1469
start_page_no_capturepattern(vorb * f)1470 static int start_page_no_capturepattern(vorb *f)
1471 {
1472 uint32 loc0,loc1,n;
1473 // stream structure version
1474 if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
1475 // header flag
1476 f->page_flag = get8(f);
1477 // absolute granule position
1478 loc0 = get32(f);
1479 loc1 = get32(f);
1480 // @TODO: validate loc0,loc1 as valid positions?
1481 // stream serial number -- vorbis doesn't interleave, so discard
1482 get32(f);
1483 //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
1484 // page sequence number
1485 n = get32(f);
1486 f->last_page = n;
1487 // CRC32
1488 get32(f);
1489 // page_segments
1490 f->segment_count = get8(f);
1491 if (!getn(f, f->segments, f->segment_count))
1492 return error(f, VORBIS_unexpected_eof);
1493 // assume we _don't_ know any the sample position of any segments
1494 f->end_seg_with_known_loc = -2;
1495 if (loc0 != ~0U || loc1 != ~0U) {
1496 int i;
1497 // determine which packet is the last one that will complete
1498 for (i=f->segment_count-1; i >= 0; --i)
1499 if (f->segments[i] < 255)
1500 break;
1501 // 'i' is now the index of the _last_ segment of a packet that ends
1502 if (i >= 0) {
1503 f->end_seg_with_known_loc = i;
1504 f->known_loc_for_packet = loc0;
1505 }
1506 }
1507 if (f->first_decode) {
1508 int i,len;
1509 ProbedPage p;
1510 len = 0;
1511 for (i=0; i < f->segment_count; ++i)
1512 len += f->segments[i];
1513 len += 27 + f->segment_count;
1514 p.page_start = f->first_audio_page_offset;
1515 p.page_end = p.page_start + len;
1516 p.last_decoded_sample = loc0;
1517 f->p_first = p;
1518 }
1519 f->next_seg = 0;
1520 return TRUE;
1521 }
1522
start_page(vorb * f)1523 static int start_page(vorb *f)
1524 {
1525 if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
1526 return start_page_no_capturepattern(f);
1527 }
1528
start_packet(vorb * f)1529 static int start_packet(vorb *f)
1530 {
1531 while (f->next_seg == -1) {
1532 if (!start_page(f)) return FALSE;
1533 if (f->page_flag & PAGEFLAG_continued_packet)
1534 return error(f, VORBIS_continued_packet_flag_invalid);
1535 }
1536 f->last_seg = FALSE;
1537 f->valid_bits = 0;
1538 f->packet_bytes = 0;
1539 f->bytes_in_seg = 0;
1540 // f->next_seg is now valid
1541 return TRUE;
1542 }
1543
maybe_start_packet(vorb * f)1544 static int maybe_start_packet(vorb *f)
1545 {
1546 if (f->next_seg == -1) {
1547 int x = get8(f);
1548 if (f->eof) return FALSE; // EOF at page boundary is not an error!
1549 if (0x4f != x ) return error(f, VORBIS_missing_capture_pattern);
1550 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1551 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1552 if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1553 if (!start_page_no_capturepattern(f)) return FALSE;
1554 if (f->page_flag & PAGEFLAG_continued_packet) {
1555 // set up enough state that we can read this packet if we want,
1556 // e.g. during recovery
1557 f->last_seg = FALSE;
1558 f->bytes_in_seg = 0;
1559 return error(f, VORBIS_continued_packet_flag_invalid);
1560 }
1561 }
1562 return start_packet(f);
1563 }
1564
next_segment(vorb * f)1565 static int next_segment(vorb *f)
1566 {
1567 int len;
1568 if (f->last_seg) return 0;
1569 if (f->next_seg == -1) {
1570 f->last_seg_which = f->segment_count-1; // in case start_page fails
1571 if (!start_page(f)) { f->last_seg = 1; return 0; }
1572 if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1573 }
1574 len = f->segments[f->next_seg++];
1575 if (len < 255) {
1576 f->last_seg = TRUE;
1577 f->last_seg_which = f->next_seg-1;
1578 }
1579 if (f->next_seg >= f->segment_count)
1580 f->next_seg = -1;
1581 assert(f->bytes_in_seg == 0);
1582 f->bytes_in_seg = len;
1583 return len;
1584 }
1585
1586 #define EOP (-1)
1587 #define INVALID_BITS (-1)
1588
get8_packet_raw(vorb * f)1589 static int get8_packet_raw(vorb *f)
1590 {
1591 if (!f->bytes_in_seg) { // CLANG!
1592 if (f->last_seg) return EOP;
1593 else if (!next_segment(f)) return EOP;
1594 }
1595 assert(f->bytes_in_seg > 0);
1596 --f->bytes_in_seg;
1597 ++f->packet_bytes;
1598 return get8(f);
1599 }
1600
get8_packet(vorb * f)1601 static int get8_packet(vorb *f)
1602 {
1603 int x = get8_packet_raw(f);
1604 f->valid_bits = 0;
1605 return x;
1606 }
1607
flush_packet(vorb * f)1608 static void flush_packet(vorb *f)
1609 {
1610 while (get8_packet_raw(f) != EOP);
1611 }
1612
1613 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1614 // as the huffman decoder?
get_bits(vorb * f,int n)1615 static uint32 get_bits(vorb *f, int n)
1616 {
1617 uint32 z;
1618
1619 if (f->valid_bits < 0) return 0;
1620 if (f->valid_bits < n) {
1621 if (n > 24) {
1622 // the accumulator technique below would not work correctly in this case
1623 z = get_bits(f, 24);
1624 z += get_bits(f, n-24) << 24;
1625 return z;
1626 }
1627 if (f->valid_bits == 0) f->acc = 0;
1628 while (f->valid_bits < n) {
1629 int z = get8_packet_raw(f);
1630 if (z == EOP) {
1631 f->valid_bits = INVALID_BITS;
1632 return 0;
1633 }
1634 f->acc += z << f->valid_bits;
1635 f->valid_bits += 8;
1636 }
1637 }
1638 if (f->valid_bits < 0) return 0;
1639 z = f->acc & ((1 << n)-1);
1640 f->acc >>= n;
1641 f->valid_bits -= n;
1642 return z;
1643 }
1644
1645 // @OPTIMIZE: primary accumulator for huffman
1646 // expand the buffer to as many bits as possible without reading off end of packet
1647 // it might be nice to allow f->valid_bits and f->acc to be stored in registers,
1648 // e.g. cache them locally and decode locally
prep_huffman(vorb * f)1649 static __forceinline void prep_huffman(vorb *f)
1650 {
1651 if (f->valid_bits <= 24) {
1652 if (f->valid_bits == 0) f->acc = 0;
1653 do {
1654 int z;
1655 if (f->last_seg && !f->bytes_in_seg) return;
1656 z = get8_packet_raw(f);
1657 if (z == EOP) return;
1658 f->acc += (unsigned) z << f->valid_bits;
1659 f->valid_bits += 8;
1660 } while (f->valid_bits <= 24);
1661 }
1662 }
1663
1664 enum
1665 {
1666 VORBIS_packet_id = 1,
1667 VORBIS_packet_comment = 3,
1668 VORBIS_packet_setup = 5
1669 };
1670
codebook_decode_scalar_raw(vorb * f,Codebook * c)1671 static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
1672 {
1673 int i;
1674 prep_huffman(f);
1675
1676 if (c->codewords == NULL && c->sorted_codewords == NULL)
1677 return -1;
1678
1679 // cases to use binary search: sorted_codewords && !c->codewords
1680 // sorted_codewords && c->entries > 8
1681 if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
1682 // binary search
1683 uint32 code = bit_reverse(f->acc);
1684 int x=0, n=c->sorted_entries, len;
1685
1686 while (n > 1) {
1687 // invariant: sc[x] <= code < sc[x+n]
1688 int m = x + (n >> 1);
1689 if (c->sorted_codewords[m] <= code) {
1690 x = m;
1691 n -= (n>>1);
1692 } else {
1693 n >>= 1;
1694 }
1695 }
1696 // x is now the sorted index
1697 if (!c->sparse) x = c->sorted_values[x];
1698 // x is now sorted index if sparse, or symbol otherwise
1699 len = c->codeword_lengths[x];
1700 if (f->valid_bits >= len) {
1701 f->acc >>= len;
1702 f->valid_bits -= len;
1703 return x;
1704 }
1705
1706 f->valid_bits = 0;
1707 return -1;
1708 }
1709
1710 // if small, linear search
1711 assert(!c->sparse);
1712 for (i=0; i < c->entries; ++i) {
1713 if (c->codeword_lengths[i] == NO_CODE) continue;
1714 if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
1715 if (f->valid_bits >= c->codeword_lengths[i]) {
1716 f->acc >>= c->codeword_lengths[i];
1717 f->valid_bits -= c->codeword_lengths[i];
1718 return i;
1719 }
1720 f->valid_bits = 0;
1721 return -1;
1722 }
1723 }
1724
1725 error(f, VORBIS_invalid_stream);
1726 f->valid_bits = 0;
1727 return -1;
1728 }
1729
1730 #ifndef STB_VORBIS_NO_INLINE_DECODE
1731
1732 #define DECODE_RAW(var, f,c) \
1733 if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) \
1734 prep_huffman(f); \
1735 var = f->acc & FAST_HUFFMAN_TABLE_MASK; \
1736 var = c->fast_huffman[var]; \
1737 if (var >= 0) { \
1738 int n = c->codeword_lengths[var]; \
1739 f->acc >>= n; \
1740 f->valid_bits -= n; \
1741 if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
1742 } else { \
1743 var = codebook_decode_scalar_raw(f,c); \
1744 }
1745
1746 #else
1747
codebook_decode_scalar(vorb * f,Codebook * c)1748 static int codebook_decode_scalar(vorb *f, Codebook *c)
1749 {
1750 int i;
1751 if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
1752 prep_huffman(f);
1753 // fast huffman table lookup
1754 i = f->acc & FAST_HUFFMAN_TABLE_MASK;
1755 i = c->fast_huffman[i];
1756 if (i >= 0) {
1757 f->acc >>= c->codeword_lengths[i];
1758 f->valid_bits -= c->codeword_lengths[i];
1759 if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
1760 return i;
1761 }
1762 return codebook_decode_scalar_raw(f,c);
1763 }
1764
1765 #define DECODE_RAW(var,f,c) var = codebook_decode_scalar(f,c);
1766
1767 #endif
1768
1769 #define DECODE(var,f,c) \
1770 DECODE_RAW(var,f,c) \
1771 if (c->sparse) var = c->sorted_values[var];
1772
1773 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1774 #define DECODE_VQ(var,f,c) DECODE_RAW(var,f,c)
1775 #else
1776 #define DECODE_VQ(var,f,c) DECODE(var,f,c)
1777 #endif
1778
1779
1780
1781
1782
1783
1784 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
1785 // where we avoid one addition
1786 #define CODEBOOK_ELEMENT(c,off) (c->multiplicands[off])
1787 #define CODEBOOK_ELEMENT_FAST(c,off) (c->multiplicands[off])
1788 #define CODEBOOK_ELEMENT_BASE(c) (0)
1789
codebook_decode_start(vorb * f,Codebook * c)1790 static int codebook_decode_start(vorb *f, Codebook *c)
1791 {
1792 int z = -1;
1793
1794 // type 0 is only legal in a scalar context
1795 if (c->lookup_type == 0)
1796 error(f, VORBIS_invalid_stream);
1797 else {
1798 DECODE_VQ(z,f,c);
1799 if (c->sparse) assert(z < c->sorted_entries);
1800 if (z < 0) { // check for EOP
1801 if (!f->bytes_in_seg)
1802 if (f->last_seg)
1803 return z;
1804 error(f, VORBIS_invalid_stream);
1805 }
1806 }
1807 return z;
1808 }
1809
codebook_decode(vorb * f,Codebook * c,float * output,int len)1810 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1811 {
1812 int i,z = codebook_decode_start(f,c);
1813 if (z < 0) return FALSE;
1814 if (len > c->dimensions) len = c->dimensions;
1815
1816 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1817 if (c->lookup_type == 1) {
1818 float last = CODEBOOK_ELEMENT_BASE(c);
1819 int div = 1;
1820 for (i=0; i < len; ++i) {
1821 int off = (z / div) % c->lookup_values;
1822 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1823 output[i] += val;
1824 if (c->sequence_p) last = val + c->minimum_value;
1825 div *= c->lookup_values;
1826 }
1827 return TRUE;
1828 }
1829 #endif
1830
1831 z *= c->dimensions;
1832 if (c->sequence_p) {
1833 float last = CODEBOOK_ELEMENT_BASE(c);
1834 for (i=0; i < len; ++i) {
1835 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1836 output[i] += val;
1837 last = val + c->minimum_value;
1838 }
1839 } else {
1840 float last = CODEBOOK_ELEMENT_BASE(c);
1841 for (i=0; i < len; ++i) {
1842 output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1843 }
1844 }
1845
1846 return TRUE;
1847 }
1848
codebook_decode_step(vorb * f,Codebook * c,float * output,int len,int step)1849 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1850 {
1851 int i,z = codebook_decode_start(f,c);
1852 float last = CODEBOOK_ELEMENT_BASE(c);
1853 if (z < 0) return FALSE;
1854 if (len > c->dimensions) len = c->dimensions;
1855
1856 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1857 if (c->lookup_type == 1) {
1858 int div = 1;
1859 for (i=0; i < len; ++i) {
1860 int off = (z / div) % c->lookup_values;
1861 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1862 output[i*step] += val;
1863 if (c->sequence_p) last = val;
1864 div *= c->lookup_values;
1865 }
1866 return TRUE;
1867 }
1868 #endif
1869
1870 z *= c->dimensions;
1871 for (i=0; i < len; ++i) {
1872 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1873 output[i*step] += val;
1874 if (c->sequence_p) last = val;
1875 }
1876
1877 return TRUE;
1878 }
1879
codebook_decode_deinterleave_repeat(vorb * f,Codebook * c,float ** outputs,int ch,int * c_inter_p,int * p_inter_p,int len,int total_decode)1880 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1881 {
1882 int c_inter = *c_inter_p;
1883 int p_inter = *p_inter_p;
1884 int i,z, effective = c->dimensions;
1885
1886 // type 0 is only legal in a scalar context
1887 if (c->lookup_type == 0) return error(f, VORBIS_invalid_stream);
1888
1889 while (total_decode > 0) {
1890 float last = CODEBOOK_ELEMENT_BASE(c);
1891 DECODE_VQ(z,f,c);
1892 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1893 assert(!c->sparse || z < c->sorted_entries);
1894 #endif
1895 if (z < 0) {
1896 if (!f->bytes_in_seg)
1897 if (f->last_seg) return FALSE;
1898 return error(f, VORBIS_invalid_stream);
1899 }
1900
1901 // if this will take us off the end of the buffers, stop short!
1902 // we check by computing the length of the virtual interleaved
1903 // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1904 // and the length we'll be using (effective)
1905 if (c_inter + p_inter*ch + effective > len * ch) {
1906 effective = len*ch - (p_inter*ch - c_inter);
1907 }
1908
1909 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1910 if (c->lookup_type == 1) {
1911 int div = 1;
1912 for (i=0; i < effective; ++i) {
1913 int off = (z / div) % c->lookup_values;
1914 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1915 if (outputs[c_inter])
1916 outputs[c_inter][p_inter] += val;
1917 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1918 if (c->sequence_p) last = val;
1919 div *= c->lookup_values;
1920 }
1921 } else
1922 #endif
1923 {
1924 z *= c->dimensions;
1925 if (c->sequence_p) {
1926 for (i=0; i < effective; ++i) {
1927 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1928 if (outputs[c_inter])
1929 outputs[c_inter][p_inter] += val;
1930 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1931 last = val;
1932 }
1933 } else {
1934 for (i=0; i < effective; ++i) {
1935 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1936 if (outputs[c_inter])
1937 outputs[c_inter][p_inter] += val;
1938 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1939 }
1940 }
1941 }
1942
1943 total_decode -= effective;
1944 }
1945 *c_inter_p = c_inter;
1946 *p_inter_p = p_inter;
1947 return TRUE;
1948 }
1949
predict_point(int x,int x0,int x1,int y0,int y1)1950 static int predict_point(int x, int x0, int x1, int y0, int y1)
1951 {
1952 int dy = y1 - y0;
1953 int adx = x1 - x0;
1954 // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
1955 int err = abs(dy) * (x - x0);
1956 int off = err / adx;
1957 return dy < 0 ? y0 - off : y0 + off;
1958 }
1959
1960 // the following table is block-copied from the specification
1961 static float inverse_db_table[256] =
1962 {
1963 1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
1964 1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
1965 1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
1966 2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
1967 2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
1968 3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
1969 4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
1970 6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
1971 7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
1972 1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
1973 1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
1974 1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
1975 2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
1976 2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
1977 3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
1978 4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
1979 5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
1980 7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
1981 9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
1982 1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
1983 1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
1984 2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
1985 2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
1986 3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
1987 4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
1988 5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
1989 7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
1990 9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
1991 0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
1992 0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
1993 0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
1994 0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
1995 0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
1996 0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
1997 0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
1998 0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
1999 0.00092223983f, 0.00098217216f, 0.0010459992f, 0.0011139742f,
2000 0.0011863665f, 0.0012634633f, 0.0013455702f, 0.0014330129f,
2001 0.0015261382f, 0.0016253153f, 0.0017309374f, 0.0018434235f,
2002 0.0019632195f, 0.0020908006f, 0.0022266726f, 0.0023713743f,
2003 0.0025254795f, 0.0026895994f, 0.0028643847f, 0.0030505286f,
2004 0.0032487691f, 0.0034598925f, 0.0036847358f, 0.0039241906f,
2005 0.0041792066f, 0.0044507950f, 0.0047400328f, 0.0050480668f,
2006 0.0053761186f, 0.0057254891f, 0.0060975636f, 0.0064938176f,
2007 0.0069158225f, 0.0073652516f, 0.0078438871f, 0.0083536271f,
2008 0.0088964928f, 0.009474637f, 0.010090352f, 0.010746080f,
2009 0.011444421f, 0.012188144f, 0.012980198f, 0.013823725f,
2010 0.014722068f, 0.015678791f, 0.016697687f, 0.017782797f,
2011 0.018938423f, 0.020169149f, 0.021479854f, 0.022875735f,
2012 0.024362330f, 0.025945531f, 0.027631618f, 0.029427276f,
2013 0.031339626f, 0.033376252f, 0.035545228f, 0.037855157f,
2014 0.040315199f, 0.042935108f, 0.045725273f, 0.048696758f,
2015 0.051861348f, 0.055231591f, 0.058820850f, 0.062643361f,
2016 0.066714279f, 0.071049749f, 0.075666962f, 0.080584227f,
2017 0.085821044f, 0.091398179f, 0.097337747f, 0.10366330f,
2018 0.11039993f, 0.11757434f, 0.12521498f, 0.13335215f,
2019 0.14201813f, 0.15124727f, 0.16107617f, 0.17154380f,
2020 0.18269168f, 0.19456402f, 0.20720788f, 0.22067342f,
2021 0.23501402f, 0.25028656f, 0.26655159f, 0.28387361f,
2022 0.30232132f, 0.32196786f, 0.34289114f, 0.36517414f,
2023 0.38890521f, 0.41417847f, 0.44109412f, 0.46975890f,
2024 0.50028648f, 0.53279791f, 0.56742212f, 0.60429640f,
2025 0.64356699f, 0.68538959f, 0.72993007f, 0.77736504f,
2026 0.82788260f, 0.88168307f, 0.9389798f, 1.0f
2027 };
2028
2029
2030 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
2031 // note that you must produce bit-identical output to decode correctly;
2032 // this specific sequence of operations is specified in the spec (it's
2033 // drawing integer-quantized frequency-space lines that the encoder
2034 // expects to be exactly the same)
2035 // ... also, isn't the whole point of Bresenham's algorithm to NOT
2036 // have to divide in the setup? sigh.
2037 #ifndef STB_VORBIS_NO_DEFER_FLOOR
2038 #define LINE_OP(a,b) a *= b
2039 #else
2040 #define LINE_OP(a,b) a = b
2041 #endif
2042
2043 #ifdef STB_VORBIS_DIVIDE_TABLE
2044 #define DIVTAB_NUMER 32
2045 #define DIVTAB_DENOM 64
2046 int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
2047 #endif
2048
draw_line(float * output,int x0,int y0,int x1,int y1,int n)2049 static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
2050 {
2051 int dy = y1 - y0;
2052 int adx = x1 - x0;
2053 int ady = abs(dy);
2054 int base;
2055 int x=x0,y=y0;
2056 int err = 0;
2057 int sy;
2058
2059 #ifdef STB_VORBIS_DIVIDE_TABLE
2060 if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
2061 if (dy < 0) {
2062 base = -integer_divide_table[ady][adx];
2063 sy = base-1;
2064 } else {
2065 base = integer_divide_table[ady][adx];
2066 sy = base+1;
2067 }
2068 } else {
2069 base = dy / adx;
2070 if (dy < 0)
2071 sy = base - 1;
2072 else
2073 sy = base+1;
2074 }
2075 #else
2076 base = dy / adx;
2077 if (dy < 0)
2078 sy = base - 1;
2079 else
2080 sy = base+1;
2081 #endif
2082 ady -= abs(base) * adx;
2083 if (x1 > n) x1 = n;
2084 if (x < x1) {
2085 LINE_OP(output[x], inverse_db_table[y]);
2086 for (++x; x < x1; ++x) {
2087 err += ady;
2088 if (err >= adx) {
2089 err -= adx;
2090 y += sy;
2091 } else
2092 y += base;
2093 LINE_OP(output[x], inverse_db_table[y]);
2094 }
2095 }
2096 }
2097
residue_decode(vorb * f,Codebook * book,float * target,int offset,int n,int rtype)2098 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
2099 {
2100 int k;
2101 if (rtype == 0) {
2102 int step = n / book->dimensions;
2103 for (k=0; k < step; ++k)
2104 if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
2105 return FALSE;
2106 } else {
2107 for (k=0; k < n; ) {
2108 if (!codebook_decode(f, book, target+offset, n-k))
2109 return FALSE;
2110 k += book->dimensions;
2111 offset += book->dimensions;
2112 }
2113 }
2114 return TRUE;
2115 }
2116
2117 // n is 1/2 of the blocksize --
2118 // specification: "Correct per-vector decode length is [n]/2"
decode_residue(vorb * f,float * residue_buffers[],int ch,int n,int rn,uint8 * do_not_decode)2119 static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
2120 {
2121 int i,j,pass;
2122 Residue *r = f->residue_config + rn;
2123 int rtype = f->residue_types[rn];
2124 int c = r->classbook;
2125 int classwords = f->codebooks[c].dimensions;
2126 unsigned int actual_size = rtype == 2 ? n*2 : n;
2127 unsigned int limit_r_begin = (r->begin < actual_size ? r->begin : actual_size);
2128 unsigned int limit_r_end = (r->end < actual_size ? r->end : actual_size);
2129 int n_read = limit_r_end - limit_r_begin;
2130 int part_read = n_read / r->part_size;
2131 int temp_alloc_point = temp_alloc_save(f);
2132 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2133 uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
2134 #else
2135 int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
2136 #endif
2137
2138 CHECK(f);
2139
2140 for (i=0; i < ch; ++i)
2141 if (!do_not_decode[i])
2142 memset(residue_buffers[i], 0, sizeof(float) * n);
2143
2144 if (rtype == 2 && ch != 1) {
2145 for (j=0; j < ch; ++j)
2146 if (!do_not_decode[j])
2147 break;
2148 if (j == ch)
2149 goto done;
2150
2151 for (pass=0; pass < 8; ++pass) {
2152 int pcount = 0, class_set = 0;
2153 if (ch == 2) {
2154 while (pcount < part_read) {
2155 int z = r->begin + pcount*r->part_size;
2156 int c_inter = (z & 1), p_inter = z>>1;
2157 if (pass == 0) {
2158 Codebook *c = f->codebooks+r->classbook;
2159 int q;
2160 DECODE(q,f,c);
2161 if (q == EOP) goto done;
2162 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2163 part_classdata[0][class_set] = r->classdata[q];
2164 #else
2165 for (i=classwords-1; i >= 0; --i) {
2166 classifications[0][i+pcount] = q % r->classifications;
2167 q /= r->classifications;
2168 }
2169 #endif
2170 }
2171 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2172 int z = r->begin + pcount*r->part_size;
2173 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2174 int c = part_classdata[0][class_set][i];
2175 #else
2176 int c = classifications[0][pcount];
2177 #endif
2178 int b = r->residue_books[c][pass];
2179 if (b >= 0) {
2180 Codebook *book = f->codebooks + b;
2181 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
2182 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2183 goto done;
2184 #else
2185 // saves 1%
2186 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2187 goto done;
2188 #endif
2189 } else {
2190 z += r->part_size;
2191 c_inter = z & 1;
2192 p_inter = z >> 1;
2193 }
2194 }
2195 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2196 ++class_set;
2197 #endif
2198 }
2199 } else if (ch == 1) {
2200 while (pcount < part_read) {
2201 int z = r->begin + pcount*r->part_size;
2202 int c_inter = 0, p_inter = z;
2203 if (pass == 0) {
2204 Codebook *c = f->codebooks+r->classbook;
2205 int q;
2206 DECODE(q,f,c);
2207 if (q == EOP) goto done;
2208 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2209 part_classdata[0][class_set] = r->classdata[q];
2210 #else
2211 for (i=classwords-1; i >= 0; --i) {
2212 classifications[0][i+pcount] = q % r->classifications;
2213 q /= r->classifications;
2214 }
2215 #endif
2216 }
2217 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2218 int z = r->begin + pcount*r->part_size;
2219 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2220 int c = part_classdata[0][class_set][i];
2221 #else
2222 int c = classifications[0][pcount];
2223 #endif
2224 int b = r->residue_books[c][pass];
2225 if (b >= 0) {
2226 Codebook *book = f->codebooks + b;
2227 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2228 goto done;
2229 } else {
2230 z += r->part_size;
2231 c_inter = 0;
2232 p_inter = z;
2233 }
2234 }
2235 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2236 ++class_set;
2237 #endif
2238 }
2239 } else {
2240 while (pcount < part_read) {
2241 int z = r->begin + pcount*r->part_size;
2242 int c_inter = z % ch, p_inter = z/ch;
2243 if (pass == 0) {
2244 Codebook *c = f->codebooks+r->classbook;
2245 int q;
2246 DECODE(q,f,c);
2247 if (q == EOP) goto done;
2248 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2249 part_classdata[0][class_set] = r->classdata[q];
2250 #else
2251 for (i=classwords-1; i >= 0; --i) {
2252 classifications[0][i+pcount] = q % r->classifications;
2253 q /= r->classifications;
2254 }
2255 #endif
2256 }
2257 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2258 int z = r->begin + pcount*r->part_size;
2259 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2260 int c = part_classdata[0][class_set][i];
2261 #else
2262 int c = classifications[0][pcount];
2263 #endif
2264 int b = r->residue_books[c][pass];
2265 if (b >= 0) {
2266 Codebook *book = f->codebooks + b;
2267 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2268 goto done;
2269 } else {
2270 z += r->part_size;
2271 c_inter = z % ch;
2272 p_inter = z / ch;
2273 }
2274 }
2275 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2276 ++class_set;
2277 #endif
2278 }
2279 }
2280 }
2281 goto done;
2282 }
2283 CHECK(f);
2284
2285 for (pass=0; pass < 8; ++pass) {
2286 int pcount = 0, class_set=0;
2287 while (pcount < part_read) {
2288 if (pass == 0) {
2289 for (j=0; j < ch; ++j) {
2290 if (!do_not_decode[j]) {
2291 Codebook *c = f->codebooks+r->classbook;
2292 int temp;
2293 DECODE(temp,f,c);
2294 if (temp == EOP) goto done;
2295 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2296 part_classdata[j][class_set] = r->classdata[temp];
2297 #else
2298 for (i=classwords-1; i >= 0; --i) {
2299 classifications[j][i+pcount] = temp % r->classifications;
2300 temp /= r->classifications;
2301 }
2302 #endif
2303 }
2304 }
2305 }
2306 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2307 for (j=0; j < ch; ++j) {
2308 if (!do_not_decode[j]) {
2309 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2310 int c = part_classdata[j][class_set][i];
2311 #else
2312 int c = classifications[j][pcount];
2313 #endif
2314 int b = r->residue_books[c][pass];
2315 if (b >= 0) {
2316 float *target = residue_buffers[j];
2317 int offset = r->begin + pcount * r->part_size;
2318 int n = r->part_size;
2319 Codebook *book = f->codebooks + b;
2320 if (!residue_decode(f, book, target, offset, n, rtype))
2321 goto done;
2322 }
2323 }
2324 }
2325 }
2326 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2327 ++class_set;
2328 #endif
2329 }
2330 }
2331 done:
2332 CHECK(f);
2333 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2334 temp_free(f,part_classdata);
2335 #else
2336 temp_free(f,classifications);
2337 #endif
2338 temp_alloc_restore(f,temp_alloc_point);
2339 }
2340
2341
2342 #if 0
2343 // slow way for debugging
2344 void inverse_mdct_slow(float *buffer, int n)
2345 {
2346 int i,j;
2347 int n2 = n >> 1;
2348 float *x = (float *) malloc(sizeof(*x) * n2);
2349 memcpy(x, buffer, sizeof(*x) * n2);
2350 for (i=0; i < n; ++i) {
2351 float acc = 0;
2352 for (j=0; j < n2; ++j)
2353 // formula from paper:
2354 //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2355 // formula from wikipedia
2356 //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2357 // these are equivalent, except the formula from the paper inverts the multiplier!
2358 // however, what actually works is NO MULTIPLIER!?!
2359 //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2360 acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2361 buffer[i] = acc;
2362 }
2363 free(x);
2364 }
2365 #elif 0
2366 // same as above, but just barely able to run in real time on modern machines
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2367 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2368 {
2369 float mcos[16384];
2370 int i,j;
2371 int n2 = n >> 1, nmask = (n << 2) -1;
2372 float *x = (float *) malloc(sizeof(*x) * n2);
2373 memcpy(x, buffer, sizeof(*x) * n2);
2374 for (i=0; i < 4*n; ++i)
2375 mcos[i] = (float) cos(M_PI / 2 * i / n);
2376
2377 for (i=0; i < n; ++i) {
2378 float acc = 0;
2379 for (j=0; j < n2; ++j)
2380 acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
2381 buffer[i] = acc;
2382 }
2383 free(x);
2384 }
2385 #elif 0
2386 // transform to use a slow dct-iv; this is STILL basically trivial,
2387 // but only requires half as many ops
dct_iv_slow(float * buffer,int n)2388 void dct_iv_slow(float *buffer, int n)
2389 {
2390 float mcos[16384];
2391 float x[2048];
2392 int i,j;
2393 int n2 = n >> 1, nmask = (n << 3) - 1;
2394 memcpy(x, buffer, sizeof(*x) * n);
2395 for (i=0; i < 8*n; ++i)
2396 mcos[i] = (float) cos(M_PI / 4 * i / n);
2397 for (i=0; i < n; ++i) {
2398 float acc = 0;
2399 for (j=0; j < n; ++j)
2400 acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
2401 buffer[i] = acc;
2402 }
2403 }
2404
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2405 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2406 {
2407 int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
2408 float temp[4096];
2409
2410 memcpy(temp, buffer, n2 * sizeof(float));
2411 dct_iv_slow(temp, n2); // returns -c'-d, a-b'
2412
2413 for (i=0; i < n4 ; ++i) buffer[i] = temp[i+n4]; // a-b'
2414 for ( ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1]; // b-a', c+d'
2415 for ( ; i < n ; ++i) buffer[i] = -temp[i - n3_4]; // c'+d
2416 }
2417 #endif
2418
2419 #ifndef LIBVORBIS_MDCT
2420 #define LIBVORBIS_MDCT 0
2421 #endif
2422
2423 #if LIBVORBIS_MDCT
2424 // directly call the vorbis MDCT using an interface documented
2425 // by Jeff Roberts... useful for performance comparison
2426 typedef struct
2427 {
2428 int n;
2429 int log2n;
2430
2431 float *trig;
2432 int *bitrev;
2433
2434 float scale;
2435 } mdct_lookup;
2436
2437 extern void mdct_init(mdct_lookup *lookup, int n);
2438 extern void mdct_clear(mdct_lookup *l);
2439 extern void mdct_backward(mdct_lookup *init, float *in, float *out);
2440
2441 mdct_lookup M1,M2;
2442
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2443 void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2444 {
2445 mdct_lookup *M;
2446 if (M1.n == n) M = &M1;
2447 else if (M2.n == n) M = &M2;
2448 else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
2449 else {
2450 if (M2.n) __asm int 3;
2451 mdct_init(&M2, n);
2452 M = &M2;
2453 }
2454
2455 mdct_backward(M, buffer, buffer);
2456 }
2457 #endif
2458
2459
2460 // the following were split out into separate functions while optimizing;
2461 // they could be pushed back up but eh. __forceinline showed no change;
2462 // they're probably already being inlined.
imdct_step3_iter0_loop(int n,float * e,int i_off,int k_off,float * A)2463 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
2464 {
2465 float *ee0 = e + i_off;
2466 float *ee2 = ee0 + k_off;
2467 int i;
2468
2469 assert((n & 3) == 0);
2470 for (i=(n>>2); i > 0; --i) {
2471 float k00_20, k01_21;
2472 k00_20 = ee0[ 0] - ee2[ 0];
2473 k01_21 = ee0[-1] - ee2[-1];
2474 ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
2475 ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
2476 ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
2477 ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
2478 A += 8;
2479
2480 k00_20 = ee0[-2] - ee2[-2];
2481 k01_21 = ee0[-3] - ee2[-3];
2482 ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
2483 ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
2484 ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
2485 ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
2486 A += 8;
2487
2488 k00_20 = ee0[-4] - ee2[-4];
2489 k01_21 = ee0[-5] - ee2[-5];
2490 ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
2491 ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
2492 ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
2493 ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
2494 A += 8;
2495
2496 k00_20 = ee0[-6] - ee2[-6];
2497 k01_21 = ee0[-7] - ee2[-7];
2498 ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
2499 ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
2500 ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
2501 ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
2502 A += 8;
2503 ee0 -= 8;
2504 ee2 -= 8;
2505 }
2506 }
2507
imdct_step3_inner_r_loop(int lim,float * e,int d0,int k_off,float * A,int k1)2508 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
2509 {
2510 int i;
2511 float k00_20, k01_21;
2512
2513 float *e0 = e + d0;
2514 float *e2 = e0 + k_off;
2515
2516 for (i=lim >> 2; i > 0; --i) {
2517 k00_20 = e0[-0] - e2[-0];
2518 k01_21 = e0[-1] - e2[-1];
2519 e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
2520 e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
2521 e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
2522 e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
2523
2524 A += k1;
2525
2526 k00_20 = e0[-2] - e2[-2];
2527 k01_21 = e0[-3] - e2[-3];
2528 e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
2529 e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
2530 e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
2531 e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
2532
2533 A += k1;
2534
2535 k00_20 = e0[-4] - e2[-4];
2536 k01_21 = e0[-5] - e2[-5];
2537 e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
2538 e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
2539 e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
2540 e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
2541
2542 A += k1;
2543
2544 k00_20 = e0[-6] - e2[-6];
2545 k01_21 = e0[-7] - e2[-7];
2546 e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
2547 e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
2548 e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
2549 e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
2550
2551 e0 -= 8;
2552 e2 -= 8;
2553
2554 A += k1;
2555 }
2556 }
2557
imdct_step3_inner_s_loop(int n,float * e,int i_off,int k_off,float * A,int a_off,int k0)2558 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
2559 {
2560 int i;
2561 float A0 = A[0];
2562 float A1 = A[0+1];
2563 float A2 = A[0+a_off];
2564 float A3 = A[0+a_off+1];
2565 float A4 = A[0+a_off*2+0];
2566 float A5 = A[0+a_off*2+1];
2567 float A6 = A[0+a_off*3+0];
2568 float A7 = A[0+a_off*3+1];
2569
2570 float k00,k11;
2571
2572 float *ee0 = e +i_off;
2573 float *ee2 = ee0+k_off;
2574
2575 for (i=n; i > 0; --i) {
2576 k00 = ee0[ 0] - ee2[ 0];
2577 k11 = ee0[-1] - ee2[-1];
2578 ee0[ 0] = ee0[ 0] + ee2[ 0];
2579 ee0[-1] = ee0[-1] + ee2[-1];
2580 ee2[ 0] = (k00) * A0 - (k11) * A1;
2581 ee2[-1] = (k11) * A0 + (k00) * A1;
2582
2583 k00 = ee0[-2] - ee2[-2];
2584 k11 = ee0[-3] - ee2[-3];
2585 ee0[-2] = ee0[-2] + ee2[-2];
2586 ee0[-3] = ee0[-3] + ee2[-3];
2587 ee2[-2] = (k00) * A2 - (k11) * A3;
2588 ee2[-3] = (k11) * A2 + (k00) * A3;
2589
2590 k00 = ee0[-4] - ee2[-4];
2591 k11 = ee0[-5] - ee2[-5];
2592 ee0[-4] = ee0[-4] + ee2[-4];
2593 ee0[-5] = ee0[-5] + ee2[-5];
2594 ee2[-4] = (k00) * A4 - (k11) * A5;
2595 ee2[-5] = (k11) * A4 + (k00) * A5;
2596
2597 k00 = ee0[-6] - ee2[-6];
2598 k11 = ee0[-7] - ee2[-7];
2599 ee0[-6] = ee0[-6] + ee2[-6];
2600 ee0[-7] = ee0[-7] + ee2[-7];
2601 ee2[-6] = (k00) * A6 - (k11) * A7;
2602 ee2[-7] = (k11) * A6 + (k00) * A7;
2603
2604 ee0 -= k0;
2605 ee2 -= k0;
2606 }
2607 }
2608
iter_54(float * z)2609 static __forceinline void iter_54(float *z)
2610 {
2611 float k00,k11,k22,k33;
2612 float y0,y1,y2,y3;
2613
2614 k00 = z[ 0] - z[-4];
2615 y0 = z[ 0] + z[-4];
2616 y2 = z[-2] + z[-6];
2617 k22 = z[-2] - z[-6];
2618
2619 z[-0] = y0 + y2; // z0 + z4 + z2 + z6
2620 z[-2] = y0 - y2; // z0 + z4 - z2 - z6
2621
2622 // done with y0,y2
2623
2624 k33 = z[-3] - z[-7];
2625
2626 z[-4] = k00 + k33; // z0 - z4 + z3 - z7
2627 z[-6] = k00 - k33; // z0 - z4 - z3 + z7
2628
2629 // done with k33
2630
2631 k11 = z[-1] - z[-5];
2632 y1 = z[-1] + z[-5];
2633 y3 = z[-3] + z[-7];
2634
2635 z[-1] = y1 + y3; // z1 + z5 + z3 + z7
2636 z[-3] = y1 - y3; // z1 + z5 - z3 - z7
2637 z[-5] = k11 - k22; // z1 - z5 + z2 - z6
2638 z[-7] = k11 + k22; // z1 - z5 - z2 + z6
2639 }
2640
imdct_step3_inner_s_loop_ld654(int n,float * e,int i_off,float * A,int base_n)2641 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
2642 {
2643 int a_off = base_n >> 3;
2644 float A2 = A[0+a_off];
2645 float *z = e + i_off;
2646 float *base = z - 16 * n;
2647
2648 while (z > base) {
2649 float k00,k11;
2650
2651 k00 = z[-0] - z[-8];
2652 k11 = z[-1] - z[-9];
2653 z[-0] = z[-0] + z[-8];
2654 z[-1] = z[-1] + z[-9];
2655 z[-8] = k00;
2656 z[-9] = k11 ;
2657
2658 k00 = z[ -2] - z[-10];
2659 k11 = z[ -3] - z[-11];
2660 z[ -2] = z[ -2] + z[-10];
2661 z[ -3] = z[ -3] + z[-11];
2662 z[-10] = (k00+k11) * A2;
2663 z[-11] = (k11-k00) * A2;
2664
2665 k00 = z[-12] - z[ -4]; // reverse to avoid a unary negation
2666 k11 = z[ -5] - z[-13];
2667 z[ -4] = z[ -4] + z[-12];
2668 z[ -5] = z[ -5] + z[-13];
2669 z[-12] = k11;
2670 z[-13] = k00;
2671
2672 k00 = z[-14] - z[ -6]; // reverse to avoid a unary negation
2673 k11 = z[ -7] - z[-15];
2674 z[ -6] = z[ -6] + z[-14];
2675 z[ -7] = z[ -7] + z[-15];
2676 z[-14] = (k00+k11) * A2;
2677 z[-15] = (k00-k11) * A2;
2678
2679 iter_54(z);
2680 iter_54(z-8);
2681 z -= 16;
2682 }
2683 }
2684
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2685 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2686 {
2687 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2688 int ld;
2689 // @OPTIMIZE: reduce register pressure by using fewer variables?
2690 int save_point = temp_alloc_save(f);
2691 float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
2692 float *u=NULL,*v=NULL;
2693 // twiddle factors
2694 float *A = f->A[blocktype];
2695
2696 // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2697 // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
2698
2699 // kernel from paper
2700
2701
2702 // merged:
2703 // copy and reflect spectral data
2704 // step 0
2705
2706 // note that it turns out that the items added together during
2707 // this step are, in fact, being added to themselves (as reflected
2708 // by step 0). inexplicable inefficiency! this became obvious
2709 // once I combined the passes.
2710
2711 // so there's a missing 'times 2' here (for adding X to itself).
2712 // this propogates through linearly to the end, where the numbers
2713 // are 1/2 too small, and need to be compensated for.
2714
2715 {
2716 float *d,*e, *AA, *e_stop;
2717 d = &buf2[n2-2];
2718 AA = A;
2719 e = &buffer[0];
2720 e_stop = &buffer[n2];
2721 while (e != e_stop) {
2722 d[1] = (e[0] * AA[0] - e[2]*AA[1]);
2723 d[0] = (e[0] * AA[1] + e[2]*AA[0]);
2724 d -= 2;
2725 AA += 2;
2726 e += 4;
2727 }
2728
2729 e = &buffer[n2-3];
2730 while (d >= buf2) {
2731 d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
2732 d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
2733 d -= 2;
2734 AA += 2;
2735 e -= 4;
2736 }
2737 }
2738
2739 // now we use symbolic names for these, so that we can
2740 // possibly swap their meaning as we change which operations
2741 // are in place
2742
2743 u = buffer;
2744 v = buf2;
2745
2746 // step 2 (paper output is w, now u)
2747 // this could be in place, but the data ends up in the wrong
2748 // place... _somebody_'s got to swap it, so this is nominated
2749 {
2750 float *AA = &A[n2-8];
2751 float *d0,*d1, *e0, *e1;
2752
2753 e0 = &v[n4];
2754 e1 = &v[0];
2755
2756 d0 = &u[n4];
2757 d1 = &u[0];
2758
2759 while (AA >= A) {
2760 float v40_20, v41_21;
2761
2762 v41_21 = e0[1] - e1[1];
2763 v40_20 = e0[0] - e1[0];
2764 d0[1] = e0[1] + e1[1];
2765 d0[0] = e0[0] + e1[0];
2766 d1[1] = v41_21*AA[4] - v40_20*AA[5];
2767 d1[0] = v40_20*AA[4] + v41_21*AA[5];
2768
2769 v41_21 = e0[3] - e1[3];
2770 v40_20 = e0[2] - e1[2];
2771 d0[3] = e0[3] + e1[3];
2772 d0[2] = e0[2] + e1[2];
2773 d1[3] = v41_21*AA[0] - v40_20*AA[1];
2774 d1[2] = v40_20*AA[0] + v41_21*AA[1];
2775
2776 AA -= 8;
2777
2778 d0 += 4;
2779 d1 += 4;
2780 e0 += 4;
2781 e1 += 4;
2782 }
2783 }
2784
2785 // step 3
2786 ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2787
2788 // optimized step 3:
2789
2790 // the original step3 loop can be nested r inside s or s inside r;
2791 // it's written originally as s inside r, but this is dumb when r
2792 // iterates many times, and s few. So I have two copies of it and
2793 // switch between them halfway.
2794
2795 // this is iteration 0 of step 3
2796 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2797 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2798
2799 // this is iteration 1 of step 3
2800 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2801 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2802 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2803 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2804
2805 l=2;
2806 for (; l < (ld-3)>>1; ++l) {
2807 int k0 = n >> (l+2), k0_2 = k0>>1;
2808 int lim = 1 << (l+1);
2809 int i;
2810 for (i=0; i < lim; ++i)
2811 imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2812 }
2813
2814 for (; l < ld-6; ++l) {
2815 int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2816 int rlim = n >> (l+6), r;
2817 int lim = 1 << (l+1);
2818 int i_off;
2819 float *A0 = A;
2820 i_off = n2-1;
2821 for (r=rlim; r > 0; --r) {
2822 imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2823 A0 += k1*4;
2824 i_off -= 8;
2825 }
2826 }
2827
2828 // iterations with count:
2829 // ld-6,-5,-4 all interleaved together
2830 // the big win comes from getting rid of needless flops
2831 // due to the constants on pass 5 & 4 being all 1 and 0;
2832 // combining them to be simultaneous to improve cache made little difference
2833 imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2834
2835 // output is u
2836
2837 // step 4, 5, and 6
2838 // cannot be in-place because of step 5
2839 {
2840 uint16 *bitrev = f->bit_reverse[blocktype];
2841 // weirdly, I'd have thought reading sequentially and writing
2842 // erratically would have been better than vice-versa, but in
2843 // fact that's not what my testing showed. (That is, with
2844 // j = bitreverse(i), do you read i and write j, or read j and write i.)
2845
2846 float *d0 = &v[n4-4];
2847 float *d1 = &v[n2-4];
2848 while (d0 >= v) {
2849 int k4;
2850
2851 k4 = bitrev[0];
2852 d1[3] = u[k4+0];
2853 d1[2] = u[k4+1];
2854 d0[3] = u[k4+2];
2855 d0[2] = u[k4+3];
2856
2857 k4 = bitrev[1];
2858 d1[1] = u[k4+0];
2859 d1[0] = u[k4+1];
2860 d0[1] = u[k4+2];
2861 d0[0] = u[k4+3];
2862
2863 d0 -= 4;
2864 d1 -= 4;
2865 bitrev += 2;
2866 }
2867 }
2868 // (paper output is u, now v)
2869
2870
2871 // data must be in buf2
2872 assert(v == buf2);
2873
2874 // step 7 (paper output is v, now v)
2875 // this is now in place
2876 {
2877 float *C = f->C[blocktype];
2878 float *d, *e;
2879
2880 d = v;
2881 e = v + n2 - 4;
2882
2883 while (d < e) {
2884 float a02,a11,b0,b1,b2,b3;
2885
2886 a02 = d[0] - e[2];
2887 a11 = d[1] + e[3];
2888
2889 b0 = C[1]*a02 + C[0]*a11;
2890 b1 = C[1]*a11 - C[0]*a02;
2891
2892 b2 = d[0] + e[ 2];
2893 b3 = d[1] - e[ 3];
2894
2895 d[0] = b2 + b0;
2896 d[1] = b3 + b1;
2897 e[2] = b2 - b0;
2898 e[3] = b1 - b3;
2899
2900 a02 = d[2] - e[0];
2901 a11 = d[3] + e[1];
2902
2903 b0 = C[3]*a02 + C[2]*a11;
2904 b1 = C[3]*a11 - C[2]*a02;
2905
2906 b2 = d[2] + e[ 0];
2907 b3 = d[3] - e[ 1];
2908
2909 d[2] = b2 + b0;
2910 d[3] = b3 + b1;
2911 e[0] = b2 - b0;
2912 e[1] = b1 - b3;
2913
2914 C += 4;
2915 d += 4;
2916 e -= 4;
2917 }
2918 }
2919
2920 // data must be in buf2
2921
2922
2923 // step 8+decode (paper output is X, now buffer)
2924 // this generates pairs of data a la 8 and pushes them directly through
2925 // the decode kernel (pushing rather than pulling) to avoid having
2926 // to make another pass later
2927
2928 // this cannot POSSIBLY be in place, so we refer to the buffers directly
2929
2930 {
2931 float *d0,*d1,*d2,*d3;
2932
2933 float *B = f->B[blocktype] + n2 - 8;
2934 float *e = buf2 + n2 - 8;
2935 d0 = &buffer[0];
2936 d1 = &buffer[n2-4];
2937 d2 = &buffer[n2];
2938 d3 = &buffer[n-4];
2939 while (e >= v) {
2940 float p0,p1,p2,p3;
2941
2942 p3 = e[6]*B[7] - e[7]*B[6];
2943 p2 = -e[6]*B[6] - e[7]*B[7];
2944
2945 d0[0] = p3;
2946 d1[3] = - p3;
2947 d2[0] = p2;
2948 d3[3] = p2;
2949
2950 p1 = e[4]*B[5] - e[5]*B[4];
2951 p0 = -e[4]*B[4] - e[5]*B[5];
2952
2953 d0[1] = p1;
2954 d1[2] = - p1;
2955 d2[1] = p0;
2956 d3[2] = p0;
2957
2958 p3 = e[2]*B[3] - e[3]*B[2];
2959 p2 = -e[2]*B[2] - e[3]*B[3];
2960
2961 d0[2] = p3;
2962 d1[1] = - p3;
2963 d2[2] = p2;
2964 d3[1] = p2;
2965
2966 p1 = e[0]*B[1] - e[1]*B[0];
2967 p0 = -e[0]*B[0] - e[1]*B[1];
2968
2969 d0[3] = p1;
2970 d1[0] = - p1;
2971 d2[3] = p0;
2972 d3[0] = p0;
2973
2974 B -= 8;
2975 e -= 8;
2976 d0 += 4;
2977 d2 += 4;
2978 d1 -= 4;
2979 d3 -= 4;
2980 }
2981 }
2982
2983 temp_free(f,buf2);
2984 temp_alloc_restore(f,save_point);
2985 }
2986
2987 #if 0
2988 // this is the original version of the above code, if you want to optimize it from scratch
2989 void inverse_mdct_naive(float *buffer, int n)
2990 {
2991 float s;
2992 float A[1 << 12], B[1 << 12], C[1 << 11];
2993 int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2994 int n3_4 = n - n4, ld;
2995 // how can they claim this only uses N words?!
2996 // oh, because they're only used sparsely, whoops
2997 float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];
2998 // set up twiddle factors
2999
3000 for (k=k2=0; k < n4; ++k,k2+=2) {
3001 A[k2 ] = (float) cos(4*k*M_PI/n);
3002 A[k2+1] = (float) -sin(4*k*M_PI/n);
3003 B[k2 ] = (float) cos((k2+1)*M_PI/n/2);
3004 B[k2+1] = (float) sin((k2+1)*M_PI/n/2);
3005 }
3006 for (k=k2=0; k < n8; ++k,k2+=2) {
3007 C[k2 ] = (float) cos(2*(k2+1)*M_PI/n);
3008 C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
3009 }
3010
3011 // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
3012 // Note there are bugs in that pseudocode, presumably due to them attempting
3013 // to rename the arrays nicely rather than representing the way their actual
3014 // implementation bounces buffers back and forth. As a result, even in the
3015 // "some formulars corrected" version, a direct implementation fails. These
3016 // are noted below as "paper bug".
3017
3018 // copy and reflect spectral data
3019 for (k=0; k < n2; ++k) u[k] = buffer[k];
3020 for ( ; k < n ; ++k) u[k] = -buffer[n - k - 1];
3021 // kernel from paper
3022 // step 1
3023 for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
3024 v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2] - (u[k4+2] - u[n-k4-3])*A[k2+1];
3025 v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
3026 }
3027 // step 2
3028 for (k=k4=0; k < n8; k+=1, k4+=4) {
3029 w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
3030 w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
3031 w[k4+3] = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
3032 w[k4+1] = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
3033 }
3034 // step 3
3035 ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
3036 for (l=0; l < ld-3; ++l) {
3037 int k0 = n >> (l+2), k1 = 1 << (l+3);
3038 int rlim = n >> (l+4), r4, r;
3039 int s2lim = 1 << (l+2), s2;
3040 for (r=r4=0; r < rlim; r4+=4,++r) {
3041 for (s2=0; s2 < s2lim; s2+=2) {
3042 u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
3043 u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
3044 u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
3045 - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
3046 u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
3047 + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
3048 }
3049 }
3050 if (l+1 < ld-3) {
3051 // paper bug: ping-ponging of u&w here is omitted
3052 memcpy(w, u, sizeof(u));
3053 }
3054 }
3055
3056 // step 4
3057 for (i=0; i < n8; ++i) {
3058 int j = bit_reverse(i) >> (32-ld+3);
3059 assert(j < n8);
3060 if (i == j) {
3061 // paper bug: original code probably swapped in place; if copying,
3062 // need to directly copy in this case
3063 int i8 = i << 3;
3064 v[i8+1] = u[i8+1];
3065 v[i8+3] = u[i8+3];
3066 v[i8+5] = u[i8+5];
3067 v[i8+7] = u[i8+7];
3068 } else if (i < j) {
3069 int i8 = i << 3, j8 = j << 3;
3070 v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
3071 v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
3072 v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
3073 v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
3074 }
3075 }
3076 // step 5
3077 for (k=0; k < n2; ++k) {
3078 w[k] = v[k*2+1];
3079 }
3080 // step 6
3081 for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
3082 u[n-1-k2] = w[k4];
3083 u[n-2-k2] = w[k4+1];
3084 u[n3_4 - 1 - k2] = w[k4+2];
3085 u[n3_4 - 2 - k2] = w[k4+3];
3086 }
3087 // step 7
3088 for (k=k2=0; k < n8; ++k, k2 += 2) {
3089 v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3090 v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3091 v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3092 v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3093 }
3094 // step 8
3095 for (k=k2=0; k < n4; ++k,k2 += 2) {
3096 X[k] = v[k2+n2]*B[k2 ] + v[k2+1+n2]*B[k2+1];
3097 X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2 ];
3098 }
3099
3100 // decode kernel to output
3101 // determined the following value experimentally
3102 // (by first figuring out what made inverse_mdct_slow work); then matching that here
3103 // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
3104 s = 0.5; // theoretically would be n4
3105
3106 // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
3107 // so it needs to use the "old" B values to behave correctly, or else
3108 // set s to 1.0 ]]]
3109 for (i=0; i < n4 ; ++i) buffer[i] = s * X[i+n4];
3110 for ( ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
3111 for ( ; i < n ; ++i) buffer[i] = -s * X[i - n3_4];
3112 }
3113 #endif
3114
get_window(vorb * f,int len)3115 static float *get_window(vorb *f, int len)
3116 {
3117 len <<= 1;
3118 if (len == f->blocksize_0) return f->window[0];
3119 if (len == f->blocksize_1) return f->window[1];
3120 assert(0);
3121 return NULL;
3122 }
3123
3124 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3125 typedef int16 YTYPE;
3126 #else
3127 typedef int YTYPE;
3128 #endif
do_floor(vorb * f,Mapping * map,int i,int n,float * target,YTYPE * finalY,uint8 * step2_flag)3129 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
3130 {
3131 int n2 = n >> 1;
3132 int s = map->chan[i].mux, floor;
3133 floor = map->submap_floor[s];
3134 if (f->floor_types[floor] == 0) {
3135 return error(f, VORBIS_invalid_stream);
3136 } else {
3137 Floor1 *g = &f->floor_config[floor].floor1;
3138 int j,q;
3139 int lx = 0, ly = finalY[0] * g->floor1_multiplier;
3140 for (q=1; q < g->values; ++q) {
3141 j = g->sorted_order[q];
3142 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3143 if (finalY[j] >= 0)
3144 #else
3145 if (step2_flag[j])
3146 #endif
3147 {
3148 int hy = finalY[j] * g->floor1_multiplier;
3149 int hx = g->Xlist[j];
3150 if (lx != hx)
3151 draw_line(target, lx,ly, hx,hy, n2);
3152 CHECK(f);
3153 lx = hx, ly = hy;
3154 }
3155 }
3156 if (lx < n2) {
3157 // optimization of: draw_line(target, lx,ly, n,ly, n2);
3158 for (j=lx; j < n2; ++j)
3159 LINE_OP(target[j], inverse_db_table[ly]);
3160 CHECK(f);
3161 }
3162 }
3163 return TRUE;
3164 }
3165
3166 // The meaning of "left" and "right"
3167 //
3168 // For a given frame:
3169 // we compute samples from 0..n
3170 // window_center is n/2
3171 // we'll window and mix the samples from left_start to left_end with data from the previous frame
3172 // all of the samples from left_end to right_start can be output without mixing; however,
3173 // this interval is 0-length except when transitioning between short and long frames
3174 // all of the samples from right_start to right_end need to be mixed with the next frame,
3175 // which we don't have, so those get saved in a buffer
3176 // frame N's right_end-right_start, the number of samples to mix with the next frame,
3177 // has to be the same as frame N+1's left_end-left_start (which they are by
3178 // construction)
3179
vorbis_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)3180 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
3181 {
3182 Mode *m;
3183 int i, n, prev, next, window_center;
3184 f->channel_buffer_start = f->channel_buffer_end = 0;
3185
3186 retry:
3187 if (f->eof) return FALSE;
3188 if (!maybe_start_packet(f))
3189 return FALSE;
3190 // check packet type
3191 if (get_bits(f,1) != 0) {
3192 if (IS_PUSH_MODE(f))
3193 return error(f,VORBIS_bad_packet_type);
3194 while (EOP != get8_packet(f));
3195 goto retry;
3196 }
3197
3198 if (f->alloc.alloc_buffer)
3199 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3200
3201 i = get_bits(f, ilog(f->mode_count-1));
3202 if (i == EOP) return FALSE;
3203 if (i >= f->mode_count) return FALSE;
3204 *mode = i;
3205 m = f->mode_config + i;
3206 if (m->blockflag) {
3207 n = f->blocksize_1;
3208 prev = get_bits(f,1);
3209 next = get_bits(f,1);
3210 } else {
3211 prev = next = 0;
3212 n = f->blocksize_0;
3213 }
3214
3215 // WINDOWING
3216
3217 window_center = n >> 1;
3218 if (m->blockflag && !prev) {
3219 *p_left_start = (n - f->blocksize_0) >> 2;
3220 *p_left_end = (n + f->blocksize_0) >> 2;
3221 } else {
3222 *p_left_start = 0;
3223 *p_left_end = window_center;
3224 }
3225 if (m->blockflag && !next) {
3226 *p_right_start = (n*3 - f->blocksize_0) >> 2;
3227 *p_right_end = (n*3 + f->blocksize_0) >> 2;
3228 } else {
3229 *p_right_start = window_center;
3230 *p_right_end = n;
3231 }
3232
3233 return TRUE;
3234 }
3235
vorbis_decode_packet_rest(vorb * f,int * len,Mode * m,int left_start,int left_end,int right_start,int right_end,int * p_left)3236 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
3237 {
3238 Mapping *map;
3239 int i,j,k,n,n2;
3240 int zero_channel[256];
3241 int really_zero_channel[256];
3242
3243 // WINDOWING
3244
3245 n = f->blocksize[m->blockflag];
3246 map = &f->mapping[m->mapping];
3247
3248 // FLOORS
3249 n2 = n >> 1;
3250
3251 CHECK(f);
3252
3253 for (i=0; i < f->channels; ++i) {
3254 int s = map->chan[i].mux, floor;
3255 zero_channel[i] = FALSE;
3256 floor = map->submap_floor[s];
3257 if (f->floor_types[floor] == 0) {
3258 return error(f, VORBIS_invalid_stream);
3259 } else {
3260 Floor1 *g = &f->floor_config[floor].floor1;
3261 if (get_bits(f, 1)) {
3262 short *finalY;
3263 uint8 step2_flag[256];
3264 static int range_list[4] = { 256, 128, 86, 64 };
3265 int range = range_list[g->floor1_multiplier-1];
3266 int offset = 2;
3267 finalY = f->finalY[i];
3268 finalY[0] = get_bits(f, ilog(range)-1);
3269 finalY[1] = get_bits(f, ilog(range)-1);
3270 for (j=0; j < g->partitions; ++j) {
3271 int pclass = g->partition_class_list[j];
3272 int cdim = g->class_dimensions[pclass];
3273 int cbits = g->class_subclasses[pclass];
3274 int csub = (1 << cbits)-1;
3275 int cval = 0;
3276 if (cbits) {
3277 Codebook *c = f->codebooks + g->class_masterbooks[pclass];
3278 DECODE(cval,f,c);
3279 }
3280 for (k=0; k < cdim; ++k) {
3281 int book = g->subclass_books[pclass][cval & csub];
3282 cval = cval >> cbits;
3283 if (book >= 0) {
3284 int temp;
3285 Codebook *c = f->codebooks + book;
3286 DECODE(temp,f,c);
3287 finalY[offset++] = temp;
3288 } else
3289 finalY[offset++] = 0;
3290 }
3291 }
3292 if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
3293 step2_flag[0] = step2_flag[1] = 1;
3294 for (j=2; j < g->values; ++j) {
3295 int low, high, pred, highroom, lowroom, room, val;
3296 low = g->neighbors[j][0];
3297 high = g->neighbors[j][1];
3298 //neighbors(g->Xlist, j, &low, &high);
3299 pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
3300 val = finalY[j];
3301 highroom = range - pred;
3302 lowroom = pred;
3303 if (highroom < lowroom)
3304 room = highroom * 2;
3305 else
3306 room = lowroom * 2;
3307 if (val) {
3308 step2_flag[low] = step2_flag[high] = 1;
3309 step2_flag[j] = 1;
3310 if (val >= room)
3311 if (highroom > lowroom)
3312 finalY[j] = val - lowroom + pred;
3313 else
3314 finalY[j] = pred - val + highroom - 1;
3315 else
3316 if (val & 1)
3317 finalY[j] = pred - ((val+1)>>1);
3318 else
3319 finalY[j] = pred + (val>>1);
3320 } else {
3321 step2_flag[j] = 0;
3322 finalY[j] = pred;
3323 }
3324 }
3325
3326 #ifdef STB_VORBIS_NO_DEFER_FLOOR
3327 do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
3328 #else
3329 // defer final floor computation until _after_ residue
3330 for (j=0; j < g->values; ++j) {
3331 if (!step2_flag[j])
3332 finalY[j] = -1;
3333 }
3334 #endif
3335 } else {
3336 error:
3337 zero_channel[i] = TRUE;
3338 }
3339 // So we just defer everything else to later
3340
3341 // at this point we've decoded the floor into buffer
3342 }
3343 }
3344 CHECK(f);
3345 // at this point we've decoded all floors
3346
3347 if (f->alloc.alloc_buffer)
3348 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3349
3350 // re-enable coupled channels if necessary
3351 memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
3352 for (i=0; i < map->coupling_steps; ++i)
3353 if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
3354 zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
3355 }
3356
3357 CHECK(f);
3358 // RESIDUE DECODE
3359 for (i=0; i < map->submaps; ++i) {
3360 float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
3361 int r;
3362 uint8 do_not_decode[256];
3363 int ch = 0;
3364 for (j=0; j < f->channels; ++j) {
3365 if (map->chan[j].mux == i) {
3366 if (zero_channel[j]) {
3367 do_not_decode[ch] = TRUE;
3368 residue_buffers[ch] = NULL;
3369 } else {
3370 do_not_decode[ch] = FALSE;
3371 residue_buffers[ch] = f->channel_buffers[j];
3372 }
3373 ++ch;
3374 }
3375 }
3376 r = map->submap_residue[i];
3377 decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
3378 }
3379
3380 if (f->alloc.alloc_buffer)
3381 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3382 CHECK(f);
3383
3384 // INVERSE COUPLING
3385 for (i = map->coupling_steps-1; i >= 0; --i) {
3386 int n2 = n >> 1;
3387 float *m = f->channel_buffers[map->chan[i].magnitude];
3388 float *a = f->channel_buffers[map->chan[i].angle ];
3389 for (j=0; j < n2; ++j) {
3390 float a2,m2;
3391 if (m[j] > 0)
3392 if (a[j] > 0)
3393 m2 = m[j], a2 = m[j] - a[j];
3394 else
3395 a2 = m[j], m2 = m[j] + a[j];
3396 else
3397 if (a[j] > 0)
3398 m2 = m[j], a2 = m[j] + a[j];
3399 else
3400 a2 = m[j], m2 = m[j] - a[j];
3401 m[j] = m2;
3402 a[j] = a2;
3403 }
3404 }
3405 CHECK(f);
3406
3407 // finish decoding the floors
3408 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3409 for (i=0; i < f->channels; ++i) {
3410 if (really_zero_channel[i]) {
3411 memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3412 } else {
3413 do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
3414 }
3415 }
3416 #else
3417 for (i=0; i < f->channels; ++i) {
3418 if (really_zero_channel[i]) {
3419 memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3420 } else {
3421 for (j=0; j < n2; ++j)
3422 f->channel_buffers[i][j] *= f->floor_buffers[i][j];
3423 }
3424 }
3425 #endif
3426
3427 // INVERSE MDCT
3428 CHECK(f);
3429 for (i=0; i < f->channels; ++i)
3430 inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
3431 CHECK(f);
3432
3433 // this shouldn't be necessary, unless we exited on an error
3434 // and want to flush to get to the next packet
3435 flush_packet(f);
3436
3437 if (f->first_decode) {
3438 // assume we start so first non-discarded sample is sample 0
3439 // this isn't to spec, but spec would require us to read ahead
3440 // and decode the size of all current frames--could be done,
3441 // but presumably it's not a commonly used feature
3442 f->current_loc = -n2; // start of first frame is positioned for discard
3443 // we might have to discard samples "from" the next frame too,
3444 // if we're lapping a large block then a small at the start?
3445 f->discard_samples_deferred = n - right_end;
3446 f->current_loc_valid = TRUE;
3447 f->first_decode = FALSE;
3448 } else if (f->discard_samples_deferred) {
3449 if (f->discard_samples_deferred >= right_start - left_start) {
3450 f->discard_samples_deferred -= (right_start - left_start);
3451 left_start = right_start;
3452 *p_left = left_start;
3453 } else {
3454 left_start += f->discard_samples_deferred;
3455 *p_left = left_start;
3456 f->discard_samples_deferred = 0;
3457 }
3458 } else if (f->previous_length == 0 && f->current_loc_valid) {
3459 // we're recovering from a seek... that means we're going to discard
3460 // the samples from this packet even though we know our position from
3461 // the last page header, so we need to update the position based on
3462 // the discarded samples here
3463 // but wait, the code below is going to add this in itself even
3464 // on a discard, so we don't need to do it here...
3465 }
3466
3467 // check if we have ogg information about the sample # for this packet
3468 if (f->last_seg_which == f->end_seg_with_known_loc) {
3469 // if we have a valid current loc, and this is final:
3470 if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
3471 uint32 current_end = f->known_loc_for_packet;
3472 // then let's infer the size of the (probably) short final frame
3473 if (current_end < f->current_loc + (right_end-left_start)) {
3474 if (current_end < f->current_loc) {
3475 // negative truncation, that's impossible!
3476 *len = 0;
3477 } else {
3478 *len = current_end - f->current_loc;
3479 }
3480 *len += left_start; // this doesn't seem right, but has no ill effect on my test files
3481 if (*len > right_end) *len = right_end; // this should never happen
3482 f->current_loc += *len;
3483 return TRUE;
3484 }
3485 }
3486 // otherwise, just set our sample loc
3487 // guess that the ogg granule pos refers to the _middle_ of the
3488 // last frame?
3489 // set f->current_loc to the position of left_start
3490 f->current_loc = f->known_loc_for_packet - (n2-left_start);
3491 f->current_loc_valid = TRUE;
3492 }
3493 if (f->current_loc_valid)
3494 f->current_loc += (right_start - left_start);
3495
3496 if (f->alloc.alloc_buffer)
3497 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3498 *len = right_end; // ignore samples after the window goes to 0
3499 CHECK(f);
3500
3501 return TRUE;
3502 }
3503
vorbis_decode_packet(vorb * f,int * len,int * p_left,int * p_right)3504 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
3505 {
3506 int mode, left_end, right_end;
3507 if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
3508 return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
3509 }
3510
vorbis_finish_frame(stb_vorbis * f,int len,int left,int right)3511 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
3512 {
3513 int prev,i,j;
3514 // we use right&left (the start of the right- and left-window sin()-regions)
3515 // to determine how much to return, rather than inferring from the rules
3516 // (same result, clearer code); 'left' indicates where our sin() window
3517 // starts, therefore where the previous window's right edge starts, and
3518 // therefore where to start mixing from the previous buffer. 'right'
3519 // indicates where our sin() ending-window starts, therefore that's where
3520 // we start saving, and where our returned-data ends.
3521
3522 // mixin from previous window
3523 if (f->previous_length) {
3524 int i,j, n = f->previous_length;
3525 float *w = get_window(f, n);
3526 for (i=0; i < f->channels; ++i) {
3527 for (j=0; j < n; ++j)
3528 f->channel_buffers[i][left+j] =
3529 f->channel_buffers[i][left+j]*w[ j] +
3530 f->previous_window[i][ j]*w[n-1-j];
3531 }
3532 }
3533
3534 prev = f->previous_length;
3535
3536 // last half of this data becomes previous window
3537 f->previous_length = len - right;
3538
3539 // @OPTIMIZE: could avoid this copy by double-buffering the
3540 // output (flipping previous_window with channel_buffers), but
3541 // then previous_window would have to be 2x as large, and
3542 // channel_buffers couldn't be temp mem (although they're NOT
3543 // currently temp mem, they could be (unless we want to level
3544 // performance by spreading out the computation))
3545 for (i=0; i < f->channels; ++i)
3546 for (j=0; right+j < len; ++j)
3547 f->previous_window[i][j] = f->channel_buffers[i][right+j];
3548
3549 if (!prev)
3550 // there was no previous packet, so this data isn't valid...
3551 // this isn't entirely true, only the would-have-overlapped data
3552 // isn't valid, but this seems to be what the spec requires
3553 return 0;
3554
3555 // truncate a short frame
3556 if (len < right) right = len;
3557
3558 f->samples_output += right-left;
3559
3560 return right - left;
3561 }
3562
vorbis_pump_first_frame(stb_vorbis * f)3563 static int vorbis_pump_first_frame(stb_vorbis *f)
3564 {
3565 int len, right, left, res;
3566 res = vorbis_decode_packet(f, &len, &left, &right);
3567 if (res)
3568 vorbis_finish_frame(f, len, left, right);
3569 return res;
3570 }
3571
3572 #ifndef STB_VORBIS_NO_PUSHDATA_API
is_whole_packet_present(stb_vorbis * f,int end_page)3573 static int is_whole_packet_present(stb_vorbis *f, int end_page)
3574 {
3575 // make sure that we have the packet available before continuing...
3576 // this requires a full ogg parse, but we know we can fetch from f->stream
3577
3578 // instead of coding this out explicitly, we could save the current read state,
3579 // read the next packet with get8() until end-of-packet, check f->eof, then
3580 // reset the state? but that would be slower, esp. since we'd have over 256 bytes
3581 // of state to restore (primarily the page segment table)
3582
3583 int s = f->next_seg, first = TRUE;
3584 uint8 *p = f->stream;
3585
3586 if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
3587 for (; s < f->segment_count; ++s) {
3588 p += f->segments[s];
3589 if (f->segments[s] < 255) // stop at first short segment
3590 break;
3591 }
3592 // either this continues, or it ends it...
3593 if (end_page)
3594 if (s < f->segment_count-1) return error(f, VORBIS_invalid_stream);
3595 if (s == f->segment_count)
3596 s = -1; // set 'crosses page' flag
3597 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3598 first = FALSE;
3599 }
3600 for (; s == -1;) {
3601 uint8 *q;
3602 int n;
3603
3604 // check that we have the page header ready
3605 if (p + 26 >= f->stream_end) return error(f, VORBIS_need_more_data);
3606 // validate the page
3607 if (memcmp(p, ogg_page_header, 4)) return error(f, VORBIS_invalid_stream);
3608 if (p[4] != 0) return error(f, VORBIS_invalid_stream);
3609 if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
3610 if (f->previous_length)
3611 if ((p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3612 // if no previous length, we're resynching, so we can come in on a continued-packet,
3613 // which we'll just drop
3614 } else {
3615 if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3616 }
3617 n = p[26]; // segment counts
3618 q = p+27; // q points to segment table
3619 p = q + n; // advance past header
3620 // make sure we've read the segment table
3621 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3622 for (s=0; s < n; ++s) {
3623 p += q[s];
3624 if (q[s] < 255)
3625 break;
3626 }
3627 if (end_page)
3628 if (s < n-1) return error(f, VORBIS_invalid_stream);
3629 if (s == n)
3630 s = -1; // set 'crosses page' flag
3631 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3632 first = FALSE;
3633 }
3634 return TRUE;
3635 }
3636 #endif // !STB_VORBIS_NO_PUSHDATA_API
3637
start_decoder(vorb * f)3638 static int start_decoder(vorb *f)
3639 {
3640 uint8 header[6], x,y;
3641 int len,i,j,k, max_submaps = 0;
3642 int longest_floorlist=0;
3643
3644 // first page, first packet
3645
3646 if (!start_page(f)) return FALSE;
3647 // validate page flag
3648 if (!(f->page_flag & PAGEFLAG_first_page)) return error(f, VORBIS_invalid_first_page);
3649 if (f->page_flag & PAGEFLAG_last_page) return error(f, VORBIS_invalid_first_page);
3650 if (f->page_flag & PAGEFLAG_continued_packet) return error(f, VORBIS_invalid_first_page);
3651 // check for expected packet length
3652 if (f->segment_count != 1) return error(f, VORBIS_invalid_first_page);
3653 if (f->segments[0] != 30) return error(f, VORBIS_invalid_first_page);
3654 // read packet
3655 // check packet header
3656 if (get8(f) != VORBIS_packet_id) return error(f, VORBIS_invalid_first_page);
3657 if (!getn(f, header, 6)) return error(f, VORBIS_unexpected_eof);
3658 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_first_page);
3659 // vorbis_version
3660 if (get32(f) != 0) return error(f, VORBIS_invalid_first_page);
3661 f->channels = get8(f); if (!f->channels) return error(f, VORBIS_invalid_first_page);
3662 if (f->channels > STB_VORBIS_MAX_CHANNELS) return error(f, VORBIS_too_many_channels);
3663 f->sample_rate = get32(f); if (!f->sample_rate) return error(f, VORBIS_invalid_first_page);
3664 get32(f); // bitrate_maximum
3665 get32(f); // bitrate_nominal
3666 get32(f); // bitrate_minimum
3667 x = get8(f);
3668 {
3669 int log0,log1;
3670 log0 = x & 15;
3671 log1 = x >> 4;
3672 f->blocksize_0 = 1 << log0;
3673 f->blocksize_1 = 1 << log1;
3674 if (log0 < 6 || log0 > 13) return error(f, VORBIS_invalid_setup);
3675 if (log1 < 6 || log1 > 13) return error(f, VORBIS_invalid_setup);
3676 if (log0 > log1) return error(f, VORBIS_invalid_setup);
3677 }
3678
3679 // framing_flag
3680 x = get8(f);
3681 if (!(x & 1)) return error(f, VORBIS_invalid_first_page);
3682
3683 // second packet!
3684 if (!start_page(f)) return FALSE;
3685
3686 if (!start_packet(f)) return FALSE;
3687 do {
3688 len = next_segment(f);
3689 skip(f, len);
3690 f->bytes_in_seg = 0;
3691 } while (len);
3692
3693 // third packet!
3694 if (!start_packet(f)) return FALSE;
3695
3696 #ifndef STB_VORBIS_NO_PUSHDATA_API
3697 if (IS_PUSH_MODE(f)) {
3698 if (!is_whole_packet_present(f, TRUE)) {
3699 // convert error in ogg header to write type
3700 if (f->error == VORBIS_invalid_stream)
3701 f->error = VORBIS_invalid_setup;
3702 return FALSE;
3703 }
3704 }
3705 #endif
3706
3707 crc32_init(); // always init it, to avoid multithread race conditions
3708
3709 if (get8_packet(f) != VORBIS_packet_setup) return error(f, VORBIS_invalid_setup);
3710 for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3711 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_setup);
3712
3713 // codebooks
3714
3715 f->codebook_count = get_bits(f,8) + 1;
3716 f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
3717 if (f->codebooks == NULL) return error(f, VORBIS_outofmem);
3718 memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
3719 for (i=0; i < f->codebook_count; ++i) {
3720 uint32 *values;
3721 int ordered, sorted_count;
3722 int total=0;
3723 uint8 *lengths;
3724 Codebook *c = f->codebooks+i;
3725 CHECK(f);
3726 x = get_bits(f, 8); if (x != 0x42) return error(f, VORBIS_invalid_setup);
3727 x = get_bits(f, 8); if (x != 0x43) return error(f, VORBIS_invalid_setup);
3728 x = get_bits(f, 8); if (x != 0x56) return error(f, VORBIS_invalid_setup);
3729 x = get_bits(f, 8);
3730 c->dimensions = (get_bits(f, 8)<<8) + x;
3731 x = get_bits(f, 8);
3732 y = get_bits(f, 8);
3733 c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
3734 ordered = get_bits(f,1);
3735 c->sparse = ordered ? 0 : get_bits(f,1);
3736
3737 if (c->dimensions == 0 && c->entries != 0) return error(f, VORBIS_invalid_setup);
3738
3739 if (c->sparse)
3740 lengths = (uint8 *) setup_temp_malloc(f, c->entries);
3741 else
3742 lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3743
3744 if (!lengths) return error(f, VORBIS_outofmem);
3745
3746 if (ordered) {
3747 int current_entry = 0;
3748 int current_length = get_bits(f,5) + 1;
3749 while (current_entry < c->entries) {
3750 int limit = c->entries - current_entry;
3751 int n = get_bits(f, ilog(limit));
3752 if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
3753 memset(lengths + current_entry, current_length, n);
3754 current_entry += n;
3755 ++current_length;
3756 }
3757 } else {
3758 for (j=0; j < c->entries; ++j) {
3759 int present = c->sparse ? get_bits(f,1) : 1;
3760 if (present) {
3761 lengths[j] = get_bits(f, 5) + 1;
3762 ++total;
3763 if (lengths[j] == 32)
3764 return error(f, VORBIS_invalid_setup);
3765 } else {
3766 lengths[j] = NO_CODE;
3767 }
3768 }
3769 }
3770
3771 if (c->sparse && total >= c->entries >> 2) {
3772 // convert sparse items to non-sparse!
3773 if (c->entries > (int) f->setup_temp_memory_required)
3774 f->setup_temp_memory_required = c->entries;
3775
3776 c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3777 if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem);
3778 memcpy(c->codeword_lengths, lengths, c->entries);
3779 setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
3780 lengths = c->codeword_lengths;
3781 c->sparse = 0;
3782 }
3783
3784 // compute the size of the sorted tables
3785 if (c->sparse) {
3786 sorted_count = total;
3787 } else {
3788 sorted_count = 0;
3789 #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
3790 for (j=0; j < c->entries; ++j)
3791 if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
3792 ++sorted_count;
3793 #endif
3794 }
3795
3796 c->sorted_entries = sorted_count;
3797 values = NULL;
3798
3799 CHECK(f);
3800 if (!c->sparse) {
3801 c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
3802 if (!c->codewords) return error(f, VORBIS_outofmem);
3803 } else {
3804 unsigned int size;
3805 if (c->sorted_entries) {
3806 c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries);
3807 if (!c->codeword_lengths) return error(f, VORBIS_outofmem);
3808 c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
3809 if (!c->codewords) return error(f, VORBIS_outofmem);
3810 values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
3811 if (!values) return error(f, VORBIS_outofmem);
3812 }
3813 size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
3814 if (size > f->setup_temp_memory_required)
3815 f->setup_temp_memory_required = size;
3816 }
3817
3818 if (!compute_codewords(c, lengths, c->entries, values)) {
3819 if (c->sparse) setup_temp_free(f, values, 0);
3820 return error(f, VORBIS_invalid_setup);
3821 }
3822
3823 if (c->sorted_entries) {
3824 // allocate an extra slot for sentinels
3825 c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
3826 if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem);
3827 // allocate an extra slot at the front so that c->sorted_values[-1] is defined
3828 // so that we can catch that case without an extra if
3829 c->sorted_values = ( int *) setup_malloc(f, sizeof(*c->sorted_values ) * (c->sorted_entries+1));
3830 if (c->sorted_values == NULL) return error(f, VORBIS_outofmem);
3831 ++c->sorted_values;
3832 c->sorted_values[-1] = -1;
3833 compute_sorted_huffman(c, lengths, values);
3834 }
3835
3836 if (c->sparse) {
3837 setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
3838 setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
3839 setup_temp_free(f, lengths, c->entries);
3840 c->codewords = NULL;
3841 }
3842
3843 compute_accelerated_huffman(c);
3844
3845 CHECK(f);
3846 c->lookup_type = get_bits(f, 4);
3847 if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
3848 if (c->lookup_type > 0) {
3849 uint16 *mults;
3850 c->minimum_value = float32_unpack(get_bits(f, 32));
3851 c->delta_value = float32_unpack(get_bits(f, 32));
3852 c->value_bits = get_bits(f, 4)+1;
3853 c->sequence_p = get_bits(f,1);
3854 if (c->lookup_type == 1) {
3855 c->lookup_values = lookup1_values(c->entries, c->dimensions);
3856 } else {
3857 c->lookup_values = c->entries * c->dimensions;
3858 }
3859 if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup);
3860 mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
3861 if (mults == NULL) return error(f, VORBIS_outofmem);
3862 for (j=0; j < (int) c->lookup_values; ++j) {
3863 int q = get_bits(f, c->value_bits);
3864 if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
3865 mults[j] = q;
3866 }
3867
3868 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3869 if (c->lookup_type == 1) {
3870 int len, sparse = c->sparse;
3871 float last=0;
3872 // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3873 if (sparse) {
3874 if (c->sorted_entries == 0) goto skip;
3875 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
3876 } else
3877 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries * c->dimensions);
3878 if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3879 len = sparse ? c->sorted_entries : c->entries;
3880 for (j=0; j < len; ++j) {
3881 unsigned int z = sparse ? c->sorted_values[j] : j;
3882 unsigned int div=1;
3883 for (k=0; k < c->dimensions; ++k) {
3884 int off = (z / div) % c->lookup_values;
3885 float val = mults[off];
3886 val = mults[off]*c->delta_value + c->minimum_value + last;
3887 c->multiplicands[j*c->dimensions + k] = val;
3888 if (c->sequence_p)
3889 last = val;
3890 if (k+1 < c->dimensions) {
3891 if (div > UINT_MAX / (unsigned int) c->lookup_values) {
3892 setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
3893 return error(f, VORBIS_invalid_setup);
3894 }
3895 div *= c->lookup_values;
3896 }
3897 }
3898 }
3899 c->lookup_type = 2;
3900 }
3901 else
3902 #endif
3903 {
3904 float last=0;
3905 CHECK(f);
3906 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
3907 if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3908 for (j=0; j < (int) c->lookup_values; ++j) {
3909 float val = mults[j] * c->delta_value + c->minimum_value + last;
3910 c->multiplicands[j] = val;
3911 if (c->sequence_p)
3912 last = val;
3913 }
3914 }
3915 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3916 skip:;
3917 #endif
3918 setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values);
3919
3920 CHECK(f);
3921 }
3922 CHECK(f);
3923 }
3924
3925 // time domain transfers (notused)
3926
3927 x = get_bits(f, 6) + 1;
3928 for (i=0; i < x; ++i) {
3929 uint32 z = get_bits(f, 16);
3930 if (z != 0) return error(f, VORBIS_invalid_setup);
3931 }
3932
3933 // Floors
3934 f->floor_count = get_bits(f, 6)+1;
3935 f->floor_config = (Floor *) setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
3936 if (f->floor_config == NULL) return error(f, VORBIS_outofmem);
3937 for (i=0; i < f->floor_count; ++i) {
3938 f->floor_types[i] = get_bits(f, 16);
3939 if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
3940 if (f->floor_types[i] == 0) {
3941 Floor0 *g = &f->floor_config[i].floor0;
3942 g->order = get_bits(f,8);
3943 g->rate = get_bits(f,16);
3944 g->bark_map_size = get_bits(f,16);
3945 g->amplitude_bits = get_bits(f,6);
3946 g->amplitude_offset = get_bits(f,8);
3947 g->number_of_books = get_bits(f,4) + 1;
3948 for (j=0; j < g->number_of_books; ++j)
3949 g->book_list[j] = get_bits(f,8);
3950 return error(f, VORBIS_feature_not_supported);
3951 } else {
3952 stbv__floor_ordering p[31*8+2];
3953 Floor1 *g = &f->floor_config[i].floor1;
3954 int max_class = -1;
3955 g->partitions = get_bits(f, 5);
3956 for (j=0; j < g->partitions; ++j) {
3957 g->partition_class_list[j] = get_bits(f, 4);
3958 if (g->partition_class_list[j] > max_class)
3959 max_class = g->partition_class_list[j];
3960 }
3961 for (j=0; j <= max_class; ++j) {
3962 g->class_dimensions[j] = get_bits(f, 3)+1;
3963 g->class_subclasses[j] = get_bits(f, 2);
3964 if (g->class_subclasses[j]) {
3965 g->class_masterbooks[j] = get_bits(f, 8);
3966 if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3967 }
3968 for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
3969 g->subclass_books[j][k] = get_bits(f,8)-1;
3970 if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3971 }
3972 }
3973 g->floor1_multiplier = get_bits(f,2)+1;
3974 g->rangebits = get_bits(f,4);
3975 g->Xlist[0] = 0;
3976 g->Xlist[1] = 1 << g->rangebits;
3977 g->values = 2;
3978 for (j=0; j < g->partitions; ++j) {
3979 int c = g->partition_class_list[j];
3980 for (k=0; k < g->class_dimensions[c]; ++k) {
3981 g->Xlist[g->values] = get_bits(f, g->rangebits);
3982 ++g->values;
3983 }
3984 }
3985 // precompute the sorting
3986 for (j=0; j < g->values; ++j) {
3987 p[j].x = g->Xlist[j];
3988 p[j].id = j;
3989 }
3990 qsort(p, g->values, sizeof(p[0]), point_compare);
3991 for (j=0; j < g->values; ++j)
3992 g->sorted_order[j] = (uint8) p[j].id;
3993 // precompute the neighbors
3994 for (j=2; j < g->values; ++j) {
3995 int low,hi;
3996 neighbors(g->Xlist, j, &low,&hi);
3997 g->neighbors[j][0] = low;
3998 g->neighbors[j][1] = hi;
3999 }
4000
4001 if (g->values > longest_floorlist)
4002 longest_floorlist = g->values;
4003 }
4004 }
4005
4006 // Residue
4007 f->residue_count = get_bits(f, 6)+1;
4008 f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0]));
4009 if (f->residue_config == NULL) return error(f, VORBIS_outofmem);
4010 memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0]));
4011 for (i=0; i < f->residue_count; ++i) {
4012 uint8 residue_cascade[64];
4013 Residue *r = f->residue_config+i;
4014 f->residue_types[i] = get_bits(f, 16);
4015 if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
4016 r->begin = get_bits(f, 24);
4017 r->end = get_bits(f, 24);
4018 if (r->end < r->begin) return error(f, VORBIS_invalid_setup);
4019 r->part_size = get_bits(f,24)+1;
4020 r->classifications = get_bits(f,6)+1;
4021 r->classbook = get_bits(f,8);
4022 if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup);
4023 for (j=0; j < r->classifications; ++j) {
4024 uint8 high_bits=0;
4025 uint8 low_bits=get_bits(f,3);
4026 if (get_bits(f,1))
4027 high_bits = get_bits(f,5);
4028 residue_cascade[j] = high_bits*8 + low_bits;
4029 }
4030 r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
4031 if (r->residue_books == NULL) return error(f, VORBIS_outofmem);
4032 for (j=0; j < r->classifications; ++j) {
4033 for (k=0; k < 8; ++k) {
4034 if (residue_cascade[j] & (1 << k)) {
4035 r->residue_books[j][k] = get_bits(f, 8);
4036 if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
4037 } else {
4038 r->residue_books[j][k] = -1;
4039 }
4040 }
4041 }
4042 // precompute the classifications[] array to avoid inner-loop mod/divide
4043 // call it 'classdata' since we already have r->classifications
4044 r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
4045 if (!r->classdata) return error(f, VORBIS_outofmem);
4046 memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
4047 for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
4048 int classwords = f->codebooks[r->classbook].dimensions;
4049 int temp = j;
4050 r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
4051 if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem);
4052 for (k=classwords-1; k >= 0; --k) {
4053 r->classdata[j][k] = temp % r->classifications;
4054 temp /= r->classifications;
4055 }
4056 }
4057 }
4058
4059 f->mapping_count = get_bits(f,6)+1;
4060 f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
4061 if (f->mapping == NULL) return error(f, VORBIS_outofmem);
4062 memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping));
4063 for (i=0; i < f->mapping_count; ++i) {
4064 Mapping *m = f->mapping + i;
4065 int mapping_type = get_bits(f,16);
4066 if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
4067 m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
4068 if (m->chan == NULL) return error(f, VORBIS_outofmem);
4069 if (get_bits(f,1))
4070 m->submaps = get_bits(f,4)+1;
4071 else
4072 m->submaps = 1;
4073 if (m->submaps > max_submaps)
4074 max_submaps = m->submaps;
4075 if (get_bits(f,1)) {
4076 m->coupling_steps = get_bits(f,8)+1;
4077 for (k=0; k < m->coupling_steps; ++k) {
4078 m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
4079 m->chan[k].angle = get_bits(f, ilog(f->channels-1));
4080 if (m->chan[k].magnitude >= f->channels) return error(f, VORBIS_invalid_setup);
4081 if (m->chan[k].angle >= f->channels) return error(f, VORBIS_invalid_setup);
4082 if (m->chan[k].magnitude == m->chan[k].angle) return error(f, VORBIS_invalid_setup);
4083 }
4084 } else
4085 m->coupling_steps = 0;
4086
4087 // reserved field
4088 if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
4089 if (m->submaps > 1) {
4090 for (j=0; j < f->channels; ++j) {
4091 m->chan[j].mux = get_bits(f, 4);
4092 if (m->chan[j].mux >= m->submaps) return error(f, VORBIS_invalid_setup);
4093 }
4094 } else
4095 // @SPECIFICATION: this case is missing from the spec
4096 for (j=0; j < f->channels; ++j)
4097 m->chan[j].mux = 0;
4098
4099 for (j=0; j < m->submaps; ++j) {
4100 get_bits(f,8); // discard
4101 m->submap_floor[j] = get_bits(f,8);
4102 m->submap_residue[j] = get_bits(f,8);
4103 if (m->submap_floor[j] >= f->floor_count) return error(f, VORBIS_invalid_setup);
4104 if (m->submap_residue[j] >= f->residue_count) return error(f, VORBIS_invalid_setup);
4105 }
4106 }
4107
4108 // Modes
4109 f->mode_count = get_bits(f, 6)+1;
4110 for (i=0; i < f->mode_count; ++i) {
4111 Mode *m = f->mode_config+i;
4112 m->blockflag = get_bits(f,1);
4113 m->windowtype = get_bits(f,16);
4114 m->transformtype = get_bits(f,16);
4115 m->mapping = get_bits(f,8);
4116 if (m->windowtype != 0) return error(f, VORBIS_invalid_setup);
4117 if (m->transformtype != 0) return error(f, VORBIS_invalid_setup);
4118 if (m->mapping >= f->mapping_count) return error(f, VORBIS_invalid_setup);
4119 }
4120
4121 flush_packet(f);
4122
4123 f->previous_length = 0;
4124
4125 for (i=0; i < f->channels; ++i) {
4126 f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
4127 f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4128 f->finalY[i] = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
4129 if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem);
4130 memset(f->channel_buffers[i], 0, sizeof(float) * f->blocksize_1);
4131 #ifdef STB_VORBIS_NO_DEFER_FLOOR
4132 f->floor_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4133 if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem);
4134 #endif
4135 }
4136
4137 if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
4138 if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
4139 f->blocksize[0] = f->blocksize_0;
4140 f->blocksize[1] = f->blocksize_1;
4141
4142 #ifdef STB_VORBIS_DIVIDE_TABLE
4143 if (integer_divide_table[1][1]==0)
4144 for (i=0; i < DIVTAB_NUMER; ++i)
4145 for (j=1; j < DIVTAB_DENOM; ++j)
4146 integer_divide_table[i][j] = i / j;
4147 #endif
4148
4149 // compute how much temporary memory is needed
4150
4151 // 1.
4152 {
4153 uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
4154 uint32 classify_mem;
4155 int i,max_part_read=0;
4156 for (i=0; i < f->residue_count; ++i) {
4157 Residue *r = f->residue_config + i;
4158 unsigned int actual_size = f->blocksize_1 / 2;
4159 unsigned int limit_r_begin = r->begin < actual_size ? r->begin : actual_size;
4160 unsigned int limit_r_end = r->end < actual_size ? r->end : actual_size;
4161 int n_read = limit_r_end - limit_r_begin;
4162 int part_read = n_read / r->part_size;
4163 if (part_read > max_part_read)
4164 max_part_read = part_read;
4165 }
4166 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
4167 classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
4168 #else
4169 classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
4170 #endif
4171
4172 // maximum reasonable partition size is f->blocksize_1
4173
4174 f->temp_memory_required = classify_mem;
4175 if (imdct_mem > f->temp_memory_required)
4176 f->temp_memory_required = imdct_mem;
4177 }
4178
4179 f->first_decode = TRUE;
4180
4181 if (f->alloc.alloc_buffer) {
4182 assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
4183 // check if there's enough temp memory so we don't error later
4184 if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
4185 return error(f, VORBIS_outofmem);
4186 }
4187
4188 f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
4189
4190 return TRUE;
4191 }
4192
vorbis_deinit(stb_vorbis * p)4193 static void vorbis_deinit(stb_vorbis *p)
4194 {
4195 int i,j;
4196 if (p->residue_config) {
4197 for (i=0; i < p->residue_count; ++i) {
4198 Residue *r = p->residue_config+i;
4199 if (r->classdata) {
4200 for (j=0; j < p->codebooks[r->classbook].entries; ++j)
4201 setup_free(p, r->classdata[j]);
4202 setup_free(p, r->classdata);
4203 }
4204 setup_free(p, r->residue_books);
4205 }
4206 }
4207
4208 if (p->codebooks) {
4209 CHECK(p);
4210 for (i=0; i < p->codebook_count; ++i) {
4211 Codebook *c = p->codebooks + i;
4212 setup_free(p, c->codeword_lengths);
4213 setup_free(p, c->multiplicands);
4214 setup_free(p, c->codewords);
4215 setup_free(p, c->sorted_codewords);
4216 // c->sorted_values[-1] is the first entry in the array
4217 setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
4218 }
4219 setup_free(p, p->codebooks);
4220 }
4221 setup_free(p, p->floor_config);
4222 setup_free(p, p->residue_config);
4223 if (p->mapping) {
4224 for (i=0; i < p->mapping_count; ++i)
4225 setup_free(p, p->mapping[i].chan);
4226 setup_free(p, p->mapping);
4227 }
4228 CHECK(p);
4229 for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
4230 setup_free(p, p->channel_buffers[i]);
4231 setup_free(p, p->previous_window[i]);
4232 #ifdef STB_VORBIS_NO_DEFER_FLOOR
4233 setup_free(p, p->floor_buffers[i]);
4234 #endif
4235 setup_free(p, p->finalY[i]);
4236 }
4237 for (i=0; i < 2; ++i) {
4238 setup_free(p, p->A[i]);
4239 setup_free(p, p->B[i]);
4240 setup_free(p, p->C[i]);
4241 setup_free(p, p->window[i]);
4242 setup_free(p, p->bit_reverse[i]);
4243 }
4244 #ifndef STB_VORBIS_NO_STDIO
4245 if (p->close_on_free) fclose(p->f);
4246 #endif
4247 }
4248
stb_vorbis_close(stb_vorbis * p)4249 void stb_vorbis_close(stb_vorbis *p)
4250 {
4251 if (p == NULL) return;
4252 vorbis_deinit(p);
4253 setup_free(p,p);
4254 }
4255
vorbis_init(stb_vorbis * p,const stb_vorbis_alloc * z)4256 static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
4257 {
4258 memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start
4259 if (z) {
4260 p->alloc = *z;
4261 p->alloc.alloc_buffer_length_in_bytes = (p->alloc.alloc_buffer_length_in_bytes+3) & ~3;
4262 p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
4263 }
4264 p->eof = 0;
4265 p->error = VORBIS__no_error;
4266 p->stream = NULL;
4267 p->codebooks = NULL;
4268 p->page_crc_tests = -1;
4269 #ifndef STB_VORBIS_NO_STDIO
4270 p->close_on_free = FALSE;
4271 p->f = NULL;
4272 #endif
4273 }
4274
stb_vorbis_get_sample_offset(stb_vorbis * f)4275 int stb_vorbis_get_sample_offset(stb_vorbis *f)
4276 {
4277 if (f->current_loc_valid)
4278 return f->current_loc;
4279 else
4280 return -1;
4281 }
4282
stb_vorbis_get_info(stb_vorbis * f)4283 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
4284 {
4285 stb_vorbis_info d;
4286 d.channels = f->channels;
4287 d.sample_rate = f->sample_rate;
4288 d.setup_memory_required = f->setup_memory_required;
4289 d.setup_temp_memory_required = f->setup_temp_memory_required;
4290 d.temp_memory_required = f->temp_memory_required;
4291 d.max_frame_size = f->blocksize_1 >> 1;
4292 return d;
4293 }
4294
stb_vorbis_get_error(stb_vorbis * f)4295 int stb_vorbis_get_error(stb_vorbis *f)
4296 {
4297 int e = f->error;
4298 f->error = VORBIS__no_error;
4299 return e;
4300 }
4301
vorbis_alloc(stb_vorbis * f)4302 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
4303 {
4304 stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
4305 return p;
4306 }
4307
4308 #ifndef STB_VORBIS_NO_PUSHDATA_API
4309
stb_vorbis_flush_pushdata(stb_vorbis * f)4310 void stb_vorbis_flush_pushdata(stb_vorbis *f)
4311 {
4312 f->previous_length = 0;
4313 f->page_crc_tests = 0;
4314 f->discard_samples_deferred = 0;
4315 f->current_loc_valid = FALSE;
4316 f->first_decode = FALSE;
4317 f->samples_output = 0;
4318 f->channel_buffer_start = 0;
4319 f->channel_buffer_end = 0;
4320 }
4321
vorbis_search_for_page_pushdata(vorb * f,uint8 * data,int data_len)4322 static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
4323 {
4324 int i,n;
4325 for (i=0; i < f->page_crc_tests; ++i)
4326 f->scan[i].bytes_done = 0;
4327
4328 // if we have room for more scans, search for them first, because
4329 // they may cause us to stop early if their header is incomplete
4330 if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
4331 if (data_len < 4) return 0;
4332 data_len -= 3; // need to look for 4-byte sequence, so don't miss
4333 // one that straddles a boundary
4334 for (i=0; i < data_len; ++i) {
4335 if (data[i] == 0x4f) {
4336 if (0==memcmp(data+i, ogg_page_header, 4)) {
4337 int j,len;
4338 uint32 crc;
4339 // make sure we have the whole page header
4340 if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
4341 // only read up to this page start, so hopefully we'll
4342 // have the whole page header start next time
4343 data_len = i;
4344 break;
4345 }
4346 // ok, we have it all; compute the length of the page
4347 len = 27 + data[i+26];
4348 for (j=0; j < data[i+26]; ++j)
4349 len += data[i+27+j];
4350 // scan everything up to the embedded crc (which we must 0)
4351 crc = 0;
4352 for (j=0; j < 22; ++j)
4353 crc = crc32_update(crc, data[i+j]);
4354 // now process 4 0-bytes
4355 for ( ; j < 26; ++j)
4356 crc = crc32_update(crc, 0);
4357 // len is the total number of bytes we need to scan
4358 n = f->page_crc_tests++;
4359 f->scan[n].bytes_left = len-j;
4360 f->scan[n].crc_so_far = crc;
4361 f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24);
4362 // if the last frame on a page is continued to the next, then
4363 // we can't recover the sample_loc immediately
4364 if (data[i+27+data[i+26]-1] == 255)
4365 f->scan[n].sample_loc = ~0;
4366 else
4367 f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24);
4368 f->scan[n].bytes_done = i+j;
4369 if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT)
4370 break;
4371 // keep going if we still have room for more
4372 }
4373 }
4374 }
4375 }
4376
4377 for (i=0; i < f->page_crc_tests;) {
4378 uint32 crc;
4379 int j;
4380 int n = f->scan[i].bytes_done;
4381 int m = f->scan[i].bytes_left;
4382 if (m > data_len - n) m = data_len - n;
4383 // m is the bytes to scan in the current chunk
4384 crc = f->scan[i].crc_so_far;
4385 for (j=0; j < m; ++j)
4386 crc = crc32_update(crc, data[n+j]);
4387 f->scan[i].bytes_left -= m;
4388 f->scan[i].crc_so_far = crc;
4389 if (f->scan[i].bytes_left == 0) {
4390 // does it match?
4391 if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
4392 // Houston, we have page
4393 data_len = n+m; // consumption amount is wherever that scan ended
4394 f->page_crc_tests = -1; // drop out of page scan mode
4395 f->previous_length = 0; // decode-but-don't-output one frame
4396 f->next_seg = -1; // start a new page
4397 f->current_loc = f->scan[i].sample_loc; // set the current sample location
4398 // to the amount we'd have decoded had we decoded this page
4399 f->current_loc_valid = f->current_loc != ~0U;
4400 return data_len;
4401 }
4402 // delete entry
4403 f->scan[i] = f->scan[--f->page_crc_tests];
4404 } else {
4405 ++i;
4406 }
4407 }
4408
4409 return data_len;
4410 }
4411
4412 // return value: number of bytes we used
stb_vorbis_decode_frame_pushdata(stb_vorbis * f,const uint8 * data,int data_len,int * channels,float *** output,int * samples)4413 int stb_vorbis_decode_frame_pushdata(
4414 stb_vorbis *f, // the file we're decoding
4415 const uint8 *data, int data_len, // the memory available for decoding
4416 int *channels, // place to write number of float * buffers
4417 float ***output, // place to write float ** array of float * buffers
4418 int *samples // place to write number of output samples
4419 )
4420 {
4421 int i;
4422 int len,right,left;
4423
4424 if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4425
4426 if (f->page_crc_tests >= 0) {
4427 *samples = 0;
4428 return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len);
4429 }
4430
4431 f->stream = (uint8 *) data;
4432 f->stream_end = (uint8 *) data + data_len;
4433 f->error = VORBIS__no_error;
4434
4435 // check that we have the entire packet in memory
4436 if (!is_whole_packet_present(f, FALSE)) {
4437 *samples = 0;
4438 return 0;
4439 }
4440
4441 if (!vorbis_decode_packet(f, &len, &left, &right)) {
4442 // save the actual error we encountered
4443 enum STBVorbisError error = f->error;
4444 if (error == VORBIS_bad_packet_type) {
4445 // flush and resynch
4446 f->error = VORBIS__no_error;
4447 while (get8_packet(f) != EOP)
4448 if (f->eof) break;
4449 *samples = 0;
4450 return (int) (f->stream - data);
4451 }
4452 if (error == VORBIS_continued_packet_flag_invalid) {
4453 if (f->previous_length == 0) {
4454 // we may be resynching, in which case it's ok to hit one
4455 // of these; just discard the packet
4456 f->error = VORBIS__no_error;
4457 while (get8_packet(f) != EOP)
4458 if (f->eof) break;
4459 *samples = 0;
4460 return (int) (f->stream - data);
4461 }
4462 }
4463 // if we get an error while parsing, what to do?
4464 // well, it DEFINITELY won't work to continue from where we are!
4465 stb_vorbis_flush_pushdata(f);
4466 // restore the error that actually made us bail
4467 f->error = error;
4468 *samples = 0;
4469 return 1;
4470 }
4471
4472 // success!
4473 len = vorbis_finish_frame(f, len, left, right);
4474 for (i=0; i < f->channels; ++i)
4475 f->outputs[i] = f->channel_buffers[i] + left;
4476
4477 if (channels) *channels = f->channels;
4478 *samples = len;
4479 *output = f->outputs;
4480 return (int) (f->stream - data);
4481 }
4482
stb_vorbis_open_pushdata(const unsigned char * data,int data_len,int * data_used,int * error,const stb_vorbis_alloc * alloc)4483 stb_vorbis *stb_vorbis_open_pushdata(
4484 const unsigned char *data, int data_len, // the memory available for decoding
4485 int *data_used, // only defined if result is not NULL
4486 int *error, const stb_vorbis_alloc *alloc)
4487 {
4488 stb_vorbis *f, p;
4489 vorbis_init(&p, alloc);
4490 p.stream = (uint8 *) data;
4491 p.stream_end = (uint8 *) data + data_len;
4492 p.push_mode = TRUE;
4493 if (!start_decoder(&p)) {
4494 if (p.eof)
4495 *error = VORBIS_need_more_data;
4496 else
4497 *error = p.error;
4498 return NULL;
4499 }
4500 f = vorbis_alloc(&p);
4501 if (f) {
4502 *f = p;
4503 *data_used = (int) (f->stream - data);
4504 *error = 0;
4505 return f;
4506 } else {
4507 vorbis_deinit(&p);
4508 return NULL;
4509 }
4510 }
4511 #endif // STB_VORBIS_NO_PUSHDATA_API
4512
stb_vorbis_get_file_offset(stb_vorbis * f)4513 unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
4514 {
4515 #ifndef STB_VORBIS_NO_PUSHDATA_API
4516 if (f->push_mode) return 0;
4517 #endif
4518 if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start);
4519 #ifndef STB_VORBIS_NO_STDIO
4520 return (unsigned int) (ftell(f->f) - f->f_start);
4521 #endif
4522 }
4523
4524 #ifndef STB_VORBIS_NO_PULLDATA_API
4525 //
4526 // DATA-PULLING API
4527 //
4528
vorbis_find_page(stb_vorbis * f,uint32 * end,uint32 * last)4529 static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
4530 {
4531 for(;;) {
4532 int n;
4533 if (f->eof) return 0;
4534 n = get8(f);
4535 if (n == 0x4f) { // page header candidate
4536 unsigned int retry_loc = stb_vorbis_get_file_offset(f);
4537 int i;
4538 // check if we're off the end of a file_section stream
4539 if (retry_loc - 25 > f->stream_len)
4540 return 0;
4541 // check the rest of the header
4542 for (i=1; i < 4; ++i)
4543 if (get8(f) != ogg_page_header[i])
4544 break;
4545 if (f->eof) return 0;
4546 if (i == 4) {
4547 uint8 header[27];
4548 uint32 i, crc, goal, len;
4549 for (i=0; i < 4; ++i)
4550 header[i] = ogg_page_header[i];
4551 for (; i < 27; ++i)
4552 header[i] = get8(f);
4553 if (f->eof) return 0;
4554 if (header[4] != 0) goto invalid;
4555 goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24);
4556 for (i=22; i < 26; ++i)
4557 header[i] = 0;
4558 crc = 0;
4559 for (i=0; i < 27; ++i)
4560 crc = crc32_update(crc, header[i]);
4561 len = 0;
4562 for (i=0; i < header[26]; ++i) {
4563 int s = get8(f);
4564 crc = crc32_update(crc, s);
4565 len += s;
4566 }
4567 if (len && f->eof) return 0;
4568 for (i=0; i < len; ++i)
4569 crc = crc32_update(crc, get8(f));
4570 // finished parsing probable page
4571 if (crc == goal) {
4572 // we could now check that it's either got the last
4573 // page flag set, OR it's followed by the capture
4574 // pattern, but I guess TECHNICALLY you could have
4575 // a file with garbage between each ogg page and recover
4576 // from it automatically? So even though that paranoia
4577 // might decrease the chance of an invalid decode by
4578 // another 2^32, not worth it since it would hose those
4579 // invalid-but-useful files?
4580 if (end)
4581 *end = stb_vorbis_get_file_offset(f);
4582 if (last) {
4583 if (header[5] & 0x04)
4584 *last = 1;
4585 else
4586 *last = 0;
4587 }
4588 set_file_offset(f, retry_loc-1);
4589 return 1;
4590 }
4591 }
4592 invalid:
4593 // not a valid page, so rewind and look for next one
4594 set_file_offset(f, retry_loc);
4595 }
4596 }
4597 }
4598
4599
4600 #define SAMPLE_unknown 0xffffffff
4601
4602 // seeking is implemented with a binary search, which narrows down the range to
4603 // 64K, before using a linear search (because finding the synchronization
4604 // pattern can be expensive, and the chance we'd find the end page again is
4605 // relatively high for small ranges)
4606 //
4607 // two initial interpolation-style probes are used at the start of the search
4608 // to try to bound either side of the binary search sensibly, while still
4609 // working in O(log n) time if they fail.
4610
get_seek_page_info(stb_vorbis * f,ProbedPage * z)4611 static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
4612 {
4613 uint8 header[27], lacing[255];
4614 int i,len;
4615
4616 // record where the page starts
4617 z->page_start = stb_vorbis_get_file_offset(f);
4618
4619 // parse the header
4620 getn(f, header, 27);
4621 if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
4622 return 0;
4623 getn(f, lacing, header[26]);
4624
4625 // determine the length of the payload
4626 len = 0;
4627 for (i=0; i < header[26]; ++i)
4628 len += lacing[i];
4629
4630 // this implies where the page ends
4631 z->page_end = z->page_start + 27 + header[26] + len;
4632
4633 // read the last-decoded sample out of the data
4634 z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
4635
4636 // restore file state to where we were
4637 set_file_offset(f, z->page_start);
4638 return 1;
4639 }
4640
4641 // rarely used function to seek back to the preceeding page while finding the
4642 // start of a packet
go_to_page_before(stb_vorbis * f,unsigned int limit_offset)4643 static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
4644 {
4645 unsigned int previous_safe, end;
4646
4647 // now we want to seek back 64K from the limit
4648 if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset)
4649 previous_safe = limit_offset - 65536;
4650 else
4651 previous_safe = f->first_audio_page_offset;
4652
4653 set_file_offset(f, previous_safe);
4654
4655 while (vorbis_find_page(f, &end, NULL)) {
4656 if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
4657 return 1;
4658 set_file_offset(f, end);
4659 }
4660
4661 return 0;
4662 }
4663
4664 // implements the search logic for finding a page and starting decoding. if
4665 // the function succeeds, current_loc_valid will be true and current_loc will
4666 // be less than or equal to the provided sample number (the closer the
4667 // better).
seek_to_sample_coarse(stb_vorbis * f,uint32 sample_number)4668 static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
4669 {
4670 ProbedPage left, right, mid;
4671 int i, start_seg_with_known_loc, end_pos, page_start;
4672 uint32 delta, stream_length, padding;
4673 double offset, bytes_per_sample;
4674 int probe = 0;
4675
4676 // find the last page and validate the target sample
4677 stream_length = stb_vorbis_stream_length_in_samples(f);
4678 if (stream_length == 0) return error(f, VORBIS_seek_without_length);
4679 if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
4680
4681 // this is the maximum difference between the window-center (which is the
4682 // actual granule position value), and the right-start (which the spec
4683 // indicates should be the granule position (give or take one)).
4684 padding = ((f->blocksize_1 - f->blocksize_0) >> 2);
4685 if (sample_number < padding)
4686 sample_number = 0;
4687 else
4688 sample_number -= padding;
4689
4690 left = f->p_first;
4691 while (left.last_decoded_sample == ~0U) {
4692 // (untested) the first page does not have a 'last_decoded_sample'
4693 set_file_offset(f, left.page_end);
4694 if (!get_seek_page_info(f, &left)) goto error;
4695 }
4696
4697 right = f->p_last;
4698 assert(right.last_decoded_sample != ~0U);
4699
4700 // starting from the start is handled differently
4701 if (sample_number <= left.last_decoded_sample) {
4702 if (stb_vorbis_seek_start(f))
4703 return 1;
4704 return 0;
4705 }
4706
4707 while (left.page_end != right.page_start) {
4708 assert(left.page_end < right.page_start);
4709 // search range in bytes
4710 delta = right.page_start - left.page_end;
4711 if (delta <= 65536) {
4712 // there's only 64K left to search - handle it linearly
4713 set_file_offset(f, left.page_end);
4714 } else {
4715 if (probe < 2) {
4716 if (probe == 0) {
4717 // first probe (interpolate)
4718 double data_bytes = right.page_end - left.page_start;
4719 bytes_per_sample = data_bytes / right.last_decoded_sample;
4720 offset = left.page_start + bytes_per_sample * (sample_number - left.last_decoded_sample);
4721 } else {
4722 // second probe (try to bound the other side)
4723 double error = ((double) sample_number - mid.last_decoded_sample) * bytes_per_sample;
4724 if (error >= 0 && error < 8000) error = 8000;
4725 if (error < 0 && error > -8000) error = -8000;
4726 offset += error * 2;
4727 }
4728
4729 // ensure the offset is valid
4730 if (offset < left.page_end)
4731 offset = left.page_end;
4732 if (offset > right.page_start - 65536)
4733 offset = right.page_start - 65536;
4734
4735 set_file_offset(f, (unsigned int) offset);
4736 } else {
4737 // binary search for large ranges (offset by 32K to ensure
4738 // we don't hit the right page)
4739 set_file_offset(f, left.page_end + (delta / 2) - 32768);
4740 }
4741
4742 if (!vorbis_find_page(f, NULL, NULL)) goto error;
4743 }
4744
4745 for (;;) {
4746 if (!get_seek_page_info(f, &mid)) goto error;
4747 if (mid.last_decoded_sample != ~0U) break;
4748 // (untested) no frames end on this page
4749 set_file_offset(f, mid.page_end);
4750 assert(mid.page_start < right.page_start);
4751 }
4752
4753 // if we've just found the last page again then we're in a tricky file,
4754 // and we're close enough.
4755 if (mid.page_start == right.page_start)
4756 break;
4757
4758 if (sample_number < mid.last_decoded_sample)
4759 right = mid;
4760 else
4761 left = mid;
4762
4763 ++probe;
4764 }
4765
4766 // seek back to start of the last packet
4767 page_start = left.page_start;
4768 set_file_offset(f, page_start);
4769 if (!start_page(f)) return error(f, VORBIS_seek_failed);
4770 end_pos = f->end_seg_with_known_loc;
4771 assert(end_pos >= 0);
4772
4773 for (;;) {
4774 for (i = end_pos; i > 0; --i)
4775 if (f->segments[i-1] != 255)
4776 break;
4777
4778 start_seg_with_known_loc = i;
4779
4780 if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet))
4781 break;
4782
4783 // (untested) the final packet begins on an earlier page
4784 if (!go_to_page_before(f, page_start))
4785 goto error;
4786
4787 page_start = stb_vorbis_get_file_offset(f);
4788 if (!start_page(f)) goto error;
4789 end_pos = f->segment_count - 1;
4790 }
4791
4792 // prepare to start decoding
4793 f->current_loc_valid = FALSE;
4794 f->last_seg = FALSE;
4795 f->valid_bits = 0;
4796 f->packet_bytes = 0;
4797 f->bytes_in_seg = 0;
4798 f->previous_length = 0;
4799 f->next_seg = start_seg_with_known_loc;
4800
4801 for (i = 0; i < start_seg_with_known_loc; i++)
4802 skip(f, f->segments[i]);
4803
4804 // start decoding (optimizable - this frame is generally discarded)
4805 if (!vorbis_pump_first_frame(f))
4806 return 0;
4807 if (f->current_loc > sample_number)
4808 return error(f, VORBIS_seek_failed);
4809 return 1;
4810
4811 error:
4812 // try to restore the file to a valid state
4813 stb_vorbis_seek_start(f);
4814 return error(f, VORBIS_seek_failed);
4815 }
4816
4817 // the same as vorbis_decode_initial, but without advancing
peek_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)4818 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
4819 {
4820 int bits_read, bytes_read;
4821
4822 if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
4823 return 0;
4824
4825 // either 1 or 2 bytes were read, figure out which so we can rewind
4826 bits_read = 1 + ilog(f->mode_count-1);
4827 if (f->mode_config[*mode].blockflag)
4828 bits_read += 2;
4829 bytes_read = (bits_read + 7) / 8;
4830
4831 f->bytes_in_seg += bytes_read;
4832 f->packet_bytes -= bytes_read;
4833 skip(f, -bytes_read);
4834 if (f->next_seg == -1)
4835 f->next_seg = f->segment_count - 1;
4836 else
4837 f->next_seg--;
4838 f->valid_bits = 0;
4839
4840 return 1;
4841 }
4842
stb_vorbis_seek_frame(stb_vorbis * f,unsigned int sample_number)4843 int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
4844 {
4845 uint32 max_frame_samples;
4846
4847 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4848
4849 // fast page-level search
4850 if (!seek_to_sample_coarse(f, sample_number))
4851 return 0;
4852
4853 assert(f->current_loc_valid);
4854 assert(f->current_loc <= sample_number);
4855
4856 // linear search for the relevant packet
4857 max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2;
4858 while (f->current_loc < sample_number) {
4859 int left_start, left_end, right_start, right_end, mode, frame_samples;
4860 if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
4861 return error(f, VORBIS_seek_failed);
4862 // calculate the number of samples returned by the next frame
4863 frame_samples = right_start - left_start;
4864 if (f->current_loc + frame_samples > sample_number) {
4865 return 1; // the next frame will contain the sample
4866 } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) {
4867 // there's a chance the frame after this could contain the sample
4868 vorbis_pump_first_frame(f);
4869 } else {
4870 // this frame is too early to be relevant
4871 f->current_loc += frame_samples;
4872 f->previous_length = 0;
4873 maybe_start_packet(f);
4874 flush_packet(f);
4875 }
4876 }
4877 // the next frame will start with the sample
4878 assert(f->current_loc == sample_number);
4879 return 1;
4880 }
4881
stb_vorbis_seek(stb_vorbis * f,unsigned int sample_number)4882 int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
4883 {
4884 if (!stb_vorbis_seek_frame(f, sample_number))
4885 return 0;
4886
4887 if (sample_number != f->current_loc) {
4888 int n;
4889 uint32 frame_start = f->current_loc;
4890 stb_vorbis_get_frame_float(f, &n, NULL);
4891 assert(sample_number > frame_start);
4892 assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end);
4893 f->channel_buffer_start += (sample_number - frame_start);
4894 }
4895
4896 return 1;
4897 }
4898
stb_vorbis_seek_start(stb_vorbis * f)4899 int stb_vorbis_seek_start(stb_vorbis *f)
4900 {
4901 if (IS_PUSH_MODE(f)) { return error(f, VORBIS_invalid_api_mixing); }
4902 set_file_offset(f, f->first_audio_page_offset);
4903 f->previous_length = 0;
4904 f->first_decode = TRUE;
4905 f->next_seg = -1;
4906 return vorbis_pump_first_frame(f);
4907 }
4908
stb_vorbis_stream_length_in_samples(stb_vorbis * f)4909 unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
4910 {
4911 unsigned int restore_offset, previous_safe;
4912 unsigned int end, last_page_loc;
4913
4914 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4915 if (!f->total_samples) {
4916 unsigned int last;
4917 uint32 lo,hi;
4918 char header[6];
4919
4920 // first, store the current decode position so we can restore it
4921 restore_offset = stb_vorbis_get_file_offset(f);
4922
4923 // now we want to seek back 64K from the end (the last page must
4924 // be at most a little less than 64K, but let's allow a little slop)
4925 if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
4926 previous_safe = f->stream_len - 65536;
4927 else
4928 previous_safe = f->first_audio_page_offset;
4929
4930 set_file_offset(f, previous_safe);
4931 // previous_safe is now our candidate 'earliest known place that seeking
4932 // to will lead to the final page'
4933
4934 if (!vorbis_find_page(f, &end, &last)) {
4935 // if we can't find a page, we're hosed!
4936 f->error = VORBIS_cant_find_last_page;
4937 f->total_samples = 0xffffffff;
4938 goto done;
4939 }
4940
4941 // check if there are more pages
4942 last_page_loc = stb_vorbis_get_file_offset(f);
4943
4944 // stop when the last_page flag is set, not when we reach eof;
4945 // this allows us to stop short of a 'file_section' end without
4946 // explicitly checking the length of the section
4947 while (!last) {
4948 set_file_offset(f, end);
4949 if (!vorbis_find_page(f, &end, &last)) {
4950 // the last page we found didn't have the 'last page' flag
4951 // set. whoops!
4952 break;
4953 }
4954 previous_safe = last_page_loc+1;
4955 last_page_loc = stb_vorbis_get_file_offset(f);
4956 }
4957
4958 set_file_offset(f, last_page_loc);
4959
4960 // parse the header
4961 getn(f, (unsigned char *)header, 6);
4962 // extract the absolute granule position
4963 lo = get32(f);
4964 hi = get32(f);
4965 if (lo == 0xffffffff && hi == 0xffffffff) {
4966 f->error = VORBIS_cant_find_last_page;
4967 f->total_samples = SAMPLE_unknown;
4968 goto done;
4969 }
4970 if (hi)
4971 lo = 0xfffffffe; // saturate
4972 f->total_samples = lo;
4973
4974 f->p_last.page_start = last_page_loc;
4975 f->p_last.page_end = end;
4976 f->p_last.last_decoded_sample = lo;
4977
4978 done:
4979 set_file_offset(f, restore_offset);
4980 }
4981 return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
4982 }
4983
stb_vorbis_stream_length_in_seconds(stb_vorbis * f)4984 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
4985 {
4986 return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
4987 }
4988
4989
4990
stb_vorbis_get_frame_float(stb_vorbis * f,int * channels,float *** output)4991 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
4992 {
4993 int len, right,left,i;
4994 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4995
4996 if (!vorbis_decode_packet(f, &len, &left, &right)) {
4997 f->channel_buffer_start = f->channel_buffer_end = 0;
4998 return 0;
4999 }
5000
5001 len = vorbis_finish_frame(f, len, left, right);
5002 for (i=0; i < f->channels; ++i)
5003 f->outputs[i] = f->channel_buffers[i] + left;
5004
5005 f->channel_buffer_start = left;
5006 f->channel_buffer_end = left+len;
5007
5008 if (channels) *channels = f->channels;
5009 if (output) *output = f->outputs;
5010 return len;
5011 }
5012
5013 #ifndef STB_VORBIS_NO_STDIO
5014
stb_vorbis_open_file_section(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc,unsigned int length)5015 stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length)
5016 {
5017 stb_vorbis *f, p;
5018 vorbis_init(&p, alloc);
5019 p.f = file;
5020 p.f_start = (uint32) ftell(file);
5021 p.stream_len = length;
5022 p.close_on_free = close_on_free;
5023 if (start_decoder(&p)) {
5024 f = vorbis_alloc(&p);
5025 if (f) {
5026 *f = p;
5027 vorbis_pump_first_frame(f);
5028 return f;
5029 }
5030 }
5031 if (error) *error = p.error;
5032 vorbis_deinit(&p);
5033 return NULL;
5034 }
5035
stb_vorbis_open_file(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc)5036 stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc)
5037 {
5038 unsigned int len, start;
5039 start = (unsigned int) ftell(file);
5040 fseek(file, 0, SEEK_END);
5041 len = (unsigned int) (ftell(file) - start);
5042 fseek(file, start, SEEK_SET);
5043 return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
5044 }
5045
stb_vorbis_open_filename(const char * filename,int * error,const stb_vorbis_alloc * alloc)5046 stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
5047 {
5048 FILE *f = fopen(filename, "rb");
5049 if (f)
5050 return stb_vorbis_open_file(f, TRUE, error, alloc);
5051 if (error) *error = VORBIS_file_open_failure;
5052 return NULL;
5053 }
5054 #endif // STB_VORBIS_NO_STDIO
5055
stb_vorbis_open_memory(const unsigned char * data,int len,int * error,const stb_vorbis_alloc * alloc)5056 stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc)
5057 {
5058 stb_vorbis *f, p;
5059 if (data == NULL) return NULL;
5060 vorbis_init(&p, alloc);
5061 p.stream = (uint8 *) data;
5062 p.stream_end = (uint8 *) data + len;
5063 p.stream_start = (uint8 *) p.stream;
5064 p.stream_len = len;
5065 p.push_mode = FALSE;
5066 if (start_decoder(&p)) {
5067 f = vorbis_alloc(&p);
5068 if (f) {
5069 *f = p;
5070 vorbis_pump_first_frame(f);
5071 if (error) *error = VORBIS__no_error;
5072 return f;
5073 }
5074 }
5075 if (error) *error = p.error;
5076 vorbis_deinit(&p);
5077 return NULL;
5078 }
5079
5080 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
5081 #define PLAYBACK_MONO 1
5082 #define PLAYBACK_LEFT 2
5083 #define PLAYBACK_RIGHT 4
5084
5085 #define L (PLAYBACK_LEFT | PLAYBACK_MONO)
5086 #define C (PLAYBACK_LEFT | PLAYBACK_RIGHT | PLAYBACK_MONO)
5087 #define R (PLAYBACK_RIGHT | PLAYBACK_MONO)
5088
5089 static int8 channel_position[7][6] =
5090 {
5091 { 0 },
5092 { C },
5093 { L, R },
5094 { L, C, R },
5095 { L, R, L, R },
5096 { L, C, R, L, R },
5097 { L, C, R, L, R, C },
5098 };
5099
5100
5101 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
5102 typedef union {
5103 float f;
5104 int i;
5105 } float_conv;
5106 typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4];
5107 #define FASTDEF(x) float_conv x
5108 // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
5109 #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT))
5110 #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22))
5111 #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s))
5112 #define check_endianness()
5113 #else
5114 #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s))))
5115 #define check_endianness()
5116 #define FASTDEF(x)
5117 #endif
5118
copy_samples(short * dest,float * src,int len)5119 static void copy_samples(short *dest, float *src, int len)
5120 {
5121 int i;
5122 check_endianness();
5123 for (i=0; i < len; ++i) {
5124 FASTDEF(temp);
5125 int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i],15);
5126 if ((unsigned int) (v + 32768) > 65535)
5127 v = v < 0 ? -32768 : 32767;
5128 dest[i] = v;
5129 }
5130 }
5131
compute_samples(int mask,short * output,int num_c,float ** data,int d_offset,int len)5132 static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
5133 {
5134 #define BUFFER_SIZE 32
5135 float buffer[BUFFER_SIZE];
5136 int i,j,o,n = BUFFER_SIZE;
5137 check_endianness();
5138 for (o = 0; o < len; o += BUFFER_SIZE) {
5139 memset(buffer, 0, sizeof(buffer));
5140 if (o + n > len) n = len - o;
5141 for (j=0; j < num_c; ++j) {
5142 if (channel_position[num_c][j] & mask) {
5143 for (i=0; i < n; ++i)
5144 buffer[i] += data[j][d_offset+o+i];
5145 }
5146 }
5147 for (i=0; i < n; ++i) {
5148 FASTDEF(temp);
5149 int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5150 if ((unsigned int) (v + 32768) > 65535)
5151 v = v < 0 ? -32768 : 32767;
5152 output[o+i] = v;
5153 }
5154 }
5155 }
5156
compute_stereo_samples(short * output,int num_c,float ** data,int d_offset,int len)5157 static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
5158 {
5159 #define BUFFER_SIZE 32
5160 float buffer[BUFFER_SIZE];
5161 int i,j,o,n = BUFFER_SIZE >> 1;
5162 // o is the offset in the source data
5163 check_endianness();
5164 for (o = 0; o < len; o += BUFFER_SIZE >> 1) {
5165 // o2 is the offset in the output data
5166 int o2 = o << 1;
5167 memset(buffer, 0, sizeof(buffer));
5168 if (o + n > len) n = len - o;
5169 for (j=0; j < num_c; ++j) {
5170 int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);
5171 if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) {
5172 for (i=0; i < n; ++i) {
5173 buffer[i*2+0] += data[j][d_offset+o+i];
5174 buffer[i*2+1] += data[j][d_offset+o+i];
5175 }
5176 } else if (m == PLAYBACK_LEFT) {
5177 for (i=0; i < n; ++i) {
5178 buffer[i*2+0] += data[j][d_offset+o+i];
5179 }
5180 } else if (m == PLAYBACK_RIGHT) {
5181 for (i=0; i < n; ++i) {
5182 buffer[i*2+1] += data[j][d_offset+o+i];
5183 }
5184 }
5185 }
5186 for (i=0; i < (n<<1); ++i) {
5187 FASTDEF(temp);
5188 int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5189 if ((unsigned int) (v + 32768) > 65535)
5190 v = v < 0 ? -32768 : 32767;
5191 output[o2+i] = v;
5192 }
5193 }
5194 }
5195
convert_samples_short(int buf_c,short ** buffer,int b_offset,int data_c,float ** data,int d_offset,int samples)5196 static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
5197 {
5198 int i;
5199 if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5200 static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
5201 for (i=0; i < buf_c; ++i)
5202 compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples);
5203 } else {
5204 int limit = buf_c < data_c ? buf_c : data_c;
5205 for (i=0; i < limit; ++i)
5206 copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
5207 for ( ; i < buf_c; ++i)
5208 memset(buffer[i]+b_offset, 0, sizeof(short) * samples);
5209 }
5210 }
5211
stb_vorbis_get_frame_short(stb_vorbis * f,int num_c,short ** buffer,int num_samples)5212 int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
5213 {
5214 float **output;
5215 int len = stb_vorbis_get_frame_float(f, NULL, &output);
5216 if (len > num_samples) len = num_samples;
5217 if (len)
5218 convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len);
5219 return len;
5220 }
5221
convert_channels_short_interleaved(int buf_c,short * buffer,int data_c,float ** data,int d_offset,int len)5222 static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
5223 {
5224 int i;
5225 check_endianness();
5226 if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5227 assert(buf_c == 2);
5228 for (i=0; i < buf_c; ++i)
5229 compute_stereo_samples(buffer, data_c, data, d_offset, len);
5230 } else {
5231 int limit = buf_c < data_c ? buf_c : data_c;
5232 int j;
5233 for (j=0; j < len; ++j) {
5234 for (i=0; i < limit; ++i) {
5235 FASTDEF(temp);
5236 float f = data[i][d_offset+j];
5237 int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15);
5238 if ((unsigned int) (v + 32768) > 65535)
5239 v = v < 0 ? -32768 : 32767;
5240 *buffer++ = v;
5241 }
5242 for ( ; i < buf_c; ++i)
5243 *buffer++ = 0;
5244 }
5245 }
5246 }
5247
stb_vorbis_get_frame_short_interleaved(stb_vorbis * f,int num_c,short * buffer,int num_shorts)5248 int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
5249 {
5250 float **output;
5251 int len;
5252 if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
5253 len = stb_vorbis_get_frame_float(f, NULL, &output);
5254 if (len) {
5255 if (len*num_c > num_shorts) len = num_shorts / num_c;
5256 convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len);
5257 }
5258 return len;
5259 }
5260
stb_vorbis_get_samples_short_interleaved(stb_vorbis * f,int channels,short * buffer,int num_shorts)5261 int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
5262 {
5263 float **outputs;
5264 int len = num_shorts / channels;
5265 int n=0;
5266 int z = f->channels;
5267 if (z > channels) z = channels;
5268 while (n < len) {
5269 int k = f->channel_buffer_end - f->channel_buffer_start;
5270 if (n+k >= len) k = len - n;
5271 if (k)
5272 convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5273 buffer += k*channels;
5274 n += k;
5275 f->channel_buffer_start += k;
5276 if (n == len) break;
5277 if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5278 }
5279 return n;
5280 }
5281
stb_vorbis_get_samples_short(stb_vorbis * f,int channels,short ** buffer,int len)5282 int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
5283 {
5284 float **outputs;
5285 int n=0;
5286 int z = f->channels;
5287 if (z > channels) z = channels;
5288 while (n < len) {
5289 int k = f->channel_buffer_end - f->channel_buffer_start;
5290 if (n+k >= len) k = len - n;
5291 if (k)
5292 convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5293 n += k;
5294 f->channel_buffer_start += k;
5295 if (n == len) break;
5296 if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5297 }
5298 return n;
5299 }
5300
5301 #ifndef STB_VORBIS_NO_STDIO
stb_vorbis_decode_filename(const char * filename,int * channels,int * sample_rate,short ** output)5302 int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output)
5303 {
5304 int data_len, offset, total, limit, error;
5305 short *data;
5306 stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
5307 if (v == NULL) return -1;
5308 limit = v->channels * 4096;
5309 *channels = v->channels;
5310 if (sample_rate)
5311 *sample_rate = v->sample_rate;
5312 offset = data_len = 0;
5313 total = limit;
5314 data = (short *) malloc(total * sizeof(*data));
5315 if (data == NULL) {
5316 stb_vorbis_close(v);
5317 return -2;
5318 }
5319 for (;;) {
5320 int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5321 if (n == 0) break;
5322 data_len += n;
5323 offset += n * v->channels;
5324 if (offset + limit > total) {
5325 short *data2;
5326 total *= 2;
5327 data2 = (short *) realloc(data, total * sizeof(*data));
5328 if (data2 == NULL) {
5329 free(data);
5330 stb_vorbis_close(v);
5331 return -2;
5332 }
5333 data = data2;
5334 }
5335 }
5336 *output = data;
5337 stb_vorbis_close(v);
5338 return data_len;
5339 }
5340 #endif // NO_STDIO
5341
stb_vorbis_decode_memory(const uint8 * mem,int len,int * channels,int * sample_rate,short ** output)5342 int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output)
5343 {
5344 int data_len, offset, total, limit, error;
5345 short *data;
5346 stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
5347 if (v == NULL) return -1;
5348 limit = v->channels * 4096;
5349 *channels = v->channels;
5350 if (sample_rate)
5351 *sample_rate = v->sample_rate;
5352 offset = data_len = 0;
5353 total = limit;
5354 data = (short *) malloc(total * sizeof(*data));
5355 if (data == NULL) {
5356 stb_vorbis_close(v);
5357 return -2;
5358 }
5359 for (;;) {
5360 int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5361 if (n == 0) break;
5362 data_len += n;
5363 offset += n * v->channels;
5364 if (offset + limit > total) {
5365 short *data2;
5366 total *= 2;
5367 data2 = (short *) realloc(data, total * sizeof(*data));
5368 if (data2 == NULL) {
5369 free(data);
5370 stb_vorbis_close(v);
5371 return -2;
5372 }
5373 data = data2;
5374 }
5375 }
5376 *output = data;
5377 stb_vorbis_close(v);
5378 return data_len;
5379 }
5380 #endif // STB_VORBIS_NO_INTEGER_CONVERSION
5381
stb_vorbis_get_samples_float_interleaved(stb_vorbis * f,int channels,float * buffer,int num_floats)5382 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
5383 {
5384 float **outputs;
5385 int len = num_floats / channels;
5386 int n=0;
5387 int z = f->channels;
5388 if (z > channels) z = channels;
5389 while (n < len) {
5390 int i,j;
5391 int k = f->channel_buffer_end - f->channel_buffer_start;
5392 if (n+k >= len) k = len - n;
5393 for (j=0; j < k; ++j) {
5394 for (i=0; i < z; ++i)
5395 *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
5396 for ( ; i < channels; ++i)
5397 *buffer++ = 0;
5398 }
5399 n += k;
5400 f->channel_buffer_start += k;
5401 if (n == len)
5402 break;
5403 if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5404 break;
5405 }
5406 return n;
5407 }
5408
stb_vorbis_get_samples_float(stb_vorbis * f,int channels,float ** buffer,int num_samples)5409 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
5410 {
5411 float **outputs;
5412 int n=0;
5413 int z = f->channels;
5414 if (z > channels) z = channels;
5415 while (n < num_samples) {
5416 int i;
5417 int k = f->channel_buffer_end - f->channel_buffer_start;
5418 if (n+k >= num_samples) k = num_samples - n;
5419 if (k) {
5420 for (i=0; i < z; ++i)
5421 memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k);
5422 for ( ; i < channels; ++i)
5423 memset(buffer[i]+n, 0, sizeof(float) * k);
5424 }
5425 n += k;
5426 f->channel_buffer_start += k;
5427 if (n == num_samples)
5428 break;
5429 if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5430 break;
5431 }
5432 return n;
5433 }
5434 #endif // STB_VORBIS_NO_PULLDATA_API
5435
5436 /* Version history
5437 1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
5438 1.11 - 2017-07-23 - fix MinGW compilation
5439 1.10 - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
5440 1.09 - 2016-04-04 - back out 'avoid discarding last frame' fix from previous version
5441 1.08 - 2016-04-02 - fixed multiple warnings; fix setup memory leaks;
5442 avoid discarding last frame of audio data
5443 1.07 - 2015-01-16 - fixed some warnings, fix mingw, const-correct API
5444 some more crash fixes when out of memory or with corrupt files
5445 1.06 - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
5446 some crash fixes when out of memory or with corrupt files
5447 1.05 - 2015-04-19 - don't define __forceinline if it's redundant
5448 1.04 - 2014-08-27 - fix missing const-correct case in API
5449 1.03 - 2014-08-07 - Warning fixes
5450 1.02 - 2014-07-09 - Declare qsort compare function _cdecl on windows
5451 1.01 - 2014-06-18 - fix stb_vorbis_get_samples_float
5452 1.0 - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in multichannel
5453 (API change) report sample rate for decode-full-file funcs
5454 0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5455 0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5456 0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5457 0.99993 - remove assert that fired on legal files with empty tables
5458 0.99992 - rewind-to-start
5459 0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5460 0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5461 0.9998 - add a full-decode function with a memory source
5462 0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5463 0.9996 - query length of vorbis stream in samples/seconds
5464 0.9995 - bugfix to another optimization that only happened in certain files
5465 0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5466 0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5467 0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5468 0.9991 - performance improvement of IMDCT
5469 0.999 - (should have been 0.9990) performance improvement of IMDCT
5470 0.998 - no-CRT support from Casey Muratori
5471 0.997 - bugfixes for bugs found by Terje Mathisen
5472 0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5473 0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5474 0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5475 0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5476 0.992 - fixes for MinGW warning
5477 0.991 - turn fast-float-conversion on by default
5478 0.990 - fix push-mode seek recovery if you seek into the headers
5479 0.98b - fix to bad release of 0.98
5480 0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5481 0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5482 0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5483 0.95 - clamping code for 16-bit functions
5484 0.94 - not publically released
5485 0.93 - fixed all-zero-floor case (was decoding garbage)
5486 0.92 - fixed a memory leak
5487 0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5488 0.90 - first public release
5489 */
5490
5491 #endif // STB_VORBIS_HEADER_ONLY
5492
5493
5494 /*
5495 ------------------------------------------------------------------------------
5496 This software is available under 2 licenses -- choose whichever you prefer.
5497 ------------------------------------------------------------------------------
5498 ALTERNATIVE A - MIT License
5499 Copyright (c) 2017 Sean Barrett
5500 Permission is hereby granted, free of charge, to any person obtaining a copy of
5501 this software and associated documentation files (the "Software"), to deal in
5502 the Software without restriction, including without limitation the rights to
5503 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
5504 of the Software, and to permit persons to whom the Software is furnished to do
5505 so, subject to the following conditions:
5506 The above copyright notice and this permission notice shall be included in all
5507 copies or substantial portions of the Software.
5508 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5509 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5510 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5511 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
5512 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
5513 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
5514 SOFTWARE.
5515 ------------------------------------------------------------------------------
5516 ALTERNATIVE B - Public Domain (www.unlicense.org)
5517 This is free and unencumbered software released into the public domain.
5518 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
5519 software, either in source code form or as a compiled binary, for any purpose,
5520 commercial or non-commercial, and by any means.
5521 In jurisdictions that recognize copyright laws, the author or authors of this
5522 software dedicate any and all copyright interest in the software to the public
5523 domain. We make this dedication for the benefit of the public at large and to
5524 the detriment of our heirs and successors. We intend this dedication to be an
5525 overt act of relinquishment in perpetuity of all present and future rights to
5526 this software under copyright law.
5527 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5528 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5529 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5530 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
5531 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
5532 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
5533 ------------------------------------------------------------------------------
5534 */
5535