1 // Ogg Vorbis audio decoder - v1.16 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking implementation
7 // sponsored by Phillip Bennefall, Marc Andersen, Aaron Baker,
8 // Elias Software, Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 // See end of file for license information.
13 //
14 // Limitations:
15 //
16 // - floor 0 not supported (used in old ogg vorbis files pre-2004)
17 // - lossless sample-truncation at beginning ignored
18 // - cannot concatenate multiple vorbis streams
19 // - sample positions are 32-bit, limiting seekable 192Khz
20 // files to around 6 hours (Ogg supports 64-bit)
21 //
22 // Feature contributors:
23 // Dougall Johnson (sample-exact seeking)
24 //
25 // Bugfix/warning contributors:
26 // Terje Mathisen Niklas Frykholm Andy Hill
27 // Casey Muratori John Bolton Gargaj
28 // Laurent Gomila Marc LeBlanc Ronny Chevalier
29 // Bernhard Wodo Evan Balster alxprd@github
30 // Tom Beaumont Ingo Leitgeb Nicolas Guillemot
31 // Phillip Bennefall Rohit Thiago Goulart
32 // manxorist@github saga musix github:infatum
33 // Timur Gagiev
34 //
35 // Partial history:
36 // 1.16 - 2019-03-04 - fix warnings
37 // 1.15 - 2019-02-07 - explicit failure if Ogg Skeleton data is found
38 // 1.14 - 2018-02-11 - delete bogus dealloca usage
39 // 1.13 - 2018-01-29 - fix truncation of last frame (hopefully)
40 // 1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
41 // 1.11 - 2017-07-23 - fix MinGW compilation
42 // 1.10 - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
43 // 1.09 - 2016-04-04 - back out 'truncation of last frame' fix from previous version
44 // 1.08 - 2016-04-02 - warnings; setup memory leaks; truncation of last frame
45 // 1.07 - 2015-01-16 - fixes for crashes on invalid files; warning fixes; const
46 // 1.06 - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
47 // some crash fixes when out of memory or with corrupt files
48 // fix some inappropriately signed shifts
49 // 1.05 - 2015-04-19 - don't define __forceinline if it's redundant
50 // 1.04 - 2014-08-27 - fix missing const-correct case in API
51 // 1.03 - 2014-08-07 - warning fixes
52 // 1.02 - 2014-07-09 - declare qsort comparison as explicitly _cdecl in Windows
53 // 1.01 - 2014-06-18 - fix stb_vorbis_get_samples_float (interleaved was correct)
54 // 1.0 - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
55 // (API change) report sample rate for decode-full-file funcs
56 //
57 // See end of file for full version history.
58
59
60 //////////////////////////////////////////////////////////////////////////////
61 //
62 // HEADER BEGINS HERE
63 //
64
65 #ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
66 #define STB_VORBIS_INCLUDE_STB_VORBIS_H
67
68 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
69 #define STB_VORBIS_NO_STDIO 1
70 #endif
71
72 #ifndef STB_VORBIS_NO_STDIO
73 #include <stdio.h>
74 #endif
75
76 #ifdef __cplusplus
77 extern "C" {
78 #endif
79
80 /////////// THREAD SAFETY
81
82 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
83 // them from multiple threads at the same time. However, you can have multiple
84 // stb_vorbis* handles and decode from them independently in multiple thrads.
85
86
87 /////////// MEMORY ALLOCATION
88
89 // normally stb_vorbis uses malloc() to allocate memory at startup,
90 // and alloca() to allocate temporary memory during a frame on the
91 // stack. (Memory consumption will depend on the amount of setup
92 // data in the file and how you set the compile flags for speed
93 // vs. size. In my test files the maximal-size usage is ~150KB.)
94 //
95 // You can modify the wrapper functions in the source (setup_malloc,
96 // setup_temp_malloc, temp_malloc) to change this behavior, or you
97 // can use a simpler allocation model: you pass in a buffer from
98 // which stb_vorbis will allocate _all_ its memory (including the
99 // temp memory). "open" may fail with a VORBIS_outofmem if you
100 // do not pass in enough data; there is no way to determine how
101 // much you do need except to succeed (at which point you can
102 // query get_info to find the exact amount required. yes I know
103 // this is lame).
104 //
105 // If you pass in a non-NULL buffer of the type below, allocation
106 // will occur from it as described above. Otherwise just pass NULL
107 // to use malloc()/alloca()
108
109 typedef struct
110 {
111 char *alloc_buffer;
112 int alloc_buffer_length_in_bytes;
113 } stb_vorbis_alloc;
114
115
116 /////////// FUNCTIONS USEABLE WITH ALL INPUT MODES
117
118 typedef struct stb_vorbis stb_vorbis;
119
120 typedef struct
121 {
122 unsigned int sample_rate;
123 int channels;
124
125 unsigned int setup_memory_required;
126 unsigned int setup_temp_memory_required;
127 unsigned int temp_memory_required;
128
129 int max_frame_size;
130 } stb_vorbis_info;
131
132 // get general information about the file
133 extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
134
135 // get the last error detected (clears it, too)
136 extern int stb_vorbis_get_error(stb_vorbis *f);
137
138 // close an ogg vorbis file and free all memory in use
139 extern void stb_vorbis_close(stb_vorbis *f);
140
141 // this function returns the offset (in samples) from the beginning of the
142 // file that will be returned by the next decode, if it is known, or -1
143 // otherwise. after a flush_pushdata() call, this may take a while before
144 // it becomes valid again.
145 // NOT WORKING YET after a seek with PULLDATA API
146 extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
147
148 // returns the current seek point within the file, or offset from the beginning
149 // of the memory buffer. In pushdata mode it returns 0.
150 extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
151
152 /////////// PUSHDATA API
153
154 #ifndef STB_VORBIS_NO_PUSHDATA_API
155
156 // this API allows you to get blocks of data from any source and hand
157 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
158 // you how much it used, and you have to give it the rest next time;
159 // and stb_vorbis may not have enough data to work with and you will
160 // need to give it the same data again PLUS more. Note that the Vorbis
161 // specification does not bound the size of an individual frame.
162
163 extern stb_vorbis *stb_vorbis_open_pushdata(
164 const unsigned char * datablock, int datablock_length_in_bytes,
165 int *datablock_memory_consumed_in_bytes,
166 int *error,
167 const stb_vorbis_alloc *alloc_buffer);
168 // create a vorbis decoder by passing in the initial data block containing
169 // the ogg&vorbis headers (you don't need to do parse them, just provide
170 // the first N bytes of the file--you're told if it's not enough, see below)
171 // on success, returns an stb_vorbis *, does not set error, returns the amount of
172 // data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
173 // on failure, returns NULL on error and sets *error, does not change *datablock_memory_consumed
174 // if returns NULL and *error is VORBIS_need_more_data, then the input block was
175 // incomplete and you need to pass in a larger block from the start of the file
176
177 extern int stb_vorbis_decode_frame_pushdata(
178 stb_vorbis *f,
179 const unsigned char *datablock, int datablock_length_in_bytes,
180 int *channels, // place to write number of float * buffers
181 float ***output, // place to write float ** array of float * buffers
182 int *samples // place to write number of output samples
183 );
184 // decode a frame of audio sample data if possible from the passed-in data block
185 //
186 // return value: number of bytes we used from datablock
187 //
188 // possible cases:
189 // 0 bytes used, 0 samples output (need more data)
190 // N bytes used, 0 samples output (resynching the stream, keep going)
191 // N bytes used, M samples output (one frame of data)
192 // note that after opening a file, you will ALWAYS get one N-bytes,0-sample
193 // frame, because Vorbis always "discards" the first frame.
194 //
195 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
196 // instead only datablock_length_in_bytes-3 or less. This is because it wants
197 // to avoid missing parts of a page header if they cross a datablock boundary,
198 // without writing state-machiney code to record a partial detection.
199 //
200 // The number of channels returned are stored in *channels (which can be
201 // NULL--it is always the same as the number of channels reported by
202 // get_info). *output will contain an array of float* buffers, one per
203 // channel. In other words, (*output)[0][0] contains the first sample from
204 // the first channel, and (*output)[1][0] contains the first sample from
205 // the second channel.
206
207 extern void stb_vorbis_flush_pushdata(stb_vorbis *f);
208 // inform stb_vorbis that your next datablock will not be contiguous with
209 // previous ones (e.g. you've seeked in the data); future attempts to decode
210 // frames will cause stb_vorbis to resynchronize (as noted above), and
211 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
212 // will begin decoding the _next_ frame.
213 //
214 // if you want to seek using pushdata, you need to seek in your file, then
215 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
216 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
217 // if you don't like the result, seek your file again and repeat.
218 #endif
219
220
221 ////////// PULLING INPUT API
222
223 #ifndef STB_VORBIS_NO_PULLDATA_API
224 // This API assumes stb_vorbis is allowed to pull data from a source--
225 // either a block of memory containing the _entire_ vorbis stream, or a
226 // FILE * that you or it create, or possibly some other reading mechanism
227 // if you go modify the source to replace the FILE * case with some kind
228 // of callback to your code. (But if you don't support seeking, you may
229 // just want to go ahead and use pushdata.)
230
231 #if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
232 extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
233 #endif
234 #if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
235 extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
236 #endif
237 // decode an entire file and output the data interleaved into a malloc()ed
238 // buffer stored in *output. The return value is the number of samples
239 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
240 // When you're done with it, just free() the pointer returned in *output.
241
242 extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
243 int *error, const stb_vorbis_alloc *alloc_buffer);
244 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
245 // this must be the entire stream!). on failure, returns NULL and sets *error
246
247 #ifndef STB_VORBIS_NO_STDIO
248 extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
249 int *error, const stb_vorbis_alloc *alloc_buffer);
250 // create an ogg vorbis decoder from a filename via fopen(). on failure,
251 // returns NULL and sets *error (possibly to VORBIS_file_open_failure).
252
253 extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
254 int *error, const stb_vorbis_alloc *alloc_buffer);
255 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
256 // the _current_ seek point (ftell). on failure, returns NULL and sets *error.
257 // note that stb_vorbis must "own" this stream; if you seek it in between
258 // calls to stb_vorbis, it will become confused. Moreover, if you attempt to
259 // perform stb_vorbis_seek_*() operations on this file, it will assume it
260 // owns the _entire_ rest of the file after the start point. Use the next
261 // function, stb_vorbis_open_file_section(), to limit it.
262
263 extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
264 int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
265 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
266 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
267 // on failure, returns NULL and sets *error. note that stb_vorbis must "own"
268 // this stream; if you seek it in between calls to stb_vorbis, it will become
269 // confused.
270 #endif
271
272 extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
273 extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
274 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
275 // after calling seek_frame(), the next call to get_frame_*() will include
276 // the specified sample. after calling stb_vorbis_seek(), the next call to
277 // stb_vorbis_get_samples_* will start with the specified sample. If you
278 // do not need to seek to EXACTLY the target sample when using get_samples_*,
279 // you can also use seek_frame().
280
281 extern int stb_vorbis_seek_start(stb_vorbis *f);
282 // this function is equivalent to stb_vorbis_seek(f,0)
283
284 extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
285 extern float stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
286 // these functions return the total length of the vorbis stream
287
288 extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
289 // decode the next frame and return the number of samples. the number of
290 // channels returned are stored in *channels (which can be NULL--it is always
291 // the same as the number of channels reported by get_info). *output will
292 // contain an array of float* buffers, one per channel. These outputs will
293 // be overwritten on the next call to stb_vorbis_get_frame_*.
294 //
295 // You generally should not intermix calls to stb_vorbis_get_frame_*()
296 // and stb_vorbis_get_samples_*(), since the latter calls the former.
297
298 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
299 extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
300 extern int stb_vorbis_get_frame_short (stb_vorbis *f, int num_c, short **buffer, int num_samples);
301 #endif
302 // decode the next frame and return the number of *samples* per channel.
303 // Note that for interleaved data, you pass in the number of shorts (the
304 // size of your array), but the return value is the number of samples per
305 // channel, not the total number of samples.
306 //
307 // The data is coerced to the number of channels you request according to the
308 // channel coercion rules (see below). You must pass in the size of your
309 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
310 // The maximum buffer size needed can be gotten from get_info(); however,
311 // the Vorbis I specification implies an absolute maximum of 4096 samples
312 // per channel.
313
314 // Channel coercion rules:
315 // Let M be the number of channels requested, and N the number of channels present,
316 // and Cn be the nth channel; let stereo L be the sum of all L and center channels,
317 // and stereo R be the sum of all R and center channels (channel assignment from the
318 // vorbis spec).
319 // M N output
320 // 1 k sum(Ck) for all k
321 // 2 * stereo L, stereo R
322 // k l k > l, the first l channels, then 0s
323 // k l k <= l, the first k channels
324 // Note that this is not _good_ surround etc. mixing at all! It's just so
325 // you get something useful.
326
327 extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
328 extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
329 // gets num_samples samples, not necessarily on a frame boundary--this requires
330 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
331 // Returns the number of samples stored per channel; it may be less than requested
332 // at the end of the file. If there are no more samples in the file, returns 0.
333
334 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
335 extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
336 extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
337 #endif
338 // gets num_samples samples, not necessarily on a frame boundary--this requires
339 // buffering so you have to supply the buffers. Applies the coercion rules above
340 // to produce 'channels' channels. Returns the number of samples stored per channel;
341 // it may be less than requested at the end of the file. If there are no more
342 // samples in the file, returns 0.
343
344 #endif
345
346 //////// ERROR CODES
347
348 enum STBVorbisError
349 {
350 VORBIS__no_error,
351
352 VORBIS_need_more_data=1, // not a real error
353
354 VORBIS_invalid_api_mixing, // can't mix API modes
355 VORBIS_outofmem, // not enough memory
356 VORBIS_feature_not_supported, // uses floor 0
357 VORBIS_too_many_channels, // STB_VORBIS_MAX_CHANNELS is too small
358 VORBIS_file_open_failure, // fopen() failed
359 VORBIS_seek_without_length, // can't seek in unknown-length file
360
361 VORBIS_unexpected_eof=10, // file is truncated?
362 VORBIS_seek_invalid, // seek past EOF
363
364 // decoding errors (corrupt/invalid stream) -- you probably
365 // don't care about the exact details of these
366
367 // vorbis errors:
368 VORBIS_invalid_setup=20,
369 VORBIS_invalid_stream,
370
371 // ogg errors:
372 VORBIS_missing_capture_pattern=30,
373 VORBIS_invalid_stream_structure_version,
374 VORBIS_continued_packet_flag_invalid,
375 VORBIS_incorrect_stream_serial_number,
376 VORBIS_invalid_first_page,
377 VORBIS_bad_packet_type,
378 VORBIS_cant_find_last_page,
379 VORBIS_seek_failed,
380 VORBIS_ogg_skeleton_not_supported
381 };
382
383
384 #ifdef __cplusplus
385 }
386 #endif
387
388 #endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
389 //
390 // HEADER ENDS HERE
391 //
392 //////////////////////////////////////////////////////////////////////////////
393
394 #ifndef STB_VORBIS_HEADER_ONLY
395
396 // global configuration settings (e.g. set these in the project/makefile),
397 // or just set them in this file at the top (although ideally the first few
398 // should be visible when the header file is compiled too, although it's not
399 // crucial)
400
401 // STB_VORBIS_NO_PUSHDATA_API
402 // does not compile the code for the various stb_vorbis_*_pushdata()
403 // functions
404 // #define STB_VORBIS_NO_PUSHDATA_API
405
406 // STB_VORBIS_NO_PULLDATA_API
407 // does not compile the code for the non-pushdata APIs
408 // #define STB_VORBIS_NO_PULLDATA_API
409
410 // STB_VORBIS_NO_STDIO
411 // does not compile the code for the APIs that use FILE *s internally
412 // or externally (implied by STB_VORBIS_NO_PULLDATA_API)
413 // #define STB_VORBIS_NO_STDIO
414
415 // STB_VORBIS_NO_INTEGER_CONVERSION
416 // does not compile the code for converting audio sample data from
417 // float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
418 // #define STB_VORBIS_NO_INTEGER_CONVERSION
419
420 // STB_VORBIS_NO_FAST_SCALED_FLOAT
421 // does not use a fast float-to-int trick to accelerate float-to-int on
422 // most platforms which requires endianness be defined correctly.
423 //#define STB_VORBIS_NO_FAST_SCALED_FLOAT
424
425
426 // STB_VORBIS_MAX_CHANNELS [number]
427 // globally define this to the maximum number of channels you need.
428 // The spec does not put a restriction on channels except that
429 // the count is stored in a byte, so 255 is the hard limit.
430 // Reducing this saves about 16 bytes per value, so using 16 saves
431 // (255-16)*16 or around 4KB. Plus anything other memory usage
432 // I forgot to account for. Can probably go as low as 8 (7.1 audio),
433 // 6 (5.1 audio), or 2 (stereo only).
434 #ifndef STB_VORBIS_MAX_CHANNELS
435 #define STB_VORBIS_MAX_CHANNELS 16 // enough for anyone?
436 #endif
437
438 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
439 // after a flush_pushdata(), stb_vorbis begins scanning for the
440 // next valid page, without backtracking. when it finds something
441 // that looks like a page, it streams through it and verifies its
442 // CRC32. Should that validation fail, it keeps scanning. But it's
443 // possible that _while_ streaming through to check the CRC32 of
444 // one candidate page, it sees another candidate page. This #define
445 // determines how many "overlapping" candidate pages it can search
446 // at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
447 // garbage pages could be as big as 64KB, but probably average ~16KB.
448 // So don't hose ourselves by scanning an apparent 64KB page and
449 // missing a ton of real ones in the interim; so minimum of 2
450 #ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
451 #define STB_VORBIS_PUSHDATA_CRC_COUNT 4
452 #endif
453
454 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
455 // sets the log size of the huffman-acceleration table. Maximum
456 // supported value is 24. with larger numbers, more decodings are O(1),
457 // but the table size is larger so worse cache missing, so you'll have
458 // to probe (and try multiple ogg vorbis files) to find the sweet spot.
459 #ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
460 #define STB_VORBIS_FAST_HUFFMAN_LENGTH 10
461 #endif
462
463 // STB_VORBIS_FAST_BINARY_LENGTH [number]
464 // sets the log size of the binary-search acceleration table. this
465 // is used in similar fashion to the fast-huffman size to set initial
466 // parameters for the binary search
467
468 // STB_VORBIS_FAST_HUFFMAN_INT
469 // The fast huffman tables are much more efficient if they can be
470 // stored as 16-bit results instead of 32-bit results. This restricts
471 // the codebooks to having only 65535 possible outcomes, though.
472 // (At least, accelerated by the huffman table.)
473 #ifndef STB_VORBIS_FAST_HUFFMAN_INT
474 #define STB_VORBIS_FAST_HUFFMAN_SHORT
475 #endif
476
477 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
478 // If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
479 // back on binary searching for the correct one. This requires storing
480 // extra tables with the huffman codes in sorted order. Defining this
481 // symbol trades off space for speed by forcing a linear search in the
482 // non-fast case, except for "sparse" codebooks.
483 // #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
484
485 // STB_VORBIS_DIVIDES_IN_RESIDUE
486 // stb_vorbis precomputes the result of the scalar residue decoding
487 // that would otherwise require a divide per chunk. you can trade off
488 // space for time by defining this symbol.
489 // #define STB_VORBIS_DIVIDES_IN_RESIDUE
490
491 // STB_VORBIS_DIVIDES_IN_CODEBOOK
492 // vorbis VQ codebooks can be encoded two ways: with every case explicitly
493 // stored, or with all elements being chosen from a small range of values,
494 // and all values possible in all elements. By default, stb_vorbis expands
495 // this latter kind out to look like the former kind for ease of decoding,
496 // because otherwise an integer divide-per-vector-element is required to
497 // unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
498 // trade off storage for speed.
499 //#define STB_VORBIS_DIVIDES_IN_CODEBOOK
500
501 #ifdef STB_VORBIS_CODEBOOK_SHORTS
502 #error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
503 #endif
504
505 // STB_VORBIS_DIVIDE_TABLE
506 // this replaces small integer divides in the floor decode loop with
507 // table lookups. made less than 1% difference, so disabled by default.
508
509 // STB_VORBIS_NO_INLINE_DECODE
510 // disables the inlining of the scalar codebook fast-huffman decode.
511 // might save a little codespace; useful for debugging
512 // #define STB_VORBIS_NO_INLINE_DECODE
513
514 // STB_VORBIS_NO_DEFER_FLOOR
515 // Normally we only decode the floor without synthesizing the actual
516 // full curve. We can instead synthesize the curve immediately. This
517 // requires more memory and is very likely slower, so I don't think
518 // you'd ever want to do it except for debugging.
519 // #define STB_VORBIS_NO_DEFER_FLOOR
520
521
522
523
524 //////////////////////////////////////////////////////////////////////////////
525
526 #ifdef STB_VORBIS_NO_PULLDATA_API
527 #define STB_VORBIS_NO_INTEGER_CONVERSION
528 #define STB_VORBIS_NO_STDIO
529 #endif
530
531 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
532 #define STB_VORBIS_NO_STDIO 1
533 #endif
534
535 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
536 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
537
538 // only need endianness for fast-float-to-int, which we don't
539 // use for pushdata
540
541 #ifndef STB_VORBIS_BIG_ENDIAN
542 #define STB_VORBIS_ENDIAN 0
543 #else
544 #define STB_VORBIS_ENDIAN 1
545 #endif
546
547 #endif
548 #endif
549
550
551 #ifndef STB_VORBIS_NO_STDIO
552 #include <stdio.h>
553 #endif
554
555 #ifndef STB_VORBIS_NO_CRT
556 #include <stdlib.h>
557 #include <string.h>
558 #include <assert.h>
559 #include <math.h>
560
561 // find definition of alloca if it's not in stdlib.h:
562 #if defined(_MSC_VER) || defined(__MINGW32__)
563 #include <malloc.h>
564 #endif
565 #if defined(__linux__) || defined(__linux) || defined(__EMSCRIPTEN__)
566 #include <alloca.h>
567 #endif
568 #else // STB_VORBIS_NO_CRT
569 #define NULL 0
570 #define malloc(s) 0
571 #define free(s) ((void) 0)
572 #define realloc(s) 0
573 #endif // STB_VORBIS_NO_CRT
574
575 #include <limits.h>
576
577 #ifdef __MINGW32__
578 // eff you mingw:
579 // "fixed":
580 // http://sourceforge.net/p/mingw-w64/mailman/message/32882927/
581 // "no that broke the build, reverted, who cares about C":
582 // http://sourceforge.net/p/mingw-w64/mailman/message/32890381/
583 #ifdef __forceinline
584 #undef __forceinline
585 #endif
586 #define __forceinline
587 #define alloca __builtin_alloca
588 #elif !defined(_MSC_VER)
589 #if __GNUC__
590 #define __forceinline inline
591 #else
592 #define __forceinline
593 #endif
594 #endif
595
596 #if STB_VORBIS_MAX_CHANNELS > 256
597 #error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
598 #endif
599
600 #if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
601 #error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
602 #endif
603
604
605 #if 0
606 #include <crtdbg.h>
607 #define CHECK(f) _CrtIsValidHeapPointer(f->channel_buffers[1])
608 #else
609 #define CHECK(f) ((void) 0)
610 #endif
611
612 #define MAX_BLOCKSIZE_LOG 13 // from specification
613 #define MAX_BLOCKSIZE (1 << MAX_BLOCKSIZE_LOG)
614
615
616 typedef unsigned char uint8;
617 typedef signed char int8;
618 typedef unsigned short uint16;
619 typedef signed short int16;
620 typedef unsigned int uint32;
621 typedef signed int int32;
622
623 #ifndef TRUE
624 #define TRUE 1
625 #define FALSE 0
626 #endif
627
628 typedef float codetype;
629
630 // @NOTE
631 //
632 // Some arrays below are tagged "//varies", which means it's actually
633 // a variable-sized piece of data, but rather than malloc I assume it's
634 // small enough it's better to just allocate it all together with the
635 // main thing
636 //
637 // Most of the variables are specified with the smallest size I could pack
638 // them into. It might give better performance to make them all full-sized
639 // integers. It should be safe to freely rearrange the structures or change
640 // the sizes larger--nothing relies on silently truncating etc., nor the
641 // order of variables.
642
643 #define FAST_HUFFMAN_TABLE_SIZE (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
644 #define FAST_HUFFMAN_TABLE_MASK (FAST_HUFFMAN_TABLE_SIZE - 1)
645
646 typedef struct
647 {
648 int dimensions, entries;
649 uint8 *codeword_lengths;
650 float minimum_value;
651 float delta_value;
652 uint8 value_bits;
653 uint8 lookup_type;
654 uint8 sequence_p;
655 uint8 sparse;
656 uint32 lookup_values;
657 codetype *multiplicands;
658 uint32 *codewords;
659 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
660 int16 fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
661 #else
662 int32 fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
663 #endif
664 uint32 *sorted_codewords;
665 int *sorted_values;
666 int sorted_entries;
667 } Codebook;
668
669 typedef struct
670 {
671 uint8 order;
672 uint16 rate;
673 uint16 bark_map_size;
674 uint8 amplitude_bits;
675 uint8 amplitude_offset;
676 uint8 number_of_books;
677 uint8 book_list[16]; // varies
678 } Floor0;
679
680 typedef struct
681 {
682 uint8 partitions;
683 uint8 partition_class_list[32]; // varies
684 uint8 class_dimensions[16]; // varies
685 uint8 class_subclasses[16]; // varies
686 uint8 class_masterbooks[16]; // varies
687 int16 subclass_books[16][8]; // varies
688 uint16 Xlist[31*8+2]; // varies
689 uint8 sorted_order[31*8+2];
690 uint8 neighbors[31*8+2][2];
691 uint8 floor1_multiplier;
692 uint8 rangebits;
693 int values;
694 } Floor1;
695
696 typedef union
697 {
698 Floor0 floor0;
699 Floor1 floor1;
700 } Floor;
701
702 typedef struct
703 {
704 uint32 begin, end;
705 uint32 part_size;
706 uint8 classifications;
707 uint8 classbook;
708 uint8 **classdata;
709 int16 (*residue_books)[8];
710 } Residue;
711
712 typedef struct
713 {
714 uint8 magnitude;
715 uint8 angle;
716 uint8 mux;
717 } MappingChannel;
718
719 typedef struct
720 {
721 uint16 coupling_steps;
722 MappingChannel *chan;
723 uint8 submaps;
724 uint8 submap_floor[15]; // varies
725 uint8 submap_residue[15]; // varies
726 } Mapping;
727
728 typedef struct
729 {
730 uint8 blockflag;
731 uint8 mapping;
732 uint16 windowtype;
733 uint16 transformtype;
734 } Mode;
735
736 typedef struct
737 {
738 uint32 goal_crc; // expected crc if match
739 int bytes_left; // bytes left in packet
740 uint32 crc_so_far; // running crc
741 int bytes_done; // bytes processed in _current_ chunk
742 uint32 sample_loc; // granule pos encoded in page
743 } CRCscan;
744
745 typedef struct
746 {
747 uint32 page_start, page_end;
748 uint32 last_decoded_sample;
749 } ProbedPage;
750
751 struct stb_vorbis
752 {
753 // user-accessible info
754 unsigned int sample_rate;
755 int channels;
756
757 unsigned int setup_memory_required;
758 unsigned int temp_memory_required;
759 unsigned int setup_temp_memory_required;
760
761 // input config
762 #ifndef STB_VORBIS_NO_STDIO
763 FILE *f;
764 uint32 f_start;
765 int close_on_free;
766 #endif
767
768 uint8 *stream;
769 uint8 *stream_start;
770 uint8 *stream_end;
771
772 uint32 stream_len;
773
774 uint8 push_mode;
775
776 uint32 first_audio_page_offset;
777
778 ProbedPage p_first, p_last;
779
780 // memory management
781 stb_vorbis_alloc alloc;
782 int setup_offset;
783 int temp_offset;
784
785 // run-time results
786 int eof;
787 enum STBVorbisError error;
788
789 // user-useful data
790
791 // header info
792 int blocksize[2];
793 int blocksize_0, blocksize_1;
794 int codebook_count;
795 Codebook *codebooks;
796 int floor_count;
797 uint16 floor_types[64]; // varies
798 Floor *floor_config;
799 int residue_count;
800 uint16 residue_types[64]; // varies
801 Residue *residue_config;
802 int mapping_count;
803 Mapping *mapping;
804 int mode_count;
805 Mode mode_config[64]; // varies
806
807 uint32 total_samples;
808
809 // decode buffer
810 float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
811 float *outputs [STB_VORBIS_MAX_CHANNELS];
812
813 float *previous_window[STB_VORBIS_MAX_CHANNELS];
814 int previous_length;
815
816 #ifndef STB_VORBIS_NO_DEFER_FLOOR
817 int16 *finalY[STB_VORBIS_MAX_CHANNELS];
818 #else
819 float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
820 #endif
821
822 uint32 current_loc; // sample location of next frame to decode
823 int current_loc_valid;
824
825 // per-blocksize precomputed data
826
827 // twiddle factors
828 float *A[2],*B[2],*C[2];
829 float *window[2];
830 uint16 *bit_reverse[2];
831
832 // current page/packet/segment streaming info
833 uint32 serial; // stream serial number for verification
834 int last_page;
835 int segment_count;
836 uint8 segments[255];
837 uint8 page_flag;
838 uint8 bytes_in_seg;
839 uint8 first_decode;
840 int next_seg;
841 int last_seg; // flag that we're on the last segment
842 int last_seg_which; // what was the segment number of the last seg?
843 uint32 acc;
844 int valid_bits;
845 int packet_bytes;
846 int end_seg_with_known_loc;
847 uint32 known_loc_for_packet;
848 int discard_samples_deferred;
849 uint32 samples_output;
850
851 // push mode scanning
852 int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
853 #ifndef STB_VORBIS_NO_PUSHDATA_API
854 CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
855 #endif
856
857 // sample-access
858 int channel_buffer_start;
859 int channel_buffer_end;
860 };
861
862 #if defined(STB_VORBIS_NO_PUSHDATA_API)
863 #define IS_PUSH_MODE(f) FALSE
864 #elif defined(STB_VORBIS_NO_PULLDATA_API)
865 #define IS_PUSH_MODE(f) TRUE
866 #else
867 #define IS_PUSH_MODE(f) ((f)->push_mode)
868 #endif
869
870 typedef struct stb_vorbis vorb;
871
error(vorb * f,enum STBVorbisError e)872 static int error(vorb *f, enum STBVorbisError e)
873 {
874 f->error = e;
875 if (!f->eof && e != VORBIS_need_more_data) {
876 f->error=e; // breakpoint for debugging
877 }
878 return 0;
879 }
880
881
882 // these functions are used for allocating temporary memory
883 // while decoding. if you can afford the stack space, use
884 // alloca(); otherwise, provide a temp buffer and it will
885 // allocate out of those.
886
887 #define array_size_required(count,size) (count*(sizeof(void *)+(size)))
888
889 #define temp_alloc(f,size) (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
890 #define temp_free(f,p) 0
891 #define temp_alloc_save(f) ((f)->temp_offset)
892 #define temp_alloc_restore(f,p) ((f)->temp_offset = (p))
893
894 #define temp_block_array(f,count,size) make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
895
896 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
make_block_array(void * mem,int count,int size)897 static void *make_block_array(void *mem, int count, int size)
898 {
899 int i;
900 void ** p = (void **) mem;
901 char *q = (char *) (p + count);
902 for (i=0; i < count; ++i) {
903 p[i] = q;
904 q += size;
905 }
906 return p;
907 }
908
setup_malloc(vorb * f,int sz)909 static void *setup_malloc(vorb *f, int sz)
910 {
911 sz = (sz+3) & ~3;
912 f->setup_memory_required += sz;
913 if (f->alloc.alloc_buffer) {
914 void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
915 if (f->setup_offset + sz > f->temp_offset) return NULL;
916 f->setup_offset += sz;
917 return p;
918 }
919 return sz ? malloc(sz) : NULL;
920 }
921
setup_free(vorb * f,void * p)922 static void setup_free(vorb *f, void *p)
923 {
924 if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack
925 free(p);
926 }
927
setup_temp_malloc(vorb * f,int sz)928 static void *setup_temp_malloc(vorb *f, int sz)
929 {
930 sz = (sz+3) & ~3;
931 if (f->alloc.alloc_buffer) {
932 if (f->temp_offset - sz < f->setup_offset) return NULL;
933 f->temp_offset -= sz;
934 return (char *) f->alloc.alloc_buffer + f->temp_offset;
935 }
936 return malloc(sz);
937 }
938
setup_temp_free(vorb * f,void * p,int sz)939 static void setup_temp_free(vorb *f, void *p, int sz)
940 {
941 if (f->alloc.alloc_buffer) {
942 f->temp_offset += (sz+3)&~3;
943 return;
944 }
945 free(p);
946 }
947
948 #define CRC32_POLY 0x04c11db7 // from spec
949
950 static uint32 crc_table[256];
crc32_init(void)951 static void crc32_init(void)
952 {
953 int i,j;
954 uint32 s;
955 for(i=0; i < 256; i++) {
956 for (s=(uint32) i << 24, j=0; j < 8; ++j)
957 s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
958 crc_table[i] = s;
959 }
960 }
961
crc32_update(uint32 crc,uint8 byte)962 static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
963 {
964 return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
965 }
966
967
968 // used in setup, and for huffman that doesn't go fast path
bit_reverse(unsigned int n)969 static unsigned int bit_reverse(unsigned int n)
970 {
971 n = ((n & 0xAAAAAAAA) >> 1) | ((n & 0x55555555) << 1);
972 n = ((n & 0xCCCCCCCC) >> 2) | ((n & 0x33333333) << 2);
973 n = ((n & 0xF0F0F0F0) >> 4) | ((n & 0x0F0F0F0F) << 4);
974 n = ((n & 0xFF00FF00) >> 8) | ((n & 0x00FF00FF) << 8);
975 return (n >> 16) | (n << 16);
976 }
977
square(float x)978 static float square(float x)
979 {
980 return x*x;
981 }
982
983 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
984 // as required by the specification. fast(?) implementation from stb.h
985 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
ilog(int32 n)986 static int ilog(int32 n)
987 {
988 static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
989
990 if (n < 0) return 0; // signed n returns 0
991
992 // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
993 if (n < (1 << 14))
994 if (n < (1 << 4)) return 0 + log2_4[n ];
995 else if (n < (1 << 9)) return 5 + log2_4[n >> 5];
996 else return 10 + log2_4[n >> 10];
997 else if (n < (1 << 24))
998 if (n < (1 << 19)) return 15 + log2_4[n >> 15];
999 else return 20 + log2_4[n >> 20];
1000 else if (n < (1 << 29)) return 25 + log2_4[n >> 25];
1001 else return 30 + log2_4[n >> 30];
1002 }
1003
1004 #ifndef M_PI
1005 #define M_PI 3.14159265358979323846264f // from CRC
1006 #endif
1007
1008 // code length assigned to a value with no huffman encoding
1009 #define NO_CODE 255
1010
1011 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
1012 //
1013 // these functions are only called at setup, and only a few times
1014 // per file
1015
float32_unpack(uint32 x)1016 static float float32_unpack(uint32 x)
1017 {
1018 // from the specification
1019 uint32 mantissa = x & 0x1fffff;
1020 uint32 sign = x & 0x80000000;
1021 uint32 exp = (x & 0x7fe00000) >> 21;
1022 double res = sign ? -(double)mantissa : (double)mantissa;
1023 return (float) ldexp((float)res, exp-788);
1024 }
1025
1026
1027 // zlib & jpeg huffman tables assume that the output symbols
1028 // can either be arbitrarily arranged, or have monotonically
1029 // increasing frequencies--they rely on the lengths being sorted;
1030 // this makes for a very simple generation algorithm.
1031 // vorbis allows a huffman table with non-sorted lengths. This
1032 // requires a more sophisticated construction, since symbols in
1033 // order do not map to huffman codes "in order".
add_entry(Codebook * c,uint32 huff_code,int symbol,int count,int len,uint32 * values)1034 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
1035 {
1036 if (!c->sparse) {
1037 c->codewords [symbol] = huff_code;
1038 } else {
1039 c->codewords [count] = huff_code;
1040 c->codeword_lengths[count] = len;
1041 values [count] = symbol;
1042 }
1043 }
1044
compute_codewords(Codebook * c,uint8 * len,int n,uint32 * values)1045 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
1046 {
1047 int i,k,m=0;
1048 uint32 available[32];
1049
1050 memset(available, 0, sizeof(available));
1051 // find the first entry
1052 for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
1053 if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
1054 // add to the list
1055 add_entry(c, 0, k, m++, len[k], values);
1056 // add all available leaves
1057 for (i=1; i <= len[k]; ++i)
1058 available[i] = 1U << (32-i);
1059 // note that the above code treats the first case specially,
1060 // but it's really the same as the following code, so they
1061 // could probably be combined (except the initial code is 0,
1062 // and I use 0 in available[] to mean 'empty')
1063 for (i=k+1; i < n; ++i) {
1064 uint32 res;
1065 int z = len[i], y;
1066 if (z == NO_CODE) continue;
1067 // find lowest available leaf (should always be earliest,
1068 // which is what the specification calls for)
1069 // note that this property, and the fact we can never have
1070 // more than one free leaf at a given level, isn't totally
1071 // trivial to prove, but it seems true and the assert never
1072 // fires, so!
1073 while (z > 0 && !available[z]) --z;
1074 if (z == 0) { return FALSE; }
1075 res = available[z];
1076 assert(z >= 0 && z < 32);
1077 available[z] = 0;
1078 add_entry(c, bit_reverse(res), i, m++, len[i], values);
1079 // propagate availability up the tree
1080 if (z != len[i]) {
1081 assert(len[i] >= 0 && len[i] < 32);
1082 for (y=len[i]; y > z; --y) {
1083 assert(available[y] == 0);
1084 available[y] = res + (1 << (32-y));
1085 }
1086 }
1087 }
1088 return TRUE;
1089 }
1090
1091 // accelerated huffman table allows fast O(1) match of all symbols
1092 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
compute_accelerated_huffman(Codebook * c)1093 static void compute_accelerated_huffman(Codebook *c)
1094 {
1095 int i, len;
1096 for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
1097 c->fast_huffman[i] = -1;
1098
1099 len = c->sparse ? c->sorted_entries : c->entries;
1100 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
1101 if (len > 32767) len = 32767; // largest possible value we can encode!
1102 #endif
1103 for (i=0; i < len; ++i) {
1104 if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
1105 uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
1106 // set table entries for all bit combinations in the higher bits
1107 while (z < FAST_HUFFMAN_TABLE_SIZE) {
1108 c->fast_huffman[z] = i;
1109 z += 1 << c->codeword_lengths[i];
1110 }
1111 }
1112 }
1113 }
1114
1115 #ifdef _MSC_VER
1116 #define STBV_CDECL __cdecl
1117 #else
1118 #define STBV_CDECL
1119 #endif
1120
uint32_compare(const void * p,const void * q)1121 static int STBV_CDECL uint32_compare(const void *p, const void *q)
1122 {
1123 uint32 x = * (uint32 *) p;
1124 uint32 y = * (uint32 *) q;
1125 return x < y ? -1 : x > y;
1126 }
1127
include_in_sort(Codebook * c,uint8 len)1128 static int include_in_sort(Codebook *c, uint8 len)
1129 {
1130 if (c->sparse) { assert(len != NO_CODE); return TRUE; }
1131 if (len == NO_CODE) return FALSE;
1132 if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
1133 return FALSE;
1134 }
1135
1136 // if the fast table above doesn't work, we want to binary
1137 // search them... need to reverse the bits
compute_sorted_huffman(Codebook * c,uint8 * lengths,uint32 * values)1138 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
1139 {
1140 int i, len;
1141 // build a list of all the entries
1142 // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
1143 // this is kind of a frivolous optimization--I don't see any performance improvement,
1144 // but it's like 4 extra lines of code, so.
1145 if (!c->sparse) {
1146 int k = 0;
1147 for (i=0; i < c->entries; ++i)
1148 if (include_in_sort(c, lengths[i]))
1149 c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
1150 assert(k == c->sorted_entries);
1151 } else {
1152 for (i=0; i < c->sorted_entries; ++i)
1153 c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
1154 }
1155
1156 qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
1157 c->sorted_codewords[c->sorted_entries] = 0xffffffff;
1158
1159 len = c->sparse ? c->sorted_entries : c->entries;
1160 // now we need to indicate how they correspond; we could either
1161 // #1: sort a different data structure that says who they correspond to
1162 // #2: for each sorted entry, search the original list to find who corresponds
1163 // #3: for each original entry, find the sorted entry
1164 // #1 requires extra storage, #2 is slow, #3 can use binary search!
1165 for (i=0; i < len; ++i) {
1166 int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
1167 if (include_in_sort(c,huff_len)) {
1168 uint32 code = bit_reverse(c->codewords[i]);
1169 int x=0, n=c->sorted_entries;
1170 while (n > 1) {
1171 // invariant: sc[x] <= code < sc[x+n]
1172 int m = x + (n >> 1);
1173 if (c->sorted_codewords[m] <= code) {
1174 x = m;
1175 n -= (n>>1);
1176 } else {
1177 n >>= 1;
1178 }
1179 }
1180 assert(c->sorted_codewords[x] == code);
1181 if (c->sparse) {
1182 c->sorted_values[x] = values[i];
1183 c->codeword_lengths[x] = huff_len;
1184 } else {
1185 c->sorted_values[x] = i;
1186 }
1187 }
1188 }
1189 }
1190
1191 // only run while parsing the header (3 times)
vorbis_validate(uint8 * data)1192 static int vorbis_validate(uint8 *data)
1193 {
1194 static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
1195 return memcmp(data, vorbis, 6) == 0;
1196 }
1197
1198 // called from setup only, once per code book
1199 // (formula implied by specification)
lookup1_values(int entries,int dim)1200 static int lookup1_values(int entries, int dim)
1201 {
1202 int r = (int) floor(exp((float) log((float) entries) / dim));
1203 if ((int) floor(pow((float) r+1, dim)) <= entries) // (int) cast for MinGW warning;
1204 ++r; // floor() to avoid _ftol() when non-CRT
1205 assert(pow((float) r+1, dim) > entries);
1206 assert((int) floor(pow((float) r, dim)) <= entries); // (int),floor() as above
1207 return r;
1208 }
1209
1210 // called twice per file
compute_twiddle_factors(int n,float * A,float * B,float * C)1211 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
1212 {
1213 int n4 = n >> 2, n8 = n >> 3;
1214 int k,k2;
1215
1216 for (k=k2=0; k < n4; ++k,k2+=2) {
1217 A[k2 ] = (float) cos(4*k*M_PI/n);
1218 A[k2+1] = (float) -sin(4*k*M_PI/n);
1219 B[k2 ] = (float) cos((k2+1)*M_PI/n/2) * 0.5f;
1220 B[k2+1] = (float) sin((k2+1)*M_PI/n/2) * 0.5f;
1221 }
1222 for (k=k2=0; k < n8; ++k,k2+=2) {
1223 C[k2 ] = (float) cos(2*(k2+1)*M_PI/n);
1224 C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
1225 }
1226 }
1227
compute_window(int n,float * window)1228 static void compute_window(int n, float *window)
1229 {
1230 int n2 = n >> 1, i;
1231 for (i=0; i < n2; ++i)
1232 window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
1233 }
1234
compute_bitreverse(int n,uint16 * rev)1235 static void compute_bitreverse(int n, uint16 *rev)
1236 {
1237 int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
1238 int i, n8 = n >> 3;
1239 for (i=0; i < n8; ++i)
1240 rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
1241 }
1242
init_blocksize(vorb * f,int b,int n)1243 static int init_blocksize(vorb *f, int b, int n)
1244 {
1245 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
1246 f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1247 f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1248 f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
1249 if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
1250 compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
1251 f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1252 if (!f->window[b]) return error(f, VORBIS_outofmem);
1253 compute_window(n, f->window[b]);
1254 f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
1255 if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
1256 compute_bitreverse(n, f->bit_reverse[b]);
1257 return TRUE;
1258 }
1259
neighbors(uint16 * x,int n,int * plow,int * phigh)1260 static void neighbors(uint16 *x, int n, int *plow, int *phigh)
1261 {
1262 int low = -1;
1263 int high = 65536;
1264 int i;
1265 for (i=0; i < n; ++i) {
1266 if (x[i] > low && x[i] < x[n]) { *plow = i; low = x[i]; }
1267 if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
1268 }
1269 }
1270
1271 // this has been repurposed so y is now the original index instead of y
1272 typedef struct
1273 {
1274 uint16 x,id;
1275 } stbv__floor_ordering;
1276
point_compare(const void * p,const void * q)1277 static int STBV_CDECL point_compare(const void *p, const void *q)
1278 {
1279 stbv__floor_ordering *a = (stbv__floor_ordering *) p;
1280 stbv__floor_ordering *b = (stbv__floor_ordering *) q;
1281 return a->x < b->x ? -1 : a->x > b->x;
1282 }
1283
1284 //
1285 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
1286
1287
1288 #if defined(STB_VORBIS_NO_STDIO)
1289 #define USE_MEMORY(z) TRUE
1290 #else
1291 #define USE_MEMORY(z) ((z)->stream)
1292 #endif
1293
get8(vorb * z)1294 static uint8 get8(vorb *z)
1295 {
1296 if (USE_MEMORY(z)) {
1297 if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
1298 return *z->stream++;
1299 }
1300
1301 #ifndef STB_VORBIS_NO_STDIO
1302 {
1303 int c = fgetc(z->f);
1304 if (c == EOF) { z->eof = TRUE; return 0; }
1305 return c;
1306 }
1307 #endif
1308 }
1309
get32(vorb * f)1310 static uint32 get32(vorb *f)
1311 {
1312 uint32 x;
1313 x = get8(f);
1314 x += get8(f) << 8;
1315 x += get8(f) << 16;
1316 x += (uint32) get8(f) << 24;
1317 return x;
1318 }
1319
getn(vorb * z,uint8 * data,int n)1320 static int getn(vorb *z, uint8 *data, int n)
1321 {
1322 if (USE_MEMORY(z)) {
1323 if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
1324 memcpy(data, z->stream, n);
1325 z->stream += n;
1326 return 1;
1327 }
1328
1329 #ifndef STB_VORBIS_NO_STDIO
1330 if (fread(data, n, 1, z->f) == 1)
1331 return 1;
1332 else {
1333 z->eof = 1;
1334 return 0;
1335 }
1336 #endif
1337 }
1338
skip(vorb * z,int n)1339 static void skip(vorb *z, int n)
1340 {
1341 if (USE_MEMORY(z)) {
1342 z->stream += n;
1343 if (z->stream >= z->stream_end) z->eof = 1;
1344 return;
1345 }
1346 #ifndef STB_VORBIS_NO_STDIO
1347 {
1348 long x = ftell(z->f);
1349 fseek(z->f, x+n, SEEK_SET);
1350 }
1351 #endif
1352 }
1353
set_file_offset(stb_vorbis * f,unsigned int loc)1354 static int set_file_offset(stb_vorbis *f, unsigned int loc)
1355 {
1356 #ifndef STB_VORBIS_NO_PUSHDATA_API
1357 if (f->push_mode) return 0;
1358 #endif
1359 f->eof = 0;
1360 if (USE_MEMORY(f)) {
1361 if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
1362 f->stream = f->stream_end;
1363 f->eof = 1;
1364 return 0;
1365 } else {
1366 f->stream = f->stream_start + loc;
1367 return 1;
1368 }
1369 }
1370 #ifndef STB_VORBIS_NO_STDIO
1371 if (loc + f->f_start < loc || loc >= 0x80000000) {
1372 loc = 0x7fffffff;
1373 f->eof = 1;
1374 } else {
1375 loc += f->f_start;
1376 }
1377 if (!fseek(f->f, loc, SEEK_SET))
1378 return 1;
1379 f->eof = 1;
1380 fseek(f->f, f->f_start, SEEK_END);
1381 return 0;
1382 #endif
1383 }
1384
1385
1386 static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
1387
capture_pattern(vorb * f)1388 static int capture_pattern(vorb *f)
1389 {
1390 if (0x4f != get8(f)) return FALSE;
1391 if (0x67 != get8(f)) return FALSE;
1392 if (0x67 != get8(f)) return FALSE;
1393 if (0x53 != get8(f)) return FALSE;
1394 return TRUE;
1395 }
1396
1397 #define PAGEFLAG_continued_packet 1
1398 #define PAGEFLAG_first_page 2
1399 #define PAGEFLAG_last_page 4
1400
start_page_no_capturepattern(vorb * f)1401 static int start_page_no_capturepattern(vorb *f)
1402 {
1403 uint32 loc0,loc1,n;
1404 // stream structure version
1405 if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
1406 // header flag
1407 f->page_flag = get8(f);
1408 // absolute granule position
1409 loc0 = get32(f);
1410 loc1 = get32(f);
1411 // @TODO: validate loc0,loc1 as valid positions?
1412 // stream serial number -- vorbis doesn't interleave, so discard
1413 get32(f);
1414 //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
1415 // page sequence number
1416 n = get32(f);
1417 f->last_page = n;
1418 // CRC32
1419 get32(f);
1420 // page_segments
1421 f->segment_count = get8(f);
1422 if (!getn(f, f->segments, f->segment_count))
1423 return error(f, VORBIS_unexpected_eof);
1424 // assume we _don't_ know any the sample position of any segments
1425 f->end_seg_with_known_loc = -2;
1426 if (loc0 != ~0U || loc1 != ~0U) {
1427 int i;
1428 // determine which packet is the last one that will complete
1429 for (i=f->segment_count-1; i >= 0; --i)
1430 if (f->segments[i] < 255)
1431 break;
1432 // 'i' is now the index of the _last_ segment of a packet that ends
1433 if (i >= 0) {
1434 f->end_seg_with_known_loc = i;
1435 f->known_loc_for_packet = loc0;
1436 }
1437 }
1438 if (f->first_decode) {
1439 int i,len;
1440 ProbedPage p;
1441 len = 0;
1442 for (i=0; i < f->segment_count; ++i)
1443 len += f->segments[i];
1444 len += 27 + f->segment_count;
1445 p.page_start = f->first_audio_page_offset;
1446 p.page_end = p.page_start + len;
1447 p.last_decoded_sample = loc0;
1448 f->p_first = p;
1449 }
1450 f->next_seg = 0;
1451 return TRUE;
1452 }
1453
start_page(vorb * f)1454 static int start_page(vorb *f)
1455 {
1456 if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
1457 return start_page_no_capturepattern(f);
1458 }
1459
start_packet(vorb * f)1460 static int start_packet(vorb *f)
1461 {
1462 while (f->next_seg == -1) {
1463 if (!start_page(f)) return FALSE;
1464 if (f->page_flag & PAGEFLAG_continued_packet)
1465 return error(f, VORBIS_continued_packet_flag_invalid);
1466 }
1467 f->last_seg = FALSE;
1468 f->valid_bits = 0;
1469 f->packet_bytes = 0;
1470 f->bytes_in_seg = 0;
1471 // f->next_seg is now valid
1472 return TRUE;
1473 }
1474
maybe_start_packet(vorb * f)1475 static int maybe_start_packet(vorb *f)
1476 {
1477 if (f->next_seg == -1) {
1478 int x = get8(f);
1479 if (f->eof) return FALSE; // EOF at page boundary is not an error!
1480 if (0x4f != x ) return error(f, VORBIS_missing_capture_pattern);
1481 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1482 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1483 if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1484 if (!start_page_no_capturepattern(f)) return FALSE;
1485 if (f->page_flag & PAGEFLAG_continued_packet) {
1486 // set up enough state that we can read this packet if we want,
1487 // e.g. during recovery
1488 f->last_seg = FALSE;
1489 f->bytes_in_seg = 0;
1490 return error(f, VORBIS_continued_packet_flag_invalid);
1491 }
1492 }
1493 return start_packet(f);
1494 }
1495
next_segment(vorb * f)1496 static int next_segment(vorb *f)
1497 {
1498 int len;
1499 if (f->last_seg) return 0;
1500 if (f->next_seg == -1) {
1501 f->last_seg_which = f->segment_count-1; // in case start_page fails
1502 if (!start_page(f)) { f->last_seg = 1; return 0; }
1503 if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1504 }
1505 len = f->segments[f->next_seg++];
1506 if (len < 255) {
1507 f->last_seg = TRUE;
1508 f->last_seg_which = f->next_seg-1;
1509 }
1510 if (f->next_seg >= f->segment_count)
1511 f->next_seg = -1;
1512 assert(f->bytes_in_seg == 0);
1513 f->bytes_in_seg = len;
1514 return len;
1515 }
1516
1517 #define EOP (-1)
1518 #define INVALID_BITS (-1)
1519
get8_packet_raw(vorb * f)1520 static int get8_packet_raw(vorb *f)
1521 {
1522 if (!f->bytes_in_seg) { // CLANG!
1523 if (f->last_seg) return EOP;
1524 else if (!next_segment(f)) return EOP;
1525 }
1526 assert(f->bytes_in_seg > 0);
1527 --f->bytes_in_seg;
1528 ++f->packet_bytes;
1529 return get8(f);
1530 }
1531
get8_packet(vorb * f)1532 static int get8_packet(vorb *f)
1533 {
1534 int x = get8_packet_raw(f);
1535 f->valid_bits = 0;
1536 return x;
1537 }
1538
flush_packet(vorb * f)1539 static void flush_packet(vorb *f)
1540 {
1541 while (get8_packet_raw(f) != EOP);
1542 }
1543
1544 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1545 // as the huffman decoder?
get_bits(vorb * f,int n)1546 static uint32 get_bits(vorb *f, int n)
1547 {
1548 uint32 z;
1549
1550 if (f->valid_bits < 0) return 0;
1551 if (f->valid_bits < n) {
1552 if (n > 24) {
1553 // the accumulator technique below would not work correctly in this case
1554 z = get_bits(f, 24);
1555 z += get_bits(f, n-24) << 24;
1556 return z;
1557 }
1558 if (f->valid_bits == 0) f->acc = 0;
1559 while (f->valid_bits < n) {
1560 int z = get8_packet_raw(f);
1561 if (z == EOP) {
1562 f->valid_bits = INVALID_BITS;
1563 return 0;
1564 }
1565 f->acc += z << f->valid_bits;
1566 f->valid_bits += 8;
1567 }
1568 }
1569 if (f->valid_bits < 0) return 0;
1570 z = f->acc & ((1 << n)-1);
1571 f->acc >>= n;
1572 f->valid_bits -= n;
1573 return z;
1574 }
1575
1576 // @OPTIMIZE: primary accumulator for huffman
1577 // expand the buffer to as many bits as possible without reading off end of packet
1578 // it might be nice to allow f->valid_bits and f->acc to be stored in registers,
1579 // e.g. cache them locally and decode locally
prep_huffman(vorb * f)1580 static __forceinline void prep_huffman(vorb *f)
1581 {
1582 if (f->valid_bits <= 24) {
1583 if (f->valid_bits == 0) f->acc = 0;
1584 do {
1585 int z;
1586 if (f->last_seg && !f->bytes_in_seg) return;
1587 z = get8_packet_raw(f);
1588 if (z == EOP) return;
1589 f->acc += (unsigned) z << f->valid_bits;
1590 f->valid_bits += 8;
1591 } while (f->valid_bits <= 24);
1592 }
1593 }
1594
1595 enum
1596 {
1597 VORBIS_packet_id = 1,
1598 VORBIS_packet_comment = 3,
1599 VORBIS_packet_setup = 5
1600 };
1601
codebook_decode_scalar_raw(vorb * f,Codebook * c)1602 static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
1603 {
1604 int i;
1605 prep_huffman(f);
1606
1607 if (c->codewords == NULL && c->sorted_codewords == NULL)
1608 return -1;
1609
1610 // cases to use binary search: sorted_codewords && !c->codewords
1611 // sorted_codewords && c->entries > 8
1612 if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
1613 // binary search
1614 uint32 code = bit_reverse(f->acc);
1615 int x=0, n=c->sorted_entries, len;
1616
1617 while (n > 1) {
1618 // invariant: sc[x] <= code < sc[x+n]
1619 int m = x + (n >> 1);
1620 if (c->sorted_codewords[m] <= code) {
1621 x = m;
1622 n -= (n>>1);
1623 } else {
1624 n >>= 1;
1625 }
1626 }
1627 // x is now the sorted index
1628 if (!c->sparse) x = c->sorted_values[x];
1629 // x is now sorted index if sparse, or symbol otherwise
1630 len = c->codeword_lengths[x];
1631 if (f->valid_bits >= len) {
1632 f->acc >>= len;
1633 f->valid_bits -= len;
1634 return x;
1635 }
1636
1637 f->valid_bits = 0;
1638 return -1;
1639 }
1640
1641 // if small, linear search
1642 assert(!c->sparse);
1643 for (i=0; i < c->entries; ++i) {
1644 if (c->codeword_lengths[i] == NO_CODE) continue;
1645 if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
1646 if (f->valid_bits >= c->codeword_lengths[i]) {
1647 f->acc >>= c->codeword_lengths[i];
1648 f->valid_bits -= c->codeword_lengths[i];
1649 return i;
1650 }
1651 f->valid_bits = 0;
1652 return -1;
1653 }
1654 }
1655
1656 error(f, VORBIS_invalid_stream);
1657 f->valid_bits = 0;
1658 return -1;
1659 }
1660
1661 #ifndef STB_VORBIS_NO_INLINE_DECODE
1662
1663 #define DECODE_RAW(var, f,c) \
1664 if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) \
1665 prep_huffman(f); \
1666 var = f->acc & FAST_HUFFMAN_TABLE_MASK; \
1667 var = c->fast_huffman[var]; \
1668 if (var >= 0) { \
1669 int n = c->codeword_lengths[var]; \
1670 f->acc >>= n; \
1671 f->valid_bits -= n; \
1672 if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
1673 } else { \
1674 var = codebook_decode_scalar_raw(f,c); \
1675 }
1676
1677 #else
1678
codebook_decode_scalar(vorb * f,Codebook * c)1679 static int codebook_decode_scalar(vorb *f, Codebook *c)
1680 {
1681 int i;
1682 if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
1683 prep_huffman(f);
1684 // fast huffman table lookup
1685 i = f->acc & FAST_HUFFMAN_TABLE_MASK;
1686 i = c->fast_huffman[i];
1687 if (i >= 0) {
1688 f->acc >>= c->codeword_lengths[i];
1689 f->valid_bits -= c->codeword_lengths[i];
1690 if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
1691 return i;
1692 }
1693 return codebook_decode_scalar_raw(f,c);
1694 }
1695
1696 #define DECODE_RAW(var,f,c) var = codebook_decode_scalar(f,c);
1697
1698 #endif
1699
1700 #define DECODE(var,f,c) \
1701 DECODE_RAW(var,f,c) \
1702 if (c->sparse) var = c->sorted_values[var];
1703
1704 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1705 #define DECODE_VQ(var,f,c) DECODE_RAW(var,f,c)
1706 #else
1707 #define DECODE_VQ(var,f,c) DECODE(var,f,c)
1708 #endif
1709
1710
1711
1712
1713
1714
1715 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
1716 // where we avoid one addition
1717 #define CODEBOOK_ELEMENT(c,off) (c->multiplicands[off])
1718 #define CODEBOOK_ELEMENT_FAST(c,off) (c->multiplicands[off])
1719 #define CODEBOOK_ELEMENT_BASE(c) (0)
1720
codebook_decode_start(vorb * f,Codebook * c)1721 static int codebook_decode_start(vorb *f, Codebook *c)
1722 {
1723 int z = -1;
1724
1725 // type 0 is only legal in a scalar context
1726 if (c->lookup_type == 0)
1727 error(f, VORBIS_invalid_stream);
1728 else {
1729 DECODE_VQ(z,f,c);
1730 if (c->sparse) assert(z < c->sorted_entries);
1731 if (z < 0) { // check for EOP
1732 if (!f->bytes_in_seg)
1733 if (f->last_seg)
1734 return z;
1735 error(f, VORBIS_invalid_stream);
1736 }
1737 }
1738 return z;
1739 }
1740
codebook_decode(vorb * f,Codebook * c,float * output,int len)1741 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1742 {
1743 int i,z = codebook_decode_start(f,c);
1744 if (z < 0) return FALSE;
1745 if (len > c->dimensions) len = c->dimensions;
1746
1747 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1748 if (c->lookup_type == 1) {
1749 float last = CODEBOOK_ELEMENT_BASE(c);
1750 int div = 1;
1751 for (i=0; i < len; ++i) {
1752 int off = (z / div) % c->lookup_values;
1753 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1754 output[i] += val;
1755 if (c->sequence_p) last = val + c->minimum_value;
1756 div *= c->lookup_values;
1757 }
1758 return TRUE;
1759 }
1760 #endif
1761
1762 z *= c->dimensions;
1763 if (c->sequence_p) {
1764 float last = CODEBOOK_ELEMENT_BASE(c);
1765 for (i=0; i < len; ++i) {
1766 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1767 output[i] += val;
1768 last = val + c->minimum_value;
1769 }
1770 } else {
1771 float last = CODEBOOK_ELEMENT_BASE(c);
1772 for (i=0; i < len; ++i) {
1773 output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1774 }
1775 }
1776
1777 return TRUE;
1778 }
1779
codebook_decode_step(vorb * f,Codebook * c,float * output,int len,int step)1780 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1781 {
1782 int i,z = codebook_decode_start(f,c);
1783 float last = CODEBOOK_ELEMENT_BASE(c);
1784 if (z < 0) return FALSE;
1785 if (len > c->dimensions) len = c->dimensions;
1786
1787 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1788 if (c->lookup_type == 1) {
1789 int div = 1;
1790 for (i=0; i < len; ++i) {
1791 int off = (z / div) % c->lookup_values;
1792 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1793 output[i*step] += val;
1794 if (c->sequence_p) last = val;
1795 div *= c->lookup_values;
1796 }
1797 return TRUE;
1798 }
1799 #endif
1800
1801 z *= c->dimensions;
1802 for (i=0; i < len; ++i) {
1803 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1804 output[i*step] += val;
1805 if (c->sequence_p) last = val;
1806 }
1807
1808 return TRUE;
1809 }
1810
codebook_decode_deinterleave_repeat(vorb * f,Codebook * c,float ** outputs,int ch,int * c_inter_p,int * p_inter_p,int len,int total_decode)1811 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1812 {
1813 int c_inter = *c_inter_p;
1814 int p_inter = *p_inter_p;
1815 int i,z, effective = c->dimensions;
1816
1817 // type 0 is only legal in a scalar context
1818 if (c->lookup_type == 0) return error(f, VORBIS_invalid_stream);
1819
1820 while (total_decode > 0) {
1821 float last = CODEBOOK_ELEMENT_BASE(c);
1822 DECODE_VQ(z,f,c);
1823 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1824 assert(!c->sparse || z < c->sorted_entries);
1825 #endif
1826 if (z < 0) {
1827 if (!f->bytes_in_seg)
1828 if (f->last_seg) return FALSE;
1829 return error(f, VORBIS_invalid_stream);
1830 }
1831
1832 // if this will take us off the end of the buffers, stop short!
1833 // we check by computing the length of the virtual interleaved
1834 // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1835 // and the length we'll be using (effective)
1836 if (c_inter + p_inter*ch + effective > len * ch) {
1837 effective = len*ch - (p_inter*ch - c_inter);
1838 }
1839
1840 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1841 if (c->lookup_type == 1) {
1842 int div = 1;
1843 for (i=0; i < effective; ++i) {
1844 int off = (z / div) % c->lookup_values;
1845 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1846 if (outputs[c_inter])
1847 outputs[c_inter][p_inter] += val;
1848 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1849 if (c->sequence_p) last = val;
1850 div *= c->lookup_values;
1851 }
1852 } else
1853 #endif
1854 {
1855 z *= c->dimensions;
1856 if (c->sequence_p) {
1857 for (i=0; i < effective; ++i) {
1858 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1859 if (outputs[c_inter])
1860 outputs[c_inter][p_inter] += val;
1861 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1862 last = val;
1863 }
1864 } else {
1865 for (i=0; i < effective; ++i) {
1866 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1867 if (outputs[c_inter])
1868 outputs[c_inter][p_inter] += val;
1869 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1870 }
1871 }
1872 }
1873
1874 total_decode -= effective;
1875 }
1876 *c_inter_p = c_inter;
1877 *p_inter_p = p_inter;
1878 return TRUE;
1879 }
1880
predict_point(int x,int x0,int x1,int y0,int y1)1881 static int predict_point(int x, int x0, int x1, int y0, int y1)
1882 {
1883 int dy = y1 - y0;
1884 int adx = x1 - x0;
1885 // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
1886 int err = abs(dy) * (x - x0);
1887 int off = err / adx;
1888 return dy < 0 ? y0 - off : y0 + off;
1889 }
1890
1891 // the following table is block-copied from the specification
1892 static float inverse_db_table[256] =
1893 {
1894 1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
1895 1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
1896 1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
1897 2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
1898 2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
1899 3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
1900 4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
1901 6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
1902 7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
1903 1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
1904 1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
1905 1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
1906 2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
1907 2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
1908 3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
1909 4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
1910 5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
1911 7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
1912 9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
1913 1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
1914 1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
1915 2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
1916 2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
1917 3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
1918 4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
1919 5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
1920 7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
1921 9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
1922 0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
1923 0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
1924 0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
1925 0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
1926 0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
1927 0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
1928 0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
1929 0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
1930 0.00092223983f, 0.00098217216f, 0.0010459992f, 0.0011139742f,
1931 0.0011863665f, 0.0012634633f, 0.0013455702f, 0.0014330129f,
1932 0.0015261382f, 0.0016253153f, 0.0017309374f, 0.0018434235f,
1933 0.0019632195f, 0.0020908006f, 0.0022266726f, 0.0023713743f,
1934 0.0025254795f, 0.0026895994f, 0.0028643847f, 0.0030505286f,
1935 0.0032487691f, 0.0034598925f, 0.0036847358f, 0.0039241906f,
1936 0.0041792066f, 0.0044507950f, 0.0047400328f, 0.0050480668f,
1937 0.0053761186f, 0.0057254891f, 0.0060975636f, 0.0064938176f,
1938 0.0069158225f, 0.0073652516f, 0.0078438871f, 0.0083536271f,
1939 0.0088964928f, 0.009474637f, 0.010090352f, 0.010746080f,
1940 0.011444421f, 0.012188144f, 0.012980198f, 0.013823725f,
1941 0.014722068f, 0.015678791f, 0.016697687f, 0.017782797f,
1942 0.018938423f, 0.020169149f, 0.021479854f, 0.022875735f,
1943 0.024362330f, 0.025945531f, 0.027631618f, 0.029427276f,
1944 0.031339626f, 0.033376252f, 0.035545228f, 0.037855157f,
1945 0.040315199f, 0.042935108f, 0.045725273f, 0.048696758f,
1946 0.051861348f, 0.055231591f, 0.058820850f, 0.062643361f,
1947 0.066714279f, 0.071049749f, 0.075666962f, 0.080584227f,
1948 0.085821044f, 0.091398179f, 0.097337747f, 0.10366330f,
1949 0.11039993f, 0.11757434f, 0.12521498f, 0.13335215f,
1950 0.14201813f, 0.15124727f, 0.16107617f, 0.17154380f,
1951 0.18269168f, 0.19456402f, 0.20720788f, 0.22067342f,
1952 0.23501402f, 0.25028656f, 0.26655159f, 0.28387361f,
1953 0.30232132f, 0.32196786f, 0.34289114f, 0.36517414f,
1954 0.38890521f, 0.41417847f, 0.44109412f, 0.46975890f,
1955 0.50028648f, 0.53279791f, 0.56742212f, 0.60429640f,
1956 0.64356699f, 0.68538959f, 0.72993007f, 0.77736504f,
1957 0.82788260f, 0.88168307f, 0.9389798f, 1.0f
1958 };
1959
1960
1961 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1962 // note that you must produce bit-identical output to decode correctly;
1963 // this specific sequence of operations is specified in the spec (it's
1964 // drawing integer-quantized frequency-space lines that the encoder
1965 // expects to be exactly the same)
1966 // ... also, isn't the whole point of Bresenham's algorithm to NOT
1967 // have to divide in the setup? sigh.
1968 #ifndef STB_VORBIS_NO_DEFER_FLOOR
1969 #define LINE_OP(a,b) a *= b
1970 #else
1971 #define LINE_OP(a,b) a = b
1972 #endif
1973
1974 #ifdef STB_VORBIS_DIVIDE_TABLE
1975 #define DIVTAB_NUMER 32
1976 #define DIVTAB_DENOM 64
1977 int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
1978 #endif
1979
draw_line(float * output,int x0,int y0,int x1,int y1,int n)1980 static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
1981 {
1982 int dy = y1 - y0;
1983 int adx = x1 - x0;
1984 int ady = abs(dy);
1985 int base;
1986 int x=x0,y=y0;
1987 int err = 0;
1988 int sy;
1989
1990 #ifdef STB_VORBIS_DIVIDE_TABLE
1991 if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
1992 if (dy < 0) {
1993 base = -integer_divide_table[ady][adx];
1994 sy = base-1;
1995 } else {
1996 base = integer_divide_table[ady][adx];
1997 sy = base+1;
1998 }
1999 } else {
2000 base = dy / adx;
2001 if (dy < 0)
2002 sy = base - 1;
2003 else
2004 sy = base+1;
2005 }
2006 #else
2007 base = dy / adx;
2008 if (dy < 0)
2009 sy = base - 1;
2010 else
2011 sy = base+1;
2012 #endif
2013 ady -= abs(base) * adx;
2014 if (x1 > n) x1 = n;
2015 if (x < x1) {
2016 LINE_OP(output[x], inverse_db_table[y]);
2017 for (++x; x < x1; ++x) {
2018 err += ady;
2019 if (err >= adx) {
2020 err -= adx;
2021 y += sy;
2022 } else
2023 y += base;
2024 LINE_OP(output[x], inverse_db_table[y]);
2025 }
2026 }
2027 }
2028
residue_decode(vorb * f,Codebook * book,float * target,int offset,int n,int rtype)2029 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
2030 {
2031 int k;
2032 if (rtype == 0) {
2033 int step = n / book->dimensions;
2034 for (k=0; k < step; ++k)
2035 if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
2036 return FALSE;
2037 } else {
2038 for (k=0; k < n; ) {
2039 if (!codebook_decode(f, book, target+offset, n-k))
2040 return FALSE;
2041 k += book->dimensions;
2042 offset += book->dimensions;
2043 }
2044 }
2045 return TRUE;
2046 }
2047
2048 // n is 1/2 of the blocksize --
2049 // specification: "Correct per-vector decode length is [n]/2"
decode_residue(vorb * f,float * residue_buffers[],int ch,int n,int rn,uint8 * do_not_decode)2050 static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
2051 {
2052 int i,j,pass;
2053 Residue *r = f->residue_config + rn;
2054 int rtype = f->residue_types[rn];
2055 int c = r->classbook;
2056 int classwords = f->codebooks[c].dimensions;
2057 unsigned int actual_size = rtype == 2 ? n*2 : n;
2058 unsigned int limit_r_begin = (r->begin < actual_size ? r->begin : actual_size);
2059 unsigned int limit_r_end = (r->end < actual_size ? r->end : actual_size);
2060 int n_read = limit_r_end - limit_r_begin;
2061 int part_read = n_read / r->part_size;
2062 int temp_alloc_point = temp_alloc_save(f);
2063 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2064 uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
2065 #else
2066 int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
2067 #endif
2068
2069 CHECK(f);
2070
2071 for (i=0; i < ch; ++i)
2072 if (!do_not_decode[i])
2073 memset(residue_buffers[i], 0, sizeof(float) * n);
2074
2075 if (rtype == 2 && ch != 1) {
2076 for (j=0; j < ch; ++j)
2077 if (!do_not_decode[j])
2078 break;
2079 if (j == ch)
2080 goto done;
2081
2082 for (pass=0; pass < 8; ++pass) {
2083 int pcount = 0, class_set = 0;
2084 if (ch == 2) {
2085 while (pcount < part_read) {
2086 int z = r->begin + pcount*r->part_size;
2087 int c_inter = (z & 1), p_inter = z>>1;
2088 if (pass == 0) {
2089 Codebook *c = f->codebooks+r->classbook;
2090 int q;
2091 DECODE(q,f,c);
2092 if (q == EOP) goto done;
2093 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2094 part_classdata[0][class_set] = r->classdata[q];
2095 #else
2096 for (i=classwords-1; i >= 0; --i) {
2097 classifications[0][i+pcount] = q % r->classifications;
2098 q /= r->classifications;
2099 }
2100 #endif
2101 }
2102 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2103 int z = r->begin + pcount*r->part_size;
2104 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2105 int c = part_classdata[0][class_set][i];
2106 #else
2107 int c = classifications[0][pcount];
2108 #endif
2109 int b = r->residue_books[c][pass];
2110 if (b >= 0) {
2111 Codebook *book = f->codebooks + b;
2112 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
2113 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2114 goto done;
2115 #else
2116 // saves 1%
2117 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2118 goto done;
2119 #endif
2120 } else {
2121 z += r->part_size;
2122 c_inter = z & 1;
2123 p_inter = z >> 1;
2124 }
2125 }
2126 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2127 ++class_set;
2128 #endif
2129 }
2130 } else if (ch == 1) {
2131 while (pcount < part_read) {
2132 int z = r->begin + pcount*r->part_size;
2133 int c_inter = 0, p_inter = z;
2134 if (pass == 0) {
2135 Codebook *c = f->codebooks+r->classbook;
2136 int q;
2137 DECODE(q,f,c);
2138 if (q == EOP) goto done;
2139 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2140 part_classdata[0][class_set] = r->classdata[q];
2141 #else
2142 for (i=classwords-1; i >= 0; --i) {
2143 classifications[0][i+pcount] = q % r->classifications;
2144 q /= r->classifications;
2145 }
2146 #endif
2147 }
2148 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2149 int z = r->begin + pcount*r->part_size;
2150 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2151 int c = part_classdata[0][class_set][i];
2152 #else
2153 int c = classifications[0][pcount];
2154 #endif
2155 int b = r->residue_books[c][pass];
2156 if (b >= 0) {
2157 Codebook *book = f->codebooks + b;
2158 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2159 goto done;
2160 } else {
2161 z += r->part_size;
2162 c_inter = 0;
2163 p_inter = z;
2164 }
2165 }
2166 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2167 ++class_set;
2168 #endif
2169 }
2170 } else {
2171 while (pcount < part_read) {
2172 int z = r->begin + pcount*r->part_size;
2173 int c_inter = z % ch, p_inter = z/ch;
2174 if (pass == 0) {
2175 Codebook *c = f->codebooks+r->classbook;
2176 int q;
2177 DECODE(q,f,c);
2178 if (q == EOP) goto done;
2179 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2180 part_classdata[0][class_set] = r->classdata[q];
2181 #else
2182 for (i=classwords-1; i >= 0; --i) {
2183 classifications[0][i+pcount] = q % r->classifications;
2184 q /= r->classifications;
2185 }
2186 #endif
2187 }
2188 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2189 int z = r->begin + pcount*r->part_size;
2190 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2191 int c = part_classdata[0][class_set][i];
2192 #else
2193 int c = classifications[0][pcount];
2194 #endif
2195 int b = r->residue_books[c][pass];
2196 if (b >= 0) {
2197 Codebook *book = f->codebooks + b;
2198 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2199 goto done;
2200 } else {
2201 z += r->part_size;
2202 c_inter = z % ch;
2203 p_inter = z / ch;
2204 }
2205 }
2206 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2207 ++class_set;
2208 #endif
2209 }
2210 }
2211 }
2212 goto done;
2213 }
2214 CHECK(f);
2215
2216 for (pass=0; pass < 8; ++pass) {
2217 int pcount = 0, class_set=0;
2218 while (pcount < part_read) {
2219 if (pass == 0) {
2220 for (j=0; j < ch; ++j) {
2221 if (!do_not_decode[j]) {
2222 Codebook *c = f->codebooks+r->classbook;
2223 int temp;
2224 DECODE(temp,f,c);
2225 if (temp == EOP) goto done;
2226 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2227 part_classdata[j][class_set] = r->classdata[temp];
2228 #else
2229 for (i=classwords-1; i >= 0; --i) {
2230 classifications[j][i+pcount] = temp % r->classifications;
2231 temp /= r->classifications;
2232 }
2233 #endif
2234 }
2235 }
2236 }
2237 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2238 for (j=0; j < ch; ++j) {
2239 if (!do_not_decode[j]) {
2240 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2241 int c = part_classdata[j][class_set][i];
2242 #else
2243 int c = classifications[j][pcount];
2244 #endif
2245 int b = r->residue_books[c][pass];
2246 if (b >= 0) {
2247 float *target = residue_buffers[j];
2248 int offset = r->begin + pcount * r->part_size;
2249 int n = r->part_size;
2250 Codebook *book = f->codebooks + b;
2251 if (!residue_decode(f, book, target, offset, n, rtype))
2252 goto done;
2253 }
2254 }
2255 }
2256 }
2257 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2258 ++class_set;
2259 #endif
2260 }
2261 }
2262 done:
2263 CHECK(f);
2264 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2265 temp_free(f,part_classdata);
2266 #else
2267 temp_free(f,classifications);
2268 #endif
2269 temp_alloc_restore(f,temp_alloc_point);
2270 }
2271
2272
2273 #if 0
2274 // slow way for debugging
2275 void inverse_mdct_slow(float *buffer, int n)
2276 {
2277 int i,j;
2278 int n2 = n >> 1;
2279 float *x = (float *) malloc(sizeof(*x) * n2);
2280 memcpy(x, buffer, sizeof(*x) * n2);
2281 for (i=0; i < n; ++i) {
2282 float acc = 0;
2283 for (j=0; j < n2; ++j)
2284 // formula from paper:
2285 //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2286 // formula from wikipedia
2287 //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2288 // these are equivalent, except the formula from the paper inverts the multiplier!
2289 // however, what actually works is NO MULTIPLIER!?!
2290 //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2291 acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2292 buffer[i] = acc;
2293 }
2294 free(x);
2295 }
2296 #elif 0
2297 // same as above, but just barely able to run in real time on modern machines
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2298 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2299 {
2300 float mcos[16384];
2301 int i,j;
2302 int n2 = n >> 1, nmask = (n << 2) -1;
2303 float *x = (float *) malloc(sizeof(*x) * n2);
2304 memcpy(x, buffer, sizeof(*x) * n2);
2305 for (i=0; i < 4*n; ++i)
2306 mcos[i] = (float) cos(M_PI / 2 * i / n);
2307
2308 for (i=0; i < n; ++i) {
2309 float acc = 0;
2310 for (j=0; j < n2; ++j)
2311 acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
2312 buffer[i] = acc;
2313 }
2314 free(x);
2315 }
2316 #elif 0
2317 // transform to use a slow dct-iv; this is STILL basically trivial,
2318 // but only requires half as many ops
dct_iv_slow(float * buffer,int n)2319 void dct_iv_slow(float *buffer, int n)
2320 {
2321 float mcos[16384];
2322 float x[2048];
2323 int i,j;
2324 int n2 = n >> 1, nmask = (n << 3) - 1;
2325 memcpy(x, buffer, sizeof(*x) * n);
2326 for (i=0; i < 8*n; ++i)
2327 mcos[i] = (float) cos(M_PI / 4 * i / n);
2328 for (i=0; i < n; ++i) {
2329 float acc = 0;
2330 for (j=0; j < n; ++j)
2331 acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
2332 buffer[i] = acc;
2333 }
2334 }
2335
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2336 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2337 {
2338 int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
2339 float temp[4096];
2340
2341 memcpy(temp, buffer, n2 * sizeof(float));
2342 dct_iv_slow(temp, n2); // returns -c'-d, a-b'
2343
2344 for (i=0; i < n4 ; ++i) buffer[i] = temp[i+n4]; // a-b'
2345 for ( ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1]; // b-a', c+d'
2346 for ( ; i < n ; ++i) buffer[i] = -temp[i - n3_4]; // c'+d
2347 }
2348 #endif
2349
2350 #ifndef LIBVORBIS_MDCT
2351 #define LIBVORBIS_MDCT 0
2352 #endif
2353
2354 #if LIBVORBIS_MDCT
2355 // directly call the vorbis MDCT using an interface documented
2356 // by Jeff Roberts... useful for performance comparison
2357 typedef struct
2358 {
2359 int n;
2360 int log2n;
2361
2362 float *trig;
2363 int *bitrev;
2364
2365 float scale;
2366 } mdct_lookup;
2367
2368 extern void mdct_init(mdct_lookup *lookup, int n);
2369 extern void mdct_clear(mdct_lookup *l);
2370 extern void mdct_backward(mdct_lookup *init, float *in, float *out);
2371
2372 mdct_lookup M1,M2;
2373
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2374 void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2375 {
2376 mdct_lookup *M;
2377 if (M1.n == n) M = &M1;
2378 else if (M2.n == n) M = &M2;
2379 else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
2380 else {
2381 if (M2.n) __asm int 3;
2382 mdct_init(&M2, n);
2383 M = &M2;
2384 }
2385
2386 mdct_backward(M, buffer, buffer);
2387 }
2388 #endif
2389
2390
2391 // the following were split out into separate functions while optimizing;
2392 // they could be pushed back up but eh. __forceinline showed no change;
2393 // they're probably already being inlined.
imdct_step3_iter0_loop(int n,float * e,int i_off,int k_off,float * A)2394 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
2395 {
2396 float *ee0 = e + i_off;
2397 float *ee2 = ee0 + k_off;
2398 int i;
2399
2400 assert((n & 3) == 0);
2401 for (i=(n>>2); i > 0; --i) {
2402 float k00_20, k01_21;
2403 k00_20 = ee0[ 0] - ee2[ 0];
2404 k01_21 = ee0[-1] - ee2[-1];
2405 ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
2406 ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
2407 ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
2408 ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
2409 A += 8;
2410
2411 k00_20 = ee0[-2] - ee2[-2];
2412 k01_21 = ee0[-3] - ee2[-3];
2413 ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
2414 ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
2415 ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
2416 ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
2417 A += 8;
2418
2419 k00_20 = ee0[-4] - ee2[-4];
2420 k01_21 = ee0[-5] - ee2[-5];
2421 ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
2422 ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
2423 ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
2424 ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
2425 A += 8;
2426
2427 k00_20 = ee0[-6] - ee2[-6];
2428 k01_21 = ee0[-7] - ee2[-7];
2429 ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
2430 ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
2431 ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
2432 ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
2433 A += 8;
2434 ee0 -= 8;
2435 ee2 -= 8;
2436 }
2437 }
2438
imdct_step3_inner_r_loop(int lim,float * e,int d0,int k_off,float * A,int k1)2439 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
2440 {
2441 int i;
2442 float k00_20, k01_21;
2443
2444 float *e0 = e + d0;
2445 float *e2 = e0 + k_off;
2446
2447 for (i=lim >> 2; i > 0; --i) {
2448 k00_20 = e0[-0] - e2[-0];
2449 k01_21 = e0[-1] - e2[-1];
2450 e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
2451 e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
2452 e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
2453 e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
2454
2455 A += k1;
2456
2457 k00_20 = e0[-2] - e2[-2];
2458 k01_21 = e0[-3] - e2[-3];
2459 e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
2460 e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
2461 e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
2462 e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
2463
2464 A += k1;
2465
2466 k00_20 = e0[-4] - e2[-4];
2467 k01_21 = e0[-5] - e2[-5];
2468 e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
2469 e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
2470 e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
2471 e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
2472
2473 A += k1;
2474
2475 k00_20 = e0[-6] - e2[-6];
2476 k01_21 = e0[-7] - e2[-7];
2477 e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
2478 e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
2479 e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
2480 e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
2481
2482 e0 -= 8;
2483 e2 -= 8;
2484
2485 A += k1;
2486 }
2487 }
2488
imdct_step3_inner_s_loop(int n,float * e,int i_off,int k_off,float * A,int a_off,int k0)2489 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
2490 {
2491 int i;
2492 float A0 = A[0];
2493 float A1 = A[0+1];
2494 float A2 = A[0+a_off];
2495 float A3 = A[0+a_off+1];
2496 float A4 = A[0+a_off*2+0];
2497 float A5 = A[0+a_off*2+1];
2498 float A6 = A[0+a_off*3+0];
2499 float A7 = A[0+a_off*3+1];
2500
2501 float k00,k11;
2502
2503 float *ee0 = e +i_off;
2504 float *ee2 = ee0+k_off;
2505
2506 for (i=n; i > 0; --i) {
2507 k00 = ee0[ 0] - ee2[ 0];
2508 k11 = ee0[-1] - ee2[-1];
2509 ee0[ 0] = ee0[ 0] + ee2[ 0];
2510 ee0[-1] = ee0[-1] + ee2[-1];
2511 ee2[ 0] = (k00) * A0 - (k11) * A1;
2512 ee2[-1] = (k11) * A0 + (k00) * A1;
2513
2514 k00 = ee0[-2] - ee2[-2];
2515 k11 = ee0[-3] - ee2[-3];
2516 ee0[-2] = ee0[-2] + ee2[-2];
2517 ee0[-3] = ee0[-3] + ee2[-3];
2518 ee2[-2] = (k00) * A2 - (k11) * A3;
2519 ee2[-3] = (k11) * A2 + (k00) * A3;
2520
2521 k00 = ee0[-4] - ee2[-4];
2522 k11 = ee0[-5] - ee2[-5];
2523 ee0[-4] = ee0[-4] + ee2[-4];
2524 ee0[-5] = ee0[-5] + ee2[-5];
2525 ee2[-4] = (k00) * A4 - (k11) * A5;
2526 ee2[-5] = (k11) * A4 + (k00) * A5;
2527
2528 k00 = ee0[-6] - ee2[-6];
2529 k11 = ee0[-7] - ee2[-7];
2530 ee0[-6] = ee0[-6] + ee2[-6];
2531 ee0[-7] = ee0[-7] + ee2[-7];
2532 ee2[-6] = (k00) * A6 - (k11) * A7;
2533 ee2[-7] = (k11) * A6 + (k00) * A7;
2534
2535 ee0 -= k0;
2536 ee2 -= k0;
2537 }
2538 }
2539
iter_54(float * z)2540 static __forceinline void iter_54(float *z)
2541 {
2542 float k00,k11,k22,k33;
2543 float y0,y1,y2,y3;
2544
2545 k00 = z[ 0] - z[-4];
2546 y0 = z[ 0] + z[-4];
2547 y2 = z[-2] + z[-6];
2548 k22 = z[-2] - z[-6];
2549
2550 z[-0] = y0 + y2; // z0 + z4 + z2 + z6
2551 z[-2] = y0 - y2; // z0 + z4 - z2 - z6
2552
2553 // done with y0,y2
2554
2555 k33 = z[-3] - z[-7];
2556
2557 z[-4] = k00 + k33; // z0 - z4 + z3 - z7
2558 z[-6] = k00 - k33; // z0 - z4 - z3 + z7
2559
2560 // done with k33
2561
2562 k11 = z[-1] - z[-5];
2563 y1 = z[-1] + z[-5];
2564 y3 = z[-3] + z[-7];
2565
2566 z[-1] = y1 + y3; // z1 + z5 + z3 + z7
2567 z[-3] = y1 - y3; // z1 + z5 - z3 - z7
2568 z[-5] = k11 - k22; // z1 - z5 + z2 - z6
2569 z[-7] = k11 + k22; // z1 - z5 - z2 + z6
2570 }
2571
imdct_step3_inner_s_loop_ld654(int n,float * e,int i_off,float * A,int base_n)2572 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
2573 {
2574 int a_off = base_n >> 3;
2575 float A2 = A[0+a_off];
2576 float *z = e + i_off;
2577 float *base = z - 16 * n;
2578
2579 while (z > base) {
2580 float k00,k11;
2581
2582 k00 = z[-0] - z[-8];
2583 k11 = z[-1] - z[-9];
2584 z[-0] = z[-0] + z[-8];
2585 z[-1] = z[-1] + z[-9];
2586 z[-8] = k00;
2587 z[-9] = k11 ;
2588
2589 k00 = z[ -2] - z[-10];
2590 k11 = z[ -3] - z[-11];
2591 z[ -2] = z[ -2] + z[-10];
2592 z[ -3] = z[ -3] + z[-11];
2593 z[-10] = (k00+k11) * A2;
2594 z[-11] = (k11-k00) * A2;
2595
2596 k00 = z[-12] - z[ -4]; // reverse to avoid a unary negation
2597 k11 = z[ -5] - z[-13];
2598 z[ -4] = z[ -4] + z[-12];
2599 z[ -5] = z[ -5] + z[-13];
2600 z[-12] = k11;
2601 z[-13] = k00;
2602
2603 k00 = z[-14] - z[ -6]; // reverse to avoid a unary negation
2604 k11 = z[ -7] - z[-15];
2605 z[ -6] = z[ -6] + z[-14];
2606 z[ -7] = z[ -7] + z[-15];
2607 z[-14] = (k00+k11) * A2;
2608 z[-15] = (k00-k11) * A2;
2609
2610 iter_54(z);
2611 iter_54(z-8);
2612 z -= 16;
2613 }
2614 }
2615
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2616 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2617 {
2618 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2619 int ld;
2620 // @OPTIMIZE: reduce register pressure by using fewer variables?
2621 int save_point = temp_alloc_save(f);
2622 float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
2623 float *u=NULL,*v=NULL;
2624 // twiddle factors
2625 float *A = f->A[blocktype];
2626
2627 // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2628 // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
2629
2630 // kernel from paper
2631
2632
2633 // merged:
2634 // copy and reflect spectral data
2635 // step 0
2636
2637 // note that it turns out that the items added together during
2638 // this step are, in fact, being added to themselves (as reflected
2639 // by step 0). inexplicable inefficiency! this became obvious
2640 // once I combined the passes.
2641
2642 // so there's a missing 'times 2' here (for adding X to itself).
2643 // this propagates through linearly to the end, where the numbers
2644 // are 1/2 too small, and need to be compensated for.
2645
2646 {
2647 float *d,*e, *AA, *e_stop;
2648 d = &buf2[n2-2];
2649 AA = A;
2650 e = &buffer[0];
2651 e_stop = &buffer[n2];
2652 while (e != e_stop) {
2653 d[1] = (e[0] * AA[0] - e[2]*AA[1]);
2654 d[0] = (e[0] * AA[1] + e[2]*AA[0]);
2655 d -= 2;
2656 AA += 2;
2657 e += 4;
2658 }
2659
2660 e = &buffer[n2-3];
2661 while (d >= buf2) {
2662 d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
2663 d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
2664 d -= 2;
2665 AA += 2;
2666 e -= 4;
2667 }
2668 }
2669
2670 // now we use symbolic names for these, so that we can
2671 // possibly swap their meaning as we change which operations
2672 // are in place
2673
2674 u = buffer;
2675 v = buf2;
2676
2677 // step 2 (paper output is w, now u)
2678 // this could be in place, but the data ends up in the wrong
2679 // place... _somebody_'s got to swap it, so this is nominated
2680 {
2681 float *AA = &A[n2-8];
2682 float *d0,*d1, *e0, *e1;
2683
2684 e0 = &v[n4];
2685 e1 = &v[0];
2686
2687 d0 = &u[n4];
2688 d1 = &u[0];
2689
2690 while (AA >= A) {
2691 float v40_20, v41_21;
2692
2693 v41_21 = e0[1] - e1[1];
2694 v40_20 = e0[0] - e1[0];
2695 d0[1] = e0[1] + e1[1];
2696 d0[0] = e0[0] + e1[0];
2697 d1[1] = v41_21*AA[4] - v40_20*AA[5];
2698 d1[0] = v40_20*AA[4] + v41_21*AA[5];
2699
2700 v41_21 = e0[3] - e1[3];
2701 v40_20 = e0[2] - e1[2];
2702 d0[3] = e0[3] + e1[3];
2703 d0[2] = e0[2] + e1[2];
2704 d1[3] = v41_21*AA[0] - v40_20*AA[1];
2705 d1[2] = v40_20*AA[0] + v41_21*AA[1];
2706
2707 AA -= 8;
2708
2709 d0 += 4;
2710 d1 += 4;
2711 e0 += 4;
2712 e1 += 4;
2713 }
2714 }
2715
2716 // step 3
2717 ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2718
2719 // optimized step 3:
2720
2721 // the original step3 loop can be nested r inside s or s inside r;
2722 // it's written originally as s inside r, but this is dumb when r
2723 // iterates many times, and s few. So I have two copies of it and
2724 // switch between them halfway.
2725
2726 // this is iteration 0 of step 3
2727 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2728 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2729
2730 // this is iteration 1 of step 3
2731 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2732 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2733 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2734 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2735
2736 l=2;
2737 for (; l < (ld-3)>>1; ++l) {
2738 int k0 = n >> (l+2), k0_2 = k0>>1;
2739 int lim = 1 << (l+1);
2740 int i;
2741 for (i=0; i < lim; ++i)
2742 imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2743 }
2744
2745 for (; l < ld-6; ++l) {
2746 int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2747 int rlim = n >> (l+6), r;
2748 int lim = 1 << (l+1);
2749 int i_off;
2750 float *A0 = A;
2751 i_off = n2-1;
2752 for (r=rlim; r > 0; --r) {
2753 imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2754 A0 += k1*4;
2755 i_off -= 8;
2756 }
2757 }
2758
2759 // iterations with count:
2760 // ld-6,-5,-4 all interleaved together
2761 // the big win comes from getting rid of needless flops
2762 // due to the constants on pass 5 & 4 being all 1 and 0;
2763 // combining them to be simultaneous to improve cache made little difference
2764 imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2765
2766 // output is u
2767
2768 // step 4, 5, and 6
2769 // cannot be in-place because of step 5
2770 {
2771 uint16 *bitrev = f->bit_reverse[blocktype];
2772 // weirdly, I'd have thought reading sequentially and writing
2773 // erratically would have been better than vice-versa, but in
2774 // fact that's not what my testing showed. (That is, with
2775 // j = bitreverse(i), do you read i and write j, or read j and write i.)
2776
2777 float *d0 = &v[n4-4];
2778 float *d1 = &v[n2-4];
2779 while (d0 >= v) {
2780 int k4;
2781
2782 k4 = bitrev[0];
2783 d1[3] = u[k4+0];
2784 d1[2] = u[k4+1];
2785 d0[3] = u[k4+2];
2786 d0[2] = u[k4+3];
2787
2788 k4 = bitrev[1];
2789 d1[1] = u[k4+0];
2790 d1[0] = u[k4+1];
2791 d0[1] = u[k4+2];
2792 d0[0] = u[k4+3];
2793
2794 d0 -= 4;
2795 d1 -= 4;
2796 bitrev += 2;
2797 }
2798 }
2799 // (paper output is u, now v)
2800
2801
2802 // data must be in buf2
2803 assert(v == buf2);
2804
2805 // step 7 (paper output is v, now v)
2806 // this is now in place
2807 {
2808 float *C = f->C[blocktype];
2809 float *d, *e;
2810
2811 d = v;
2812 e = v + n2 - 4;
2813
2814 while (d < e) {
2815 float a02,a11,b0,b1,b2,b3;
2816
2817 a02 = d[0] - e[2];
2818 a11 = d[1] + e[3];
2819
2820 b0 = C[1]*a02 + C[0]*a11;
2821 b1 = C[1]*a11 - C[0]*a02;
2822
2823 b2 = d[0] + e[ 2];
2824 b3 = d[1] - e[ 3];
2825
2826 d[0] = b2 + b0;
2827 d[1] = b3 + b1;
2828 e[2] = b2 - b0;
2829 e[3] = b1 - b3;
2830
2831 a02 = d[2] - e[0];
2832 a11 = d[3] + e[1];
2833
2834 b0 = C[3]*a02 + C[2]*a11;
2835 b1 = C[3]*a11 - C[2]*a02;
2836
2837 b2 = d[2] + e[ 0];
2838 b3 = d[3] - e[ 1];
2839
2840 d[2] = b2 + b0;
2841 d[3] = b3 + b1;
2842 e[0] = b2 - b0;
2843 e[1] = b1 - b3;
2844
2845 C += 4;
2846 d += 4;
2847 e -= 4;
2848 }
2849 }
2850
2851 // data must be in buf2
2852
2853
2854 // step 8+decode (paper output is X, now buffer)
2855 // this generates pairs of data a la 8 and pushes them directly through
2856 // the decode kernel (pushing rather than pulling) to avoid having
2857 // to make another pass later
2858
2859 // this cannot POSSIBLY be in place, so we refer to the buffers directly
2860
2861 {
2862 float *d0,*d1,*d2,*d3;
2863
2864 float *B = f->B[blocktype] + n2 - 8;
2865 float *e = buf2 + n2 - 8;
2866 d0 = &buffer[0];
2867 d1 = &buffer[n2-4];
2868 d2 = &buffer[n2];
2869 d3 = &buffer[n-4];
2870 while (e >= v) {
2871 float p0,p1,p2,p3;
2872
2873 p3 = e[6]*B[7] - e[7]*B[6];
2874 p2 = -e[6]*B[6] - e[7]*B[7];
2875
2876 d0[0] = p3;
2877 d1[3] = - p3;
2878 d2[0] = p2;
2879 d3[3] = p2;
2880
2881 p1 = e[4]*B[5] - e[5]*B[4];
2882 p0 = -e[4]*B[4] - e[5]*B[5];
2883
2884 d0[1] = p1;
2885 d1[2] = - p1;
2886 d2[1] = p0;
2887 d3[2] = p0;
2888
2889 p3 = e[2]*B[3] - e[3]*B[2];
2890 p2 = -e[2]*B[2] - e[3]*B[3];
2891
2892 d0[2] = p3;
2893 d1[1] = - p3;
2894 d2[2] = p2;
2895 d3[1] = p2;
2896
2897 p1 = e[0]*B[1] - e[1]*B[0];
2898 p0 = -e[0]*B[0] - e[1]*B[1];
2899
2900 d0[3] = p1;
2901 d1[0] = - p1;
2902 d2[3] = p0;
2903 d3[0] = p0;
2904
2905 B -= 8;
2906 e -= 8;
2907 d0 += 4;
2908 d2 += 4;
2909 d1 -= 4;
2910 d3 -= 4;
2911 }
2912 }
2913
2914 temp_free(f,buf2);
2915 temp_alloc_restore(f,save_point);
2916 }
2917
2918 #if 0
2919 // this is the original version of the above code, if you want to optimize it from scratch
2920 void inverse_mdct_naive(float *buffer, int n)
2921 {
2922 float s;
2923 float A[1 << 12], B[1 << 12], C[1 << 11];
2924 int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2925 int n3_4 = n - n4, ld;
2926 // how can they claim this only uses N words?!
2927 // oh, because they're only used sparsely, whoops
2928 float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];
2929 // set up twiddle factors
2930
2931 for (k=k2=0; k < n4; ++k,k2+=2) {
2932 A[k2 ] = (float) cos(4*k*M_PI/n);
2933 A[k2+1] = (float) -sin(4*k*M_PI/n);
2934 B[k2 ] = (float) cos((k2+1)*M_PI/n/2);
2935 B[k2+1] = (float) sin((k2+1)*M_PI/n/2);
2936 }
2937 for (k=k2=0; k < n8; ++k,k2+=2) {
2938 C[k2 ] = (float) cos(2*(k2+1)*M_PI/n);
2939 C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
2940 }
2941
2942 // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2943 // Note there are bugs in that pseudocode, presumably due to them attempting
2944 // to rename the arrays nicely rather than representing the way their actual
2945 // implementation bounces buffers back and forth. As a result, even in the
2946 // "some formulars corrected" version, a direct implementation fails. These
2947 // are noted below as "paper bug".
2948
2949 // copy and reflect spectral data
2950 for (k=0; k < n2; ++k) u[k] = buffer[k];
2951 for ( ; k < n ; ++k) u[k] = -buffer[n - k - 1];
2952 // kernel from paper
2953 // step 1
2954 for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
2955 v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2] - (u[k4+2] - u[n-k4-3])*A[k2+1];
2956 v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
2957 }
2958 // step 2
2959 for (k=k4=0; k < n8; k+=1, k4+=4) {
2960 w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
2961 w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
2962 w[k4+3] = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
2963 w[k4+1] = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
2964 }
2965 // step 3
2966 ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2967 for (l=0; l < ld-3; ++l) {
2968 int k0 = n >> (l+2), k1 = 1 << (l+3);
2969 int rlim = n >> (l+4), r4, r;
2970 int s2lim = 1 << (l+2), s2;
2971 for (r=r4=0; r < rlim; r4+=4,++r) {
2972 for (s2=0; s2 < s2lim; s2+=2) {
2973 u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
2974 u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
2975 u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
2976 - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
2977 u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
2978 + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
2979 }
2980 }
2981 if (l+1 < ld-3) {
2982 // paper bug: ping-ponging of u&w here is omitted
2983 memcpy(w, u, sizeof(u));
2984 }
2985 }
2986
2987 // step 4
2988 for (i=0; i < n8; ++i) {
2989 int j = bit_reverse(i) >> (32-ld+3);
2990 assert(j < n8);
2991 if (i == j) {
2992 // paper bug: original code probably swapped in place; if copying,
2993 // need to directly copy in this case
2994 int i8 = i << 3;
2995 v[i8+1] = u[i8+1];
2996 v[i8+3] = u[i8+3];
2997 v[i8+5] = u[i8+5];
2998 v[i8+7] = u[i8+7];
2999 } else if (i < j) {
3000 int i8 = i << 3, j8 = j << 3;
3001 v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
3002 v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
3003 v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
3004 v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
3005 }
3006 }
3007 // step 5
3008 for (k=0; k < n2; ++k) {
3009 w[k] = v[k*2+1];
3010 }
3011 // step 6
3012 for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
3013 u[n-1-k2] = w[k4];
3014 u[n-2-k2] = w[k4+1];
3015 u[n3_4 - 1 - k2] = w[k4+2];
3016 u[n3_4 - 2 - k2] = w[k4+3];
3017 }
3018 // step 7
3019 for (k=k2=0; k < n8; ++k, k2 += 2) {
3020 v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3021 v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3022 v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3023 v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3024 }
3025 // step 8
3026 for (k=k2=0; k < n4; ++k,k2 += 2) {
3027 X[k] = v[k2+n2]*B[k2 ] + v[k2+1+n2]*B[k2+1];
3028 X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2 ];
3029 }
3030
3031 // decode kernel to output
3032 // determined the following value experimentally
3033 // (by first figuring out what made inverse_mdct_slow work); then matching that here
3034 // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
3035 s = 0.5; // theoretically would be n4
3036
3037 // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
3038 // so it needs to use the "old" B values to behave correctly, or else
3039 // set s to 1.0 ]]]
3040 for (i=0; i < n4 ; ++i) buffer[i] = s * X[i+n4];
3041 for ( ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
3042 for ( ; i < n ; ++i) buffer[i] = -s * X[i - n3_4];
3043 }
3044 #endif
3045
get_window(vorb * f,int len)3046 static float *get_window(vorb *f, int len)
3047 {
3048 len <<= 1;
3049 if (len == f->blocksize_0) return f->window[0];
3050 if (len == f->blocksize_1) return f->window[1];
3051 assert(0);
3052 return NULL;
3053 }
3054
3055 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3056 typedef int16 YTYPE;
3057 #else
3058 typedef int YTYPE;
3059 #endif
do_floor(vorb * f,Mapping * map,int i,int n,float * target,YTYPE * finalY,uint8 * step2_flag)3060 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
3061 {
3062 int n2 = n >> 1;
3063 int s = map->chan[i].mux, floor;
3064 floor = map->submap_floor[s];
3065 if (f->floor_types[floor] == 0) {
3066 return error(f, VORBIS_invalid_stream);
3067 } else {
3068 Floor1 *g = &f->floor_config[floor].floor1;
3069 int j,q;
3070 int lx = 0, ly = finalY[0] * g->floor1_multiplier;
3071 for (q=1; q < g->values; ++q) {
3072 j = g->sorted_order[q];
3073 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3074 if (finalY[j] >= 0)
3075 #else
3076 if (step2_flag[j])
3077 #endif
3078 {
3079 int hy = finalY[j] * g->floor1_multiplier;
3080 int hx = g->Xlist[j];
3081 if (lx != hx)
3082 draw_line(target, lx,ly, hx,hy, n2);
3083 CHECK(f);
3084 lx = hx, ly = hy;
3085 }
3086 }
3087 if (lx < n2) {
3088 // optimization of: draw_line(target, lx,ly, n,ly, n2);
3089 for (j=lx; j < n2; ++j)
3090 LINE_OP(target[j], inverse_db_table[ly]);
3091 CHECK(f);
3092 }
3093 }
3094 return TRUE;
3095 }
3096
3097 // The meaning of "left" and "right"
3098 //
3099 // For a given frame:
3100 // we compute samples from 0..n
3101 // window_center is n/2
3102 // we'll window and mix the samples from left_start to left_end with data from the previous frame
3103 // all of the samples from left_end to right_start can be output without mixing; however,
3104 // this interval is 0-length except when transitioning between short and long frames
3105 // all of the samples from right_start to right_end need to be mixed with the next frame,
3106 // which we don't have, so those get saved in a buffer
3107 // frame N's right_end-right_start, the number of samples to mix with the next frame,
3108 // has to be the same as frame N+1's left_end-left_start (which they are by
3109 // construction)
3110
vorbis_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)3111 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
3112 {
3113 Mode *m;
3114 int i, n, prev, next, window_center;
3115 f->channel_buffer_start = f->channel_buffer_end = 0;
3116
3117 retry:
3118 if (f->eof) return FALSE;
3119 if (!maybe_start_packet(f))
3120 return FALSE;
3121 // check packet type
3122 if (get_bits(f,1) != 0) {
3123 if (IS_PUSH_MODE(f))
3124 return error(f,VORBIS_bad_packet_type);
3125 while (EOP != get8_packet(f));
3126 goto retry;
3127 }
3128
3129 if (f->alloc.alloc_buffer)
3130 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3131
3132 i = get_bits(f, ilog(f->mode_count-1));
3133 if (i == EOP) return FALSE;
3134 if (i >= f->mode_count) return FALSE;
3135 *mode = i;
3136 m = f->mode_config + i;
3137 if (m->blockflag) {
3138 n = f->blocksize_1;
3139 prev = get_bits(f,1);
3140 next = get_bits(f,1);
3141 } else {
3142 prev = next = 0;
3143 n = f->blocksize_0;
3144 }
3145
3146 // WINDOWING
3147
3148 window_center = n >> 1;
3149 if (m->blockflag && !prev) {
3150 *p_left_start = (n - f->blocksize_0) >> 2;
3151 *p_left_end = (n + f->blocksize_0) >> 2;
3152 } else {
3153 *p_left_start = 0;
3154 *p_left_end = window_center;
3155 }
3156 if (m->blockflag && !next) {
3157 *p_right_start = (n*3 - f->blocksize_0) >> 2;
3158 *p_right_end = (n*3 + f->blocksize_0) >> 2;
3159 } else {
3160 *p_right_start = window_center;
3161 *p_right_end = n;
3162 }
3163
3164 return TRUE;
3165 }
3166
vorbis_decode_packet_rest(vorb * f,int * len,Mode * m,int left_start,int left_end,int right_start,int right_end,int * p_left)3167 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
3168 {
3169 Mapping *map;
3170 int i,j,k,n,n2;
3171 int zero_channel[256];
3172 int really_zero_channel[256];
3173
3174 // WINDOWING
3175
3176 n = f->blocksize[m->blockflag];
3177 map = &f->mapping[m->mapping];
3178
3179 // FLOORS
3180 n2 = n >> 1;
3181
3182 CHECK(f);
3183
3184 for (i=0; i < f->channels; ++i) {
3185 int s = map->chan[i].mux, floor;
3186 zero_channel[i] = FALSE;
3187 floor = map->submap_floor[s];
3188 if (f->floor_types[floor] == 0) {
3189 return error(f, VORBIS_invalid_stream);
3190 } else {
3191 Floor1 *g = &f->floor_config[floor].floor1;
3192 if (get_bits(f, 1)) {
3193 short *finalY;
3194 uint8 step2_flag[256];
3195 static int range_list[4] = { 256, 128, 86, 64 };
3196 int range = range_list[g->floor1_multiplier-1];
3197 int offset = 2;
3198 finalY = f->finalY[i];
3199 finalY[0] = get_bits(f, ilog(range)-1);
3200 finalY[1] = get_bits(f, ilog(range)-1);
3201 for (j=0; j < g->partitions; ++j) {
3202 int pclass = g->partition_class_list[j];
3203 int cdim = g->class_dimensions[pclass];
3204 int cbits = g->class_subclasses[pclass];
3205 int csub = (1 << cbits)-1;
3206 int cval = 0;
3207 if (cbits) {
3208 Codebook *c = f->codebooks + g->class_masterbooks[pclass];
3209 DECODE(cval,f,c);
3210 }
3211 for (k=0; k < cdim; ++k) {
3212 int book = g->subclass_books[pclass][cval & csub];
3213 cval = cval >> cbits;
3214 if (book >= 0) {
3215 int temp;
3216 Codebook *c = f->codebooks + book;
3217 DECODE(temp,f,c);
3218 finalY[offset++] = temp;
3219 } else
3220 finalY[offset++] = 0;
3221 }
3222 }
3223 if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
3224 step2_flag[0] = step2_flag[1] = 1;
3225 for (j=2; j < g->values; ++j) {
3226 int low, high, pred, highroom, lowroom, room, val;
3227 low = g->neighbors[j][0];
3228 high = g->neighbors[j][1];
3229 //neighbors(g->Xlist, j, &low, &high);
3230 pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
3231 val = finalY[j];
3232 highroom = range - pred;
3233 lowroom = pred;
3234 if (highroom < lowroom)
3235 room = highroom * 2;
3236 else
3237 room = lowroom * 2;
3238 if (val) {
3239 step2_flag[low] = step2_flag[high] = 1;
3240 step2_flag[j] = 1;
3241 if (val >= room)
3242 if (highroom > lowroom)
3243 finalY[j] = val - lowroom + pred;
3244 else
3245 finalY[j] = pred - val + highroom - 1;
3246 else
3247 if (val & 1)
3248 finalY[j] = pred - ((val+1)>>1);
3249 else
3250 finalY[j] = pred + (val>>1);
3251 } else {
3252 step2_flag[j] = 0;
3253 finalY[j] = pred;
3254 }
3255 }
3256
3257 #ifdef STB_VORBIS_NO_DEFER_FLOOR
3258 do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
3259 #else
3260 // defer final floor computation until _after_ residue
3261 for (j=0; j < g->values; ++j) {
3262 if (!step2_flag[j])
3263 finalY[j] = -1;
3264 }
3265 #endif
3266 } else {
3267 error:
3268 zero_channel[i] = TRUE;
3269 }
3270 // So we just defer everything else to later
3271
3272 // at this point we've decoded the floor into buffer
3273 }
3274 }
3275 CHECK(f);
3276 // at this point we've decoded all floors
3277
3278 if (f->alloc.alloc_buffer)
3279 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3280
3281 // re-enable coupled channels if necessary
3282 memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
3283 for (i=0; i < map->coupling_steps; ++i)
3284 if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
3285 zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
3286 }
3287
3288 CHECK(f);
3289 // RESIDUE DECODE
3290 for (i=0; i < map->submaps; ++i) {
3291 float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
3292 int r;
3293 uint8 do_not_decode[256];
3294 int ch = 0;
3295 for (j=0; j < f->channels; ++j) {
3296 if (map->chan[j].mux == i) {
3297 if (zero_channel[j]) {
3298 do_not_decode[ch] = TRUE;
3299 residue_buffers[ch] = NULL;
3300 } else {
3301 do_not_decode[ch] = FALSE;
3302 residue_buffers[ch] = f->channel_buffers[j];
3303 }
3304 ++ch;
3305 }
3306 }
3307 r = map->submap_residue[i];
3308 decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
3309 }
3310
3311 if (f->alloc.alloc_buffer)
3312 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3313 CHECK(f);
3314
3315 // INVERSE COUPLING
3316 for (i = map->coupling_steps-1; i >= 0; --i) {
3317 int n2 = n >> 1;
3318 float *m = f->channel_buffers[map->chan[i].magnitude];
3319 float *a = f->channel_buffers[map->chan[i].angle ];
3320 for (j=0; j < n2; ++j) {
3321 float a2,m2;
3322 if (m[j] > 0)
3323 if (a[j] > 0)
3324 m2 = m[j], a2 = m[j] - a[j];
3325 else
3326 a2 = m[j], m2 = m[j] + a[j];
3327 else
3328 if (a[j] > 0)
3329 m2 = m[j], a2 = m[j] + a[j];
3330 else
3331 a2 = m[j], m2 = m[j] - a[j];
3332 m[j] = m2;
3333 a[j] = a2;
3334 }
3335 }
3336 CHECK(f);
3337
3338 // finish decoding the floors
3339 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3340 for (i=0; i < f->channels; ++i) {
3341 if (really_zero_channel[i]) {
3342 memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3343 } else {
3344 do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
3345 }
3346 }
3347 #else
3348 for (i=0; i < f->channels; ++i) {
3349 if (really_zero_channel[i]) {
3350 memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3351 } else {
3352 for (j=0; j < n2; ++j)
3353 f->channel_buffers[i][j] *= f->floor_buffers[i][j];
3354 }
3355 }
3356 #endif
3357
3358 // INVERSE MDCT
3359 CHECK(f);
3360 for (i=0; i < f->channels; ++i)
3361 inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
3362 CHECK(f);
3363
3364 // this shouldn't be necessary, unless we exited on an error
3365 // and want to flush to get to the next packet
3366 flush_packet(f);
3367
3368 if (f->first_decode) {
3369 // assume we start so first non-discarded sample is sample 0
3370 // this isn't to spec, but spec would require us to read ahead
3371 // and decode the size of all current frames--could be done,
3372 // but presumably it's not a commonly used feature
3373 f->current_loc = -n2; // start of first frame is positioned for discard
3374 // we might have to discard samples "from" the next frame too,
3375 // if we're lapping a large block then a small at the start?
3376 f->discard_samples_deferred = n - right_end;
3377 f->current_loc_valid = TRUE;
3378 f->first_decode = FALSE;
3379 } else if (f->discard_samples_deferred) {
3380 if (f->discard_samples_deferred >= right_start - left_start) {
3381 f->discard_samples_deferred -= (right_start - left_start);
3382 left_start = right_start;
3383 *p_left = left_start;
3384 } else {
3385 left_start += f->discard_samples_deferred;
3386 *p_left = left_start;
3387 f->discard_samples_deferred = 0;
3388 }
3389 } else if (f->previous_length == 0 && f->current_loc_valid) {
3390 // we're recovering from a seek... that means we're going to discard
3391 // the samples from this packet even though we know our position from
3392 // the last page header, so we need to update the position based on
3393 // the discarded samples here
3394 // but wait, the code below is going to add this in itself even
3395 // on a discard, so we don't need to do it here...
3396 }
3397
3398 // check if we have ogg information about the sample # for this packet
3399 if (f->last_seg_which == f->end_seg_with_known_loc) {
3400 // if we have a valid current loc, and this is final:
3401 if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
3402 uint32 current_end = f->known_loc_for_packet;
3403 // then let's infer the size of the (probably) short final frame
3404 if (current_end < f->current_loc + (right_end-left_start)) {
3405 if (current_end < f->current_loc) {
3406 // negative truncation, that's impossible!
3407 *len = 0;
3408 } else {
3409 *len = current_end - f->current_loc;
3410 }
3411 *len += left_start; // this doesn't seem right, but has no ill effect on my test files
3412 if (*len > right_end) *len = right_end; // this should never happen
3413 f->current_loc += *len;
3414 return TRUE;
3415 }
3416 }
3417 // otherwise, just set our sample loc
3418 // guess that the ogg granule pos refers to the _middle_ of the
3419 // last frame?
3420 // set f->current_loc to the position of left_start
3421 f->current_loc = f->known_loc_for_packet - (n2-left_start);
3422 f->current_loc_valid = TRUE;
3423 }
3424 if (f->current_loc_valid)
3425 f->current_loc += (right_start - left_start);
3426
3427 if (f->alloc.alloc_buffer)
3428 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3429 *len = right_end; // ignore samples after the window goes to 0
3430 CHECK(f);
3431
3432 return TRUE;
3433 }
3434
vorbis_decode_packet(vorb * f,int * len,int * p_left,int * p_right)3435 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
3436 {
3437 int mode, left_end, right_end;
3438 if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
3439 return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
3440 }
3441
vorbis_finish_frame(stb_vorbis * f,int len,int left,int right)3442 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
3443 {
3444 int prev,i,j;
3445 // we use right&left (the start of the right- and left-window sin()-regions)
3446 // to determine how much to return, rather than inferring from the rules
3447 // (same result, clearer code); 'left' indicates where our sin() window
3448 // starts, therefore where the previous window's right edge starts, and
3449 // therefore where to start mixing from the previous buffer. 'right'
3450 // indicates where our sin() ending-window starts, therefore that's where
3451 // we start saving, and where our returned-data ends.
3452
3453 // mixin from previous window
3454 if (f->previous_length) {
3455 int i,j, n = f->previous_length;
3456 float *w = get_window(f, n);
3457 for (i=0; i < f->channels; ++i) {
3458 for (j=0; j < n; ++j)
3459 f->channel_buffers[i][left+j] =
3460 f->channel_buffers[i][left+j]*w[ j] +
3461 f->previous_window[i][ j]*w[n-1-j];
3462 }
3463 }
3464
3465 prev = f->previous_length;
3466
3467 // last half of this data becomes previous window
3468 f->previous_length = len - right;
3469
3470 // @OPTIMIZE: could avoid this copy by double-buffering the
3471 // output (flipping previous_window with channel_buffers), but
3472 // then previous_window would have to be 2x as large, and
3473 // channel_buffers couldn't be temp mem (although they're NOT
3474 // currently temp mem, they could be (unless we want to level
3475 // performance by spreading out the computation))
3476 for (i=0; i < f->channels; ++i)
3477 for (j=0; right+j < len; ++j)
3478 f->previous_window[i][j] = f->channel_buffers[i][right+j];
3479
3480 if (!prev)
3481 // there was no previous packet, so this data isn't valid...
3482 // this isn't entirely true, only the would-have-overlapped data
3483 // isn't valid, but this seems to be what the spec requires
3484 return 0;
3485
3486 // truncate a short frame
3487 if (len < right) right = len;
3488
3489 f->samples_output += right-left;
3490
3491 return right - left;
3492 }
3493
vorbis_pump_first_frame(stb_vorbis * f)3494 static int vorbis_pump_first_frame(stb_vorbis *f)
3495 {
3496 int len, right, left, res;
3497 res = vorbis_decode_packet(f, &len, &left, &right);
3498 if (res)
3499 vorbis_finish_frame(f, len, left, right);
3500 return res;
3501 }
3502
3503 #ifndef STB_VORBIS_NO_PUSHDATA_API
is_whole_packet_present(stb_vorbis * f,int end_page)3504 static int is_whole_packet_present(stb_vorbis *f, int end_page)
3505 {
3506 // make sure that we have the packet available before continuing...
3507 // this requires a full ogg parse, but we know we can fetch from f->stream
3508
3509 // instead of coding this out explicitly, we could save the current read state,
3510 // read the next packet with get8() until end-of-packet, check f->eof, then
3511 // reset the state? but that would be slower, esp. since we'd have over 256 bytes
3512 // of state to restore (primarily the page segment table)
3513
3514 int s = f->next_seg, first = TRUE;
3515 uint8 *p = f->stream;
3516
3517 if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
3518 for (; s < f->segment_count; ++s) {
3519 p += f->segments[s];
3520 if (f->segments[s] < 255) // stop at first short segment
3521 break;
3522 }
3523 // either this continues, or it ends it...
3524 if (end_page)
3525 if (s < f->segment_count-1) return error(f, VORBIS_invalid_stream);
3526 if (s == f->segment_count)
3527 s = -1; // set 'crosses page' flag
3528 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3529 first = FALSE;
3530 }
3531 for (; s == -1;) {
3532 uint8 *q;
3533 int n;
3534
3535 // check that we have the page header ready
3536 if (p + 26 >= f->stream_end) return error(f, VORBIS_need_more_data);
3537 // validate the page
3538 if (memcmp(p, ogg_page_header, 4)) return error(f, VORBIS_invalid_stream);
3539 if (p[4] != 0) return error(f, VORBIS_invalid_stream);
3540 if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
3541 if (f->previous_length)
3542 if ((p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3543 // if no previous length, we're resynching, so we can come in on a continued-packet,
3544 // which we'll just drop
3545 } else {
3546 if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3547 }
3548 n = p[26]; // segment counts
3549 q = p+27; // q points to segment table
3550 p = q + n; // advance past header
3551 // make sure we've read the segment table
3552 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3553 for (s=0; s < n; ++s) {
3554 p += q[s];
3555 if (q[s] < 255)
3556 break;
3557 }
3558 if (end_page)
3559 if (s < n-1) return error(f, VORBIS_invalid_stream);
3560 if (s == n)
3561 s = -1; // set 'crosses page' flag
3562 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3563 first = FALSE;
3564 }
3565 return TRUE;
3566 }
3567 #endif // !STB_VORBIS_NO_PUSHDATA_API
3568
start_decoder(vorb * f)3569 static int start_decoder(vorb *f)
3570 {
3571 uint8 header[6], x,y;
3572 int len,i,j,k, max_submaps = 0;
3573 int longest_floorlist=0;
3574
3575 // first page, first packet
3576
3577 if (!start_page(f)) return FALSE;
3578 // validate page flag
3579 if (!(f->page_flag & PAGEFLAG_first_page)) return error(f, VORBIS_invalid_first_page);
3580 if (f->page_flag & PAGEFLAG_last_page) return error(f, VORBIS_invalid_first_page);
3581 if (f->page_flag & PAGEFLAG_continued_packet) return error(f, VORBIS_invalid_first_page);
3582 // check for expected packet length
3583 if (f->segment_count != 1) return error(f, VORBIS_invalid_first_page);
3584 if (f->segments[0] != 30) {
3585 // check for the Ogg skeleton fishead identifying header to refine our error
3586 if (f->segments[0] == 64 &&
3587 getn(f, header, 6) &&
3588 header[0] == 'f' &&
3589 header[1] == 'i' &&
3590 header[2] == 's' &&
3591 header[3] == 'h' &&
3592 header[4] == 'e' &&
3593 header[5] == 'a' &&
3594 get8(f) == 'd' &&
3595 get8(f) == '\0') return error(f, VORBIS_ogg_skeleton_not_supported);
3596 else
3597 return error(f, VORBIS_invalid_first_page);
3598 }
3599
3600 // read packet
3601 // check packet header
3602 if (get8(f) != VORBIS_packet_id) return error(f, VORBIS_invalid_first_page);
3603 if (!getn(f, header, 6)) return error(f, VORBIS_unexpected_eof);
3604 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_first_page);
3605 // vorbis_version
3606 if (get32(f) != 0) return error(f, VORBIS_invalid_first_page);
3607 f->channels = get8(f); if (!f->channels) return error(f, VORBIS_invalid_first_page);
3608 if (f->channels > STB_VORBIS_MAX_CHANNELS) return error(f, VORBIS_too_many_channels);
3609 f->sample_rate = get32(f); if (!f->sample_rate) return error(f, VORBIS_invalid_first_page);
3610 get32(f); // bitrate_maximum
3611 get32(f); // bitrate_nominal
3612 get32(f); // bitrate_minimum
3613 x = get8(f);
3614 {
3615 int log0,log1;
3616 log0 = x & 15;
3617 log1 = x >> 4;
3618 f->blocksize_0 = 1 << log0;
3619 f->blocksize_1 = 1 << log1;
3620 if (log0 < 6 || log0 > 13) return error(f, VORBIS_invalid_setup);
3621 if (log1 < 6 || log1 > 13) return error(f, VORBIS_invalid_setup);
3622 if (log0 > log1) return error(f, VORBIS_invalid_setup);
3623 }
3624
3625 // framing_flag
3626 x = get8(f);
3627 if (!(x & 1)) return error(f, VORBIS_invalid_first_page);
3628
3629 // second packet!
3630 if (!start_page(f)) return FALSE;
3631
3632 if (!start_packet(f)) return FALSE;
3633 do {
3634 len = next_segment(f);
3635 skip(f, len);
3636 f->bytes_in_seg = 0;
3637 } while (len);
3638
3639 // third packet!
3640 if (!start_packet(f)) return FALSE;
3641
3642 #ifndef STB_VORBIS_NO_PUSHDATA_API
3643 if (IS_PUSH_MODE(f)) {
3644 if (!is_whole_packet_present(f, TRUE)) {
3645 // convert error in ogg header to write type
3646 if (f->error == VORBIS_invalid_stream)
3647 f->error = VORBIS_invalid_setup;
3648 return FALSE;
3649 }
3650 }
3651 #endif
3652
3653 crc32_init(); // always init it, to avoid multithread race conditions
3654
3655 if (get8_packet(f) != VORBIS_packet_setup) return error(f, VORBIS_invalid_setup);
3656 for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3657 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_setup);
3658
3659 // codebooks
3660
3661 f->codebook_count = get_bits(f,8) + 1;
3662 f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
3663 if (f->codebooks == NULL) return error(f, VORBIS_outofmem);
3664 memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
3665 for (i=0; i < f->codebook_count; ++i) {
3666 uint32 *values;
3667 int ordered, sorted_count;
3668 int total=0;
3669 uint8 *lengths;
3670 Codebook *c = f->codebooks+i;
3671 CHECK(f);
3672 x = get_bits(f, 8); if (x != 0x42) return error(f, VORBIS_invalid_setup);
3673 x = get_bits(f, 8); if (x != 0x43) return error(f, VORBIS_invalid_setup);
3674 x = get_bits(f, 8); if (x != 0x56) return error(f, VORBIS_invalid_setup);
3675 x = get_bits(f, 8);
3676 c->dimensions = (get_bits(f, 8)<<8) + x;
3677 x = get_bits(f, 8);
3678 y = get_bits(f, 8);
3679 c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
3680 ordered = get_bits(f,1);
3681 c->sparse = ordered ? 0 : get_bits(f,1);
3682
3683 if (c->dimensions == 0 && c->entries != 0) return error(f, VORBIS_invalid_setup);
3684
3685 if (c->sparse)
3686 lengths = (uint8 *) setup_temp_malloc(f, c->entries);
3687 else
3688 lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3689
3690 if (!lengths) return error(f, VORBIS_outofmem);
3691
3692 if (ordered) {
3693 int current_entry = 0;
3694 int current_length = get_bits(f,5) + 1;
3695 while (current_entry < c->entries) {
3696 int limit = c->entries - current_entry;
3697 int n = get_bits(f, ilog(limit));
3698 if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
3699 memset(lengths + current_entry, current_length, n);
3700 current_entry += n;
3701 ++current_length;
3702 }
3703 } else {
3704 for (j=0; j < c->entries; ++j) {
3705 int present = c->sparse ? get_bits(f,1) : 1;
3706 if (present) {
3707 lengths[j] = get_bits(f, 5) + 1;
3708 ++total;
3709 if (lengths[j] == 32)
3710 return error(f, VORBIS_invalid_setup);
3711 } else {
3712 lengths[j] = NO_CODE;
3713 }
3714 }
3715 }
3716
3717 if (c->sparse && total >= c->entries >> 2) {
3718 // convert sparse items to non-sparse!
3719 if (c->entries > (int) f->setup_temp_memory_required)
3720 f->setup_temp_memory_required = c->entries;
3721
3722 c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3723 if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem);
3724 memcpy(c->codeword_lengths, lengths, c->entries);
3725 setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
3726 lengths = c->codeword_lengths;
3727 c->sparse = 0;
3728 }
3729
3730 // compute the size of the sorted tables
3731 if (c->sparse) {
3732 sorted_count = total;
3733 } else {
3734 sorted_count = 0;
3735 #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
3736 for (j=0; j < c->entries; ++j)
3737 if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
3738 ++sorted_count;
3739 #endif
3740 }
3741
3742 c->sorted_entries = sorted_count;
3743 values = NULL;
3744
3745 CHECK(f);
3746 if (!c->sparse) {
3747 c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
3748 if (!c->codewords) return error(f, VORBIS_outofmem);
3749 } else {
3750 unsigned int size;
3751 if (c->sorted_entries) {
3752 c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries);
3753 if (!c->codeword_lengths) return error(f, VORBIS_outofmem);
3754 c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
3755 if (!c->codewords) return error(f, VORBIS_outofmem);
3756 values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
3757 if (!values) return error(f, VORBIS_outofmem);
3758 }
3759 size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
3760 if (size > f->setup_temp_memory_required)
3761 f->setup_temp_memory_required = size;
3762 }
3763
3764 if (!compute_codewords(c, lengths, c->entries, values)) {
3765 if (c->sparse) setup_temp_free(f, values, 0);
3766 return error(f, VORBIS_invalid_setup);
3767 }
3768
3769 if (c->sorted_entries) {
3770 // allocate an extra slot for sentinels
3771 c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
3772 if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem);
3773 // allocate an extra slot at the front so that c->sorted_values[-1] is defined
3774 // so that we can catch that case without an extra if
3775 c->sorted_values = ( int *) setup_malloc(f, sizeof(*c->sorted_values ) * (c->sorted_entries+1));
3776 if (c->sorted_values == NULL) return error(f, VORBIS_outofmem);
3777 ++c->sorted_values;
3778 c->sorted_values[-1] = -1;
3779 compute_sorted_huffman(c, lengths, values);
3780 }
3781
3782 if (c->sparse) {
3783 setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
3784 setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
3785 setup_temp_free(f, lengths, c->entries);
3786 c->codewords = NULL;
3787 }
3788
3789 compute_accelerated_huffman(c);
3790
3791 CHECK(f);
3792 c->lookup_type = get_bits(f, 4);
3793 if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
3794 if (c->lookup_type > 0) {
3795 uint16 *mults;
3796 c->minimum_value = float32_unpack(get_bits(f, 32));
3797 c->delta_value = float32_unpack(get_bits(f, 32));
3798 c->value_bits = get_bits(f, 4)+1;
3799 c->sequence_p = get_bits(f,1);
3800 if (c->lookup_type == 1) {
3801 c->lookup_values = lookup1_values(c->entries, c->dimensions);
3802 } else {
3803 c->lookup_values = c->entries * c->dimensions;
3804 }
3805 if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup);
3806 mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
3807 if (mults == NULL) return error(f, VORBIS_outofmem);
3808 for (j=0; j < (int) c->lookup_values; ++j) {
3809 int q = get_bits(f, c->value_bits);
3810 if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
3811 mults[j] = q;
3812 }
3813
3814 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3815 if (c->lookup_type == 1) {
3816 int len, sparse = c->sparse;
3817 float last=0;
3818 // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3819 if (sparse) {
3820 if (c->sorted_entries == 0) goto skip;
3821 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
3822 } else
3823 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries * c->dimensions);
3824 if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3825 len = sparse ? c->sorted_entries : c->entries;
3826 for (j=0; j < len; ++j) {
3827 unsigned int z = sparse ? c->sorted_values[j] : j;
3828 unsigned int div=1;
3829 for (k=0; k < c->dimensions; ++k) {
3830 int off = (z / div) % c->lookup_values;
3831 float val = mults[off];
3832 val = mults[off]*c->delta_value + c->minimum_value + last;
3833 c->multiplicands[j*c->dimensions + k] = val;
3834 if (c->sequence_p)
3835 last = val;
3836 if (k+1 < c->dimensions) {
3837 if (div > UINT_MAX / (unsigned int) c->lookup_values) {
3838 setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
3839 return error(f, VORBIS_invalid_setup);
3840 }
3841 div *= c->lookup_values;
3842 }
3843 }
3844 }
3845 c->lookup_type = 2;
3846 }
3847 else
3848 #endif
3849 {
3850 float last=0;
3851 CHECK(f);
3852 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
3853 if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3854 for (j=0; j < (int) c->lookup_values; ++j) {
3855 float val = mults[j] * c->delta_value + c->minimum_value + last;
3856 c->multiplicands[j] = val;
3857 if (c->sequence_p)
3858 last = val;
3859 }
3860 }
3861 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3862 skip:;
3863 #endif
3864 setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values);
3865
3866 CHECK(f);
3867 }
3868 CHECK(f);
3869 }
3870
3871 // time domain transfers (notused)
3872
3873 x = get_bits(f, 6) + 1;
3874 for (i=0; i < x; ++i) {
3875 uint32 z = get_bits(f, 16);
3876 if (z != 0) return error(f, VORBIS_invalid_setup);
3877 }
3878
3879 // Floors
3880 f->floor_count = get_bits(f, 6)+1;
3881 f->floor_config = (Floor *) setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
3882 if (f->floor_config == NULL) return error(f, VORBIS_outofmem);
3883 for (i=0; i < f->floor_count; ++i) {
3884 f->floor_types[i] = get_bits(f, 16);
3885 if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
3886 if (f->floor_types[i] == 0) {
3887 Floor0 *g = &f->floor_config[i].floor0;
3888 g->order = get_bits(f,8);
3889 g->rate = get_bits(f,16);
3890 g->bark_map_size = get_bits(f,16);
3891 g->amplitude_bits = get_bits(f,6);
3892 g->amplitude_offset = get_bits(f,8);
3893 g->number_of_books = get_bits(f,4) + 1;
3894 for (j=0; j < g->number_of_books; ++j)
3895 g->book_list[j] = get_bits(f,8);
3896 return error(f, VORBIS_feature_not_supported);
3897 } else {
3898 stbv__floor_ordering p[31*8+2];
3899 Floor1 *g = &f->floor_config[i].floor1;
3900 int max_class = -1;
3901 g->partitions = get_bits(f, 5);
3902 for (j=0; j < g->partitions; ++j) {
3903 g->partition_class_list[j] = get_bits(f, 4);
3904 if (g->partition_class_list[j] > max_class)
3905 max_class = g->partition_class_list[j];
3906 }
3907 for (j=0; j <= max_class; ++j) {
3908 g->class_dimensions[j] = get_bits(f, 3)+1;
3909 g->class_subclasses[j] = get_bits(f, 2);
3910 if (g->class_subclasses[j]) {
3911 g->class_masterbooks[j] = get_bits(f, 8);
3912 if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3913 }
3914 for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
3915 g->subclass_books[j][k] = get_bits(f,8)-1;
3916 if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3917 }
3918 }
3919 g->floor1_multiplier = get_bits(f,2)+1;
3920 g->rangebits = get_bits(f,4);
3921 g->Xlist[0] = 0;
3922 g->Xlist[1] = 1 << g->rangebits;
3923 g->values = 2;
3924 for (j=0; j < g->partitions; ++j) {
3925 int c = g->partition_class_list[j];
3926 for (k=0; k < g->class_dimensions[c]; ++k) {
3927 g->Xlist[g->values] = get_bits(f, g->rangebits);
3928 ++g->values;
3929 }
3930 }
3931 // precompute the sorting
3932 for (j=0; j < g->values; ++j) {
3933 p[j].x = g->Xlist[j];
3934 p[j].id = j;
3935 }
3936 qsort(p, g->values, sizeof(p[0]), point_compare);
3937 for (j=0; j < g->values; ++j)
3938 g->sorted_order[j] = (uint8) p[j].id;
3939 // precompute the neighbors
3940 for (j=2; j < g->values; ++j) {
3941 int low,hi;
3942 neighbors(g->Xlist, j, &low,&hi);
3943 g->neighbors[j][0] = low;
3944 g->neighbors[j][1] = hi;
3945 }
3946
3947 if (g->values > longest_floorlist)
3948 longest_floorlist = g->values;
3949 }
3950 }
3951
3952 // Residue
3953 f->residue_count = get_bits(f, 6)+1;
3954 f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0]));
3955 if (f->residue_config == NULL) return error(f, VORBIS_outofmem);
3956 memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0]));
3957 for (i=0; i < f->residue_count; ++i) {
3958 uint8 residue_cascade[64];
3959 Residue *r = f->residue_config+i;
3960 f->residue_types[i] = get_bits(f, 16);
3961 if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
3962 r->begin = get_bits(f, 24);
3963 r->end = get_bits(f, 24);
3964 if (r->end < r->begin) return error(f, VORBIS_invalid_setup);
3965 r->part_size = get_bits(f,24)+1;
3966 r->classifications = get_bits(f,6)+1;
3967 r->classbook = get_bits(f,8);
3968 if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3969 for (j=0; j < r->classifications; ++j) {
3970 uint8 high_bits=0;
3971 uint8 low_bits=get_bits(f,3);
3972 if (get_bits(f,1))
3973 high_bits = get_bits(f,5);
3974 residue_cascade[j] = high_bits*8 + low_bits;
3975 }
3976 r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
3977 if (r->residue_books == NULL) return error(f, VORBIS_outofmem);
3978 for (j=0; j < r->classifications; ++j) {
3979 for (k=0; k < 8; ++k) {
3980 if (residue_cascade[j] & (1 << k)) {
3981 r->residue_books[j][k] = get_bits(f, 8);
3982 if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3983 } else {
3984 r->residue_books[j][k] = -1;
3985 }
3986 }
3987 }
3988 // precompute the classifications[] array to avoid inner-loop mod/divide
3989 // call it 'classdata' since we already have r->classifications
3990 r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3991 if (!r->classdata) return error(f, VORBIS_outofmem);
3992 memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3993 for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
3994 int classwords = f->codebooks[r->classbook].dimensions;
3995 int temp = j;
3996 r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
3997 if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem);
3998 for (k=classwords-1; k >= 0; --k) {
3999 r->classdata[j][k] = temp % r->classifications;
4000 temp /= r->classifications;
4001 }
4002 }
4003 }
4004
4005 f->mapping_count = get_bits(f,6)+1;
4006 f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
4007 if (f->mapping == NULL) return error(f, VORBIS_outofmem);
4008 memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping));
4009 for (i=0; i < f->mapping_count; ++i) {
4010 Mapping *m = f->mapping + i;
4011 int mapping_type = get_bits(f,16);
4012 if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
4013 m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
4014 if (m->chan == NULL) return error(f, VORBIS_outofmem);
4015 if (get_bits(f,1))
4016 m->submaps = get_bits(f,4)+1;
4017 else
4018 m->submaps = 1;
4019 if (m->submaps > max_submaps)
4020 max_submaps = m->submaps;
4021 if (get_bits(f,1)) {
4022 m->coupling_steps = get_bits(f,8)+1;
4023 for (k=0; k < m->coupling_steps; ++k) {
4024 m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
4025 m->chan[k].angle = get_bits(f, ilog(f->channels-1));
4026 if (m->chan[k].magnitude >= f->channels) return error(f, VORBIS_invalid_setup);
4027 if (m->chan[k].angle >= f->channels) return error(f, VORBIS_invalid_setup);
4028 if (m->chan[k].magnitude == m->chan[k].angle) return error(f, VORBIS_invalid_setup);
4029 }
4030 } else
4031 m->coupling_steps = 0;
4032
4033 // reserved field
4034 if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
4035 if (m->submaps > 1) {
4036 for (j=0; j < f->channels; ++j) {
4037 m->chan[j].mux = get_bits(f, 4);
4038 if (m->chan[j].mux >= m->submaps) return error(f, VORBIS_invalid_setup);
4039 }
4040 } else
4041 // @SPECIFICATION: this case is missing from the spec
4042 for (j=0; j < f->channels; ++j)
4043 m->chan[j].mux = 0;
4044
4045 for (j=0; j < m->submaps; ++j) {
4046 get_bits(f,8); // discard
4047 m->submap_floor[j] = get_bits(f,8);
4048 m->submap_residue[j] = get_bits(f,8);
4049 if (m->submap_floor[j] >= f->floor_count) return error(f, VORBIS_invalid_setup);
4050 if (m->submap_residue[j] >= f->residue_count) return error(f, VORBIS_invalid_setup);
4051 }
4052 }
4053
4054 // Modes
4055 f->mode_count = get_bits(f, 6)+1;
4056 for (i=0; i < f->mode_count; ++i) {
4057 Mode *m = f->mode_config+i;
4058 m->blockflag = get_bits(f,1);
4059 m->windowtype = get_bits(f,16);
4060 m->transformtype = get_bits(f,16);
4061 m->mapping = get_bits(f,8);
4062 if (m->windowtype != 0) return error(f, VORBIS_invalid_setup);
4063 if (m->transformtype != 0) return error(f, VORBIS_invalid_setup);
4064 if (m->mapping >= f->mapping_count) return error(f, VORBIS_invalid_setup);
4065 }
4066
4067 flush_packet(f);
4068
4069 f->previous_length = 0;
4070
4071 for (i=0; i < f->channels; ++i) {
4072 f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
4073 f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4074 f->finalY[i] = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
4075 if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem);
4076 memset(f->channel_buffers[i], 0, sizeof(float) * f->blocksize_1);
4077 #ifdef STB_VORBIS_NO_DEFER_FLOOR
4078 f->floor_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4079 if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem);
4080 #endif
4081 }
4082
4083 if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
4084 if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
4085 f->blocksize[0] = f->blocksize_0;
4086 f->blocksize[1] = f->blocksize_1;
4087
4088 #ifdef STB_VORBIS_DIVIDE_TABLE
4089 if (integer_divide_table[1][1]==0)
4090 for (i=0; i < DIVTAB_NUMER; ++i)
4091 for (j=1; j < DIVTAB_DENOM; ++j)
4092 integer_divide_table[i][j] = i / j;
4093 #endif
4094
4095 // compute how much temporary memory is needed
4096
4097 // 1.
4098 {
4099 uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
4100 uint32 classify_mem;
4101 int i,max_part_read=0;
4102 for (i=0; i < f->residue_count; ++i) {
4103 Residue *r = f->residue_config + i;
4104 unsigned int actual_size = f->blocksize_1 / 2;
4105 unsigned int limit_r_begin = r->begin < actual_size ? r->begin : actual_size;
4106 unsigned int limit_r_end = r->end < actual_size ? r->end : actual_size;
4107 int n_read = limit_r_end - limit_r_begin;
4108 int part_read = n_read / r->part_size;
4109 if (part_read > max_part_read)
4110 max_part_read = part_read;
4111 }
4112 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
4113 classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
4114 #else
4115 classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
4116 #endif
4117
4118 // maximum reasonable partition size is f->blocksize_1
4119
4120 f->temp_memory_required = classify_mem;
4121 if (imdct_mem > f->temp_memory_required)
4122 f->temp_memory_required = imdct_mem;
4123 }
4124
4125 f->first_decode = TRUE;
4126
4127 if (f->alloc.alloc_buffer) {
4128 assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
4129 // check if there's enough temp memory so we don't error later
4130 if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
4131 return error(f, VORBIS_outofmem);
4132 }
4133
4134 f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
4135
4136 return TRUE;
4137 }
4138
vorbis_deinit(stb_vorbis * p)4139 static void vorbis_deinit(stb_vorbis *p)
4140 {
4141 int i,j;
4142 if (p->residue_config) {
4143 for (i=0; i < p->residue_count; ++i) {
4144 Residue *r = p->residue_config+i;
4145 if (r->classdata) {
4146 for (j=0; j < p->codebooks[r->classbook].entries; ++j)
4147 setup_free(p, r->classdata[j]);
4148 setup_free(p, r->classdata);
4149 }
4150 setup_free(p, r->residue_books);
4151 }
4152 }
4153
4154 if (p->codebooks) {
4155 CHECK(p);
4156 for (i=0; i < p->codebook_count; ++i) {
4157 Codebook *c = p->codebooks + i;
4158 setup_free(p, c->codeword_lengths);
4159 setup_free(p, c->multiplicands);
4160 setup_free(p, c->codewords);
4161 setup_free(p, c->sorted_codewords);
4162 // c->sorted_values[-1] is the first entry in the array
4163 setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
4164 }
4165 setup_free(p, p->codebooks);
4166 }
4167 setup_free(p, p->floor_config);
4168 setup_free(p, p->residue_config);
4169 if (p->mapping) {
4170 for (i=0; i < p->mapping_count; ++i)
4171 setup_free(p, p->mapping[i].chan);
4172 setup_free(p, p->mapping);
4173 }
4174 CHECK(p);
4175 for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
4176 setup_free(p, p->channel_buffers[i]);
4177 setup_free(p, p->previous_window[i]);
4178 #ifdef STB_VORBIS_NO_DEFER_FLOOR
4179 setup_free(p, p->floor_buffers[i]);
4180 #endif
4181 setup_free(p, p->finalY[i]);
4182 }
4183 for (i=0; i < 2; ++i) {
4184 setup_free(p, p->A[i]);
4185 setup_free(p, p->B[i]);
4186 setup_free(p, p->C[i]);
4187 setup_free(p, p->window[i]);
4188 setup_free(p, p->bit_reverse[i]);
4189 }
4190 #ifndef STB_VORBIS_NO_STDIO
4191 if (p->close_on_free) fclose(p->f);
4192 #endif
4193 }
4194
stb_vorbis_close(stb_vorbis * p)4195 void stb_vorbis_close(stb_vorbis *p)
4196 {
4197 if (p == NULL) return;
4198 vorbis_deinit(p);
4199 setup_free(p,p);
4200 }
4201
vorbis_init(stb_vorbis * p,const stb_vorbis_alloc * z)4202 static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
4203 {
4204 memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start
4205 if (z) {
4206 p->alloc = *z;
4207 p->alloc.alloc_buffer_length_in_bytes = (p->alloc.alloc_buffer_length_in_bytes+3) & ~3;
4208 p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
4209 }
4210 p->eof = 0;
4211 p->error = VORBIS__no_error;
4212 p->stream = NULL;
4213 p->codebooks = NULL;
4214 p->page_crc_tests = -1;
4215 #ifndef STB_VORBIS_NO_STDIO
4216 p->close_on_free = FALSE;
4217 p->f = NULL;
4218 #endif
4219 }
4220
stb_vorbis_get_sample_offset(stb_vorbis * f)4221 int stb_vorbis_get_sample_offset(stb_vorbis *f)
4222 {
4223 if (f->current_loc_valid)
4224 return f->current_loc;
4225 else
4226 return -1;
4227 }
4228
stb_vorbis_get_info(stb_vorbis * f)4229 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
4230 {
4231 stb_vorbis_info d;
4232 d.channels = f->channels;
4233 d.sample_rate = f->sample_rate;
4234 d.setup_memory_required = f->setup_memory_required;
4235 d.setup_temp_memory_required = f->setup_temp_memory_required;
4236 d.temp_memory_required = f->temp_memory_required;
4237 d.max_frame_size = f->blocksize_1 >> 1;
4238 return d;
4239 }
4240
stb_vorbis_get_error(stb_vorbis * f)4241 int stb_vorbis_get_error(stb_vorbis *f)
4242 {
4243 int e = f->error;
4244 f->error = VORBIS__no_error;
4245 return e;
4246 }
4247
vorbis_alloc(stb_vorbis * f)4248 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
4249 {
4250 stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
4251 return p;
4252 }
4253
4254 #ifndef STB_VORBIS_NO_PUSHDATA_API
4255
stb_vorbis_flush_pushdata(stb_vorbis * f)4256 void stb_vorbis_flush_pushdata(stb_vorbis *f)
4257 {
4258 f->previous_length = 0;
4259 f->page_crc_tests = 0;
4260 f->discard_samples_deferred = 0;
4261 f->current_loc_valid = FALSE;
4262 f->first_decode = FALSE;
4263 f->samples_output = 0;
4264 f->channel_buffer_start = 0;
4265 f->channel_buffer_end = 0;
4266 }
4267
vorbis_search_for_page_pushdata(vorb * f,uint8 * data,int data_len)4268 static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
4269 {
4270 int i,n;
4271 for (i=0; i < f->page_crc_tests; ++i)
4272 f->scan[i].bytes_done = 0;
4273
4274 // if we have room for more scans, search for them first, because
4275 // they may cause us to stop early if their header is incomplete
4276 if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
4277 if (data_len < 4) return 0;
4278 data_len -= 3; // need to look for 4-byte sequence, so don't miss
4279 // one that straddles a boundary
4280 for (i=0; i < data_len; ++i) {
4281 if (data[i] == 0x4f) {
4282 if (0==memcmp(data+i, ogg_page_header, 4)) {
4283 int j,len;
4284 uint32 crc;
4285 // make sure we have the whole page header
4286 if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
4287 // only read up to this page start, so hopefully we'll
4288 // have the whole page header start next time
4289 data_len = i;
4290 break;
4291 }
4292 // ok, we have it all; compute the length of the page
4293 len = 27 + data[i+26];
4294 for (j=0; j < data[i+26]; ++j)
4295 len += data[i+27+j];
4296 // scan everything up to the embedded crc (which we must 0)
4297 crc = 0;
4298 for (j=0; j < 22; ++j)
4299 crc = crc32_update(crc, data[i+j]);
4300 // now process 4 0-bytes
4301 for ( ; j < 26; ++j)
4302 crc = crc32_update(crc, 0);
4303 // len is the total number of bytes we need to scan
4304 n = f->page_crc_tests++;
4305 f->scan[n].bytes_left = len-j;
4306 f->scan[n].crc_so_far = crc;
4307 f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24);
4308 // if the last frame on a page is continued to the next, then
4309 // we can't recover the sample_loc immediately
4310 if (data[i+27+data[i+26]-1] == 255)
4311 f->scan[n].sample_loc = ~0;
4312 else
4313 f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24);
4314 f->scan[n].bytes_done = i+j;
4315 if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT)
4316 break;
4317 // keep going if we still have room for more
4318 }
4319 }
4320 }
4321 }
4322
4323 for (i=0; i < f->page_crc_tests;) {
4324 uint32 crc;
4325 int j;
4326 int n = f->scan[i].bytes_done;
4327 int m = f->scan[i].bytes_left;
4328 if (m > data_len - n) m = data_len - n;
4329 // m is the bytes to scan in the current chunk
4330 crc = f->scan[i].crc_so_far;
4331 for (j=0; j < m; ++j)
4332 crc = crc32_update(crc, data[n+j]);
4333 f->scan[i].bytes_left -= m;
4334 f->scan[i].crc_so_far = crc;
4335 if (f->scan[i].bytes_left == 0) {
4336 // does it match?
4337 if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
4338 // Houston, we have page
4339 data_len = n+m; // consumption amount is wherever that scan ended
4340 f->page_crc_tests = -1; // drop out of page scan mode
4341 f->previous_length = 0; // decode-but-don't-output one frame
4342 f->next_seg = -1; // start a new page
4343 f->current_loc = f->scan[i].sample_loc; // set the current sample location
4344 // to the amount we'd have decoded had we decoded this page
4345 f->current_loc_valid = f->current_loc != ~0U;
4346 return data_len;
4347 }
4348 // delete entry
4349 f->scan[i] = f->scan[--f->page_crc_tests];
4350 } else {
4351 ++i;
4352 }
4353 }
4354
4355 return data_len;
4356 }
4357
4358 // return value: number of bytes we used
stb_vorbis_decode_frame_pushdata(stb_vorbis * f,const uint8 * data,int data_len,int * channels,float *** output,int * samples)4359 int stb_vorbis_decode_frame_pushdata(
4360 stb_vorbis *f, // the file we're decoding
4361 const uint8 *data, int data_len, // the memory available for decoding
4362 int *channels, // place to write number of float * buffers
4363 float ***output, // place to write float ** array of float * buffers
4364 int *samples // place to write number of output samples
4365 )
4366 {
4367 int i;
4368 int len,right,left;
4369
4370 if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4371
4372 if (f->page_crc_tests >= 0) {
4373 *samples = 0;
4374 return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len);
4375 }
4376
4377 f->stream = (uint8 *) data;
4378 f->stream_end = (uint8 *) data + data_len;
4379 f->error = VORBIS__no_error;
4380
4381 // check that we have the entire packet in memory
4382 if (!is_whole_packet_present(f, FALSE)) {
4383 *samples = 0;
4384 return 0;
4385 }
4386
4387 if (!vorbis_decode_packet(f, &len, &left, &right)) {
4388 // save the actual error we encountered
4389 enum STBVorbisError error = f->error;
4390 if (error == VORBIS_bad_packet_type) {
4391 // flush and resynch
4392 f->error = VORBIS__no_error;
4393 while (get8_packet(f) != EOP)
4394 if (f->eof) break;
4395 *samples = 0;
4396 return (int) (f->stream - data);
4397 }
4398 if (error == VORBIS_continued_packet_flag_invalid) {
4399 if (f->previous_length == 0) {
4400 // we may be resynching, in which case it's ok to hit one
4401 // of these; just discard the packet
4402 f->error = VORBIS__no_error;
4403 while (get8_packet(f) != EOP)
4404 if (f->eof) break;
4405 *samples = 0;
4406 return (int) (f->stream - data);
4407 }
4408 }
4409 // if we get an error while parsing, what to do?
4410 // well, it DEFINITELY won't work to continue from where we are!
4411 stb_vorbis_flush_pushdata(f);
4412 // restore the error that actually made us bail
4413 f->error = error;
4414 *samples = 0;
4415 return 1;
4416 }
4417
4418 // success!
4419 len = vorbis_finish_frame(f, len, left, right);
4420 for (i=0; i < f->channels; ++i)
4421 f->outputs[i] = f->channel_buffers[i] + left;
4422
4423 if (channels) *channels = f->channels;
4424 *samples = len;
4425 *output = f->outputs;
4426 return (int) (f->stream - data);
4427 }
4428
stb_vorbis_open_pushdata(const unsigned char * data,int data_len,int * data_used,int * error,const stb_vorbis_alloc * alloc)4429 stb_vorbis *stb_vorbis_open_pushdata(
4430 const unsigned char *data, int data_len, // the memory available for decoding
4431 int *data_used, // only defined if result is not NULL
4432 int *error, const stb_vorbis_alloc *alloc)
4433 {
4434 stb_vorbis *f, p;
4435 vorbis_init(&p, alloc);
4436 p.stream = (uint8 *) data;
4437 p.stream_end = (uint8 *) data + data_len;
4438 p.push_mode = TRUE;
4439 if (!start_decoder(&p)) {
4440 if (p.eof)
4441 *error = VORBIS_need_more_data;
4442 else
4443 *error = p.error;
4444 return NULL;
4445 }
4446 f = vorbis_alloc(&p);
4447 if (f) {
4448 *f = p;
4449 *data_used = (int) (f->stream - data);
4450 *error = 0;
4451 return f;
4452 } else {
4453 vorbis_deinit(&p);
4454 return NULL;
4455 }
4456 }
4457 #endif // STB_VORBIS_NO_PUSHDATA_API
4458
stb_vorbis_get_file_offset(stb_vorbis * f)4459 unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
4460 {
4461 #ifndef STB_VORBIS_NO_PUSHDATA_API
4462 if (f->push_mode) return 0;
4463 #endif
4464 if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start);
4465 #ifndef STB_VORBIS_NO_STDIO
4466 return (unsigned int) (ftell(f->f) - f->f_start);
4467 #endif
4468 }
4469
4470 #ifndef STB_VORBIS_NO_PULLDATA_API
4471 //
4472 // DATA-PULLING API
4473 //
4474
vorbis_find_page(stb_vorbis * f,uint32 * end,uint32 * last)4475 static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
4476 {
4477 for(;;) {
4478 int n;
4479 if (f->eof) return 0;
4480 n = get8(f);
4481 if (n == 0x4f) { // page header candidate
4482 unsigned int retry_loc = stb_vorbis_get_file_offset(f);
4483 int i;
4484 // check if we're off the end of a file_section stream
4485 if (retry_loc - 25 > f->stream_len)
4486 return 0;
4487 // check the rest of the header
4488 for (i=1; i < 4; ++i)
4489 if (get8(f) != ogg_page_header[i])
4490 break;
4491 if (f->eof) return 0;
4492 if (i == 4) {
4493 uint8 header[27];
4494 uint32 i, crc, goal, len;
4495 for (i=0; i < 4; ++i)
4496 header[i] = ogg_page_header[i];
4497 for (; i < 27; ++i)
4498 header[i] = get8(f);
4499 if (f->eof) return 0;
4500 if (header[4] != 0) goto invalid;
4501 goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24);
4502 for (i=22; i < 26; ++i)
4503 header[i] = 0;
4504 crc = 0;
4505 for (i=0; i < 27; ++i)
4506 crc = crc32_update(crc, header[i]);
4507 len = 0;
4508 for (i=0; i < header[26]; ++i) {
4509 int s = get8(f);
4510 crc = crc32_update(crc, s);
4511 len += s;
4512 }
4513 if (len && f->eof) return 0;
4514 for (i=0; i < len; ++i)
4515 crc = crc32_update(crc, get8(f));
4516 // finished parsing probable page
4517 if (crc == goal) {
4518 // we could now check that it's either got the last
4519 // page flag set, OR it's followed by the capture
4520 // pattern, but I guess TECHNICALLY you could have
4521 // a file with garbage between each ogg page and recover
4522 // from it automatically? So even though that paranoia
4523 // might decrease the chance of an invalid decode by
4524 // another 2^32, not worth it since it would hose those
4525 // invalid-but-useful files?
4526 if (end)
4527 *end = stb_vorbis_get_file_offset(f);
4528 if (last) {
4529 if (header[5] & 0x04)
4530 *last = 1;
4531 else
4532 *last = 0;
4533 }
4534 set_file_offset(f, retry_loc-1);
4535 return 1;
4536 }
4537 }
4538 invalid:
4539 // not a valid page, so rewind and look for next one
4540 set_file_offset(f, retry_loc);
4541 }
4542 }
4543 }
4544
4545
4546 #define SAMPLE_unknown 0xffffffff
4547
4548 // seeking is implemented with a binary search, which narrows down the range to
4549 // 64K, before using a linear search (because finding the synchronization
4550 // pattern can be expensive, and the chance we'd find the end page again is
4551 // relatively high for small ranges)
4552 //
4553 // two initial interpolation-style probes are used at the start of the search
4554 // to try to bound either side of the binary search sensibly, while still
4555 // working in O(log n) time if they fail.
4556
get_seek_page_info(stb_vorbis * f,ProbedPage * z)4557 static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
4558 {
4559 uint8 header[27], lacing[255];
4560 int i,len;
4561
4562 // record where the page starts
4563 z->page_start = stb_vorbis_get_file_offset(f);
4564
4565 // parse the header
4566 getn(f, header, 27);
4567 if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
4568 return 0;
4569 getn(f, lacing, header[26]);
4570
4571 // determine the length of the payload
4572 len = 0;
4573 for (i=0; i < header[26]; ++i)
4574 len += lacing[i];
4575
4576 // this implies where the page ends
4577 z->page_end = z->page_start + 27 + header[26] + len;
4578
4579 // read the last-decoded sample out of the data
4580 z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
4581
4582 // restore file state to where we were
4583 set_file_offset(f, z->page_start);
4584 return 1;
4585 }
4586
4587 // rarely used function to seek back to the preceding page while finding the
4588 // start of a packet
go_to_page_before(stb_vorbis * f,unsigned int limit_offset)4589 static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
4590 {
4591 unsigned int previous_safe, end;
4592
4593 // now we want to seek back 64K from the limit
4594 if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset)
4595 previous_safe = limit_offset - 65536;
4596 else
4597 previous_safe = f->first_audio_page_offset;
4598
4599 set_file_offset(f, previous_safe);
4600
4601 while (vorbis_find_page(f, &end, NULL)) {
4602 if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
4603 return 1;
4604 set_file_offset(f, end);
4605 }
4606
4607 return 0;
4608 }
4609
4610 // implements the search logic for finding a page and starting decoding. if
4611 // the function succeeds, current_loc_valid will be true and current_loc will
4612 // be less than or equal to the provided sample number (the closer the
4613 // better).
seek_to_sample_coarse(stb_vorbis * f,uint32 sample_number)4614 static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
4615 {
4616 ProbedPage left, right, mid;
4617 int i, start_seg_with_known_loc, end_pos, page_start;
4618 uint32 delta, stream_length, padding;
4619 double offset, bytes_per_sample;
4620 int probe = 0;
4621
4622 // find the last page and validate the target sample
4623 stream_length = stb_vorbis_stream_length_in_samples(f);
4624 if (stream_length == 0) return error(f, VORBIS_seek_without_length);
4625 if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
4626
4627 // this is the maximum difference between the window-center (which is the
4628 // actual granule position value), and the right-start (which the spec
4629 // indicates should be the granule position (give or take one)).
4630 padding = ((f->blocksize_1 - f->blocksize_0) >> 2);
4631 if (sample_number < padding)
4632 sample_number = 0;
4633 else
4634 sample_number -= padding;
4635
4636 left = f->p_first;
4637 while (left.last_decoded_sample == ~0U) {
4638 // (untested) the first page does not have a 'last_decoded_sample'
4639 set_file_offset(f, left.page_end);
4640 if (!get_seek_page_info(f, &left)) goto error;
4641 }
4642
4643 right = f->p_last;
4644 assert(right.last_decoded_sample != ~0U);
4645
4646 // starting from the start is handled differently
4647 if (sample_number <= left.last_decoded_sample) {
4648 if (stb_vorbis_seek_start(f))
4649 return 1;
4650 return 0;
4651 }
4652
4653 while (left.page_end != right.page_start) {
4654 assert(left.page_end < right.page_start);
4655 // search range in bytes
4656 delta = right.page_start - left.page_end;
4657 if (delta <= 65536) {
4658 // there's only 64K left to search - handle it linearly
4659 set_file_offset(f, left.page_end);
4660 } else {
4661 if (probe < 2) {
4662 if (probe == 0) {
4663 // first probe (interpolate)
4664 double data_bytes = right.page_end - left.page_start;
4665 bytes_per_sample = data_bytes / right.last_decoded_sample;
4666 offset = left.page_start + bytes_per_sample * (sample_number - left.last_decoded_sample);
4667 } else {
4668 // second probe (try to bound the other side)
4669 double error = ((double) sample_number - mid.last_decoded_sample) * bytes_per_sample;
4670 if (error >= 0 && error < 8000) error = 8000;
4671 if (error < 0 && error > -8000) error = -8000;
4672 offset += error * 2;
4673 }
4674
4675 // ensure the offset is valid
4676 if (offset < left.page_end)
4677 offset = left.page_end;
4678 if (offset > right.page_start - 65536)
4679 offset = right.page_start - 65536;
4680
4681 set_file_offset(f, (unsigned int) offset);
4682 } else {
4683 // binary search for large ranges (offset by 32K to ensure
4684 // we don't hit the right page)
4685 set_file_offset(f, left.page_end + (delta / 2) - 32768);
4686 }
4687
4688 if (!vorbis_find_page(f, NULL, NULL)) goto error;
4689 }
4690
4691 for (;;) {
4692 if (!get_seek_page_info(f, &mid)) goto error;
4693 if (mid.last_decoded_sample != ~0U) break;
4694 // (untested) no frames end on this page
4695 set_file_offset(f, mid.page_end);
4696 assert(mid.page_start < right.page_start);
4697 }
4698
4699 // if we've just found the last page again then we're in a tricky file,
4700 // and we're close enough.
4701 if (mid.page_start == right.page_start)
4702 break;
4703
4704 if (sample_number < mid.last_decoded_sample)
4705 right = mid;
4706 else
4707 left = mid;
4708
4709 ++probe;
4710 }
4711
4712 // seek back to start of the last packet
4713 page_start = left.page_start;
4714 set_file_offset(f, page_start);
4715 if (!start_page(f)) return error(f, VORBIS_seek_failed);
4716 end_pos = f->end_seg_with_known_loc;
4717 assert(end_pos >= 0);
4718
4719 for (;;) {
4720 for (i = end_pos; i > 0; --i)
4721 if (f->segments[i-1] != 255)
4722 break;
4723
4724 start_seg_with_known_loc = i;
4725
4726 if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet))
4727 break;
4728
4729 // (untested) the final packet begins on an earlier page
4730 if (!go_to_page_before(f, page_start))
4731 goto error;
4732
4733 page_start = stb_vorbis_get_file_offset(f);
4734 if (!start_page(f)) goto error;
4735 end_pos = f->segment_count - 1;
4736 }
4737
4738 // prepare to start decoding
4739 f->current_loc_valid = FALSE;
4740 f->last_seg = FALSE;
4741 f->valid_bits = 0;
4742 f->packet_bytes = 0;
4743 f->bytes_in_seg = 0;
4744 f->previous_length = 0;
4745 f->next_seg = start_seg_with_known_loc;
4746
4747 for (i = 0; i < start_seg_with_known_loc; i++)
4748 skip(f, f->segments[i]);
4749
4750 // start decoding (optimizable - this frame is generally discarded)
4751 if (!vorbis_pump_first_frame(f))
4752 return 0;
4753 if (f->current_loc > sample_number)
4754 return error(f, VORBIS_seek_failed);
4755 return 1;
4756
4757 error:
4758 // try to restore the file to a valid state
4759 stb_vorbis_seek_start(f);
4760 return error(f, VORBIS_seek_failed);
4761 }
4762
4763 // the same as vorbis_decode_initial, but without advancing
peek_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)4764 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
4765 {
4766 int bits_read, bytes_read;
4767
4768 if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
4769 return 0;
4770
4771 // either 1 or 2 bytes were read, figure out which so we can rewind
4772 bits_read = 1 + ilog(f->mode_count-1);
4773 if (f->mode_config[*mode].blockflag)
4774 bits_read += 2;
4775 bytes_read = (bits_read + 7) / 8;
4776
4777 f->bytes_in_seg += bytes_read;
4778 f->packet_bytes -= bytes_read;
4779 skip(f, -bytes_read);
4780 if (f->next_seg == -1)
4781 f->next_seg = f->segment_count - 1;
4782 else
4783 f->next_seg--;
4784 f->valid_bits = 0;
4785
4786 return 1;
4787 }
4788
stb_vorbis_seek_frame(stb_vorbis * f,unsigned int sample_number)4789 int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
4790 {
4791 uint32 max_frame_samples;
4792
4793 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4794
4795 // fast page-level search
4796 if (!seek_to_sample_coarse(f, sample_number))
4797 return 0;
4798
4799 assert(f->current_loc_valid);
4800 assert(f->current_loc <= sample_number);
4801
4802 // linear search for the relevant packet
4803 max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2;
4804 while (f->current_loc < sample_number) {
4805 int left_start, left_end, right_start, right_end, mode, frame_samples;
4806 if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
4807 return error(f, VORBIS_seek_failed);
4808 // calculate the number of samples returned by the next frame
4809 frame_samples = right_start - left_start;
4810 if (f->current_loc + frame_samples > sample_number) {
4811 return 1; // the next frame will contain the sample
4812 } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) {
4813 // there's a chance the frame after this could contain the sample
4814 vorbis_pump_first_frame(f);
4815 } else {
4816 // this frame is too early to be relevant
4817 f->current_loc += frame_samples;
4818 f->previous_length = 0;
4819 maybe_start_packet(f);
4820 flush_packet(f);
4821 }
4822 }
4823 // the next frame will start with the sample
4824 assert(f->current_loc == sample_number);
4825 return 1;
4826 }
4827
stb_vorbis_seek(stb_vorbis * f,unsigned int sample_number)4828 int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
4829 {
4830 if (!stb_vorbis_seek_frame(f, sample_number))
4831 return 0;
4832
4833 if (sample_number != f->current_loc) {
4834 int n;
4835 uint32 frame_start = f->current_loc;
4836 stb_vorbis_get_frame_float(f, &n, NULL);
4837 assert(sample_number > frame_start);
4838 assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end);
4839 f->channel_buffer_start += (sample_number - frame_start);
4840 }
4841
4842 return 1;
4843 }
4844
stb_vorbis_seek_start(stb_vorbis * f)4845 int stb_vorbis_seek_start(stb_vorbis *f)
4846 {
4847 if (IS_PUSH_MODE(f)) { return error(f, VORBIS_invalid_api_mixing); }
4848 set_file_offset(f, f->first_audio_page_offset);
4849 f->previous_length = 0;
4850 f->first_decode = TRUE;
4851 f->next_seg = -1;
4852 return vorbis_pump_first_frame(f);
4853 }
4854
stb_vorbis_stream_length_in_samples(stb_vorbis * f)4855 unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
4856 {
4857 unsigned int restore_offset, previous_safe;
4858 unsigned int end, last_page_loc;
4859
4860 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4861 if (!f->total_samples) {
4862 unsigned int last;
4863 uint32 lo,hi;
4864 char header[6];
4865
4866 // first, store the current decode position so we can restore it
4867 restore_offset = stb_vorbis_get_file_offset(f);
4868
4869 // now we want to seek back 64K from the end (the last page must
4870 // be at most a little less than 64K, but let's allow a little slop)
4871 if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
4872 previous_safe = f->stream_len - 65536;
4873 else
4874 previous_safe = f->first_audio_page_offset;
4875
4876 set_file_offset(f, previous_safe);
4877 // previous_safe is now our candidate 'earliest known place that seeking
4878 // to will lead to the final page'
4879
4880 if (!vorbis_find_page(f, &end, &last)) {
4881 // if we can't find a page, we're hosed!
4882 f->error = VORBIS_cant_find_last_page;
4883 f->total_samples = 0xffffffff;
4884 goto done;
4885 }
4886
4887 // check if there are more pages
4888 last_page_loc = stb_vorbis_get_file_offset(f);
4889
4890 // stop when the last_page flag is set, not when we reach eof;
4891 // this allows us to stop short of a 'file_section' end without
4892 // explicitly checking the length of the section
4893 while (!last) {
4894 set_file_offset(f, end);
4895 if (!vorbis_find_page(f, &end, &last)) {
4896 // the last page we found didn't have the 'last page' flag
4897 // set. whoops!
4898 break;
4899 }
4900 previous_safe = last_page_loc+1;
4901 last_page_loc = stb_vorbis_get_file_offset(f);
4902 }
4903
4904 set_file_offset(f, last_page_loc);
4905
4906 // parse the header
4907 getn(f, (unsigned char *)header, 6);
4908 // extract the absolute granule position
4909 lo = get32(f);
4910 hi = get32(f);
4911 if (lo == 0xffffffff && hi == 0xffffffff) {
4912 f->error = VORBIS_cant_find_last_page;
4913 f->total_samples = SAMPLE_unknown;
4914 goto done;
4915 }
4916 if (hi)
4917 lo = 0xfffffffe; // saturate
4918 f->total_samples = lo;
4919
4920 f->p_last.page_start = last_page_loc;
4921 f->p_last.page_end = end;
4922 f->p_last.last_decoded_sample = lo;
4923
4924 done:
4925 set_file_offset(f, restore_offset);
4926 }
4927 return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
4928 }
4929
stb_vorbis_stream_length_in_seconds(stb_vorbis * f)4930 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
4931 {
4932 return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
4933 }
4934
4935
4936
stb_vorbis_get_frame_float(stb_vorbis * f,int * channels,float *** output)4937 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
4938 {
4939 int len, right,left,i;
4940 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4941
4942 if (!vorbis_decode_packet(f, &len, &left, &right)) {
4943 f->channel_buffer_start = f->channel_buffer_end = 0;
4944 return 0;
4945 }
4946
4947 len = vorbis_finish_frame(f, len, left, right);
4948 for (i=0; i < f->channels; ++i)
4949 f->outputs[i] = f->channel_buffers[i] + left;
4950
4951 f->channel_buffer_start = left;
4952 f->channel_buffer_end = left+len;
4953
4954 if (channels) *channels = f->channels;
4955 if (output) *output = f->outputs;
4956 return len;
4957 }
4958
4959 #ifndef STB_VORBIS_NO_STDIO
4960
stb_vorbis_open_file_section(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc,unsigned int length)4961 stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length)
4962 {
4963 stb_vorbis *f, p;
4964 vorbis_init(&p, alloc);
4965 p.f = file;
4966 p.f_start = (uint32) ftell(file);
4967 p.stream_len = length;
4968 p.close_on_free = close_on_free;
4969 if (start_decoder(&p)) {
4970 f = vorbis_alloc(&p);
4971 if (f) {
4972 *f = p;
4973 vorbis_pump_first_frame(f);
4974 return f;
4975 }
4976 }
4977 if (error) *error = p.error;
4978 vorbis_deinit(&p);
4979 return NULL;
4980 }
4981
stb_vorbis_open_file(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc)4982 stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc)
4983 {
4984 unsigned int len, start;
4985 start = (unsigned int) ftell(file);
4986 fseek(file, 0, SEEK_END);
4987 len = (unsigned int) (ftell(file) - start);
4988 fseek(file, start, SEEK_SET);
4989 return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
4990 }
4991
stb_vorbis_open_filename(const char * filename,int * error,const stb_vorbis_alloc * alloc)4992 stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
4993 {
4994 FILE *f;
4995 #if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__)
4996 if (0 != fopen_s(&f, filename, "rb"))
4997 f = NULL;
4998 #else
4999 f = fopen(filename, "rb");
5000 #endif
5001 if (f)
5002 return stb_vorbis_open_file(f, TRUE, error, alloc);
5003 if (error) *error = VORBIS_file_open_failure;
5004 return NULL;
5005 }
5006 #endif // STB_VORBIS_NO_STDIO
5007
stb_vorbis_open_memory(const unsigned char * data,int len,int * error,const stb_vorbis_alloc * alloc)5008 stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc)
5009 {
5010 stb_vorbis *f, p;
5011 if (data == NULL) return NULL;
5012 vorbis_init(&p, alloc);
5013 p.stream = (uint8 *) data;
5014 p.stream_end = (uint8 *) data + len;
5015 p.stream_start = (uint8 *) p.stream;
5016 p.stream_len = len;
5017 p.push_mode = FALSE;
5018 if (start_decoder(&p)) {
5019 f = vorbis_alloc(&p);
5020 if (f) {
5021 *f = p;
5022 vorbis_pump_first_frame(f);
5023 if (error) *error = VORBIS__no_error;
5024 return f;
5025 }
5026 }
5027 if (error) *error = p.error;
5028 vorbis_deinit(&p);
5029 return NULL;
5030 }
5031
5032 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
5033 #define PLAYBACK_MONO 1
5034 #define PLAYBACK_LEFT 2
5035 #define PLAYBACK_RIGHT 4
5036
5037 #define L (PLAYBACK_LEFT | PLAYBACK_MONO)
5038 #define C (PLAYBACK_LEFT | PLAYBACK_RIGHT | PLAYBACK_MONO)
5039 #define R (PLAYBACK_RIGHT | PLAYBACK_MONO)
5040
5041 static int8 channel_position[7][6] =
5042 {
5043 { 0 },
5044 { C },
5045 { L, R },
5046 { L, C, R },
5047 { L, R, L, R },
5048 { L, C, R, L, R },
5049 { L, C, R, L, R, C },
5050 };
5051
5052
5053 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
5054 typedef union {
5055 float f;
5056 int i;
5057 } float_conv;
5058 typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4];
5059 #define FASTDEF(x) float_conv x
5060 // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
5061 #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT))
5062 #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22))
5063 #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s))
5064 #define check_endianness()
5065 #else
5066 #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s))))
5067 #define check_endianness()
5068 #define FASTDEF(x)
5069 #endif
5070
copy_samples(short * dest,float * src,int len)5071 static void copy_samples(short *dest, float *src, int len)
5072 {
5073 int i;
5074 check_endianness();
5075 for (i=0; i < len; ++i) {
5076 FASTDEF(temp);
5077 int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i],15);
5078 if ((unsigned int) (v + 32768) > 65535)
5079 v = v < 0 ? -32768 : 32767;
5080 dest[i] = v;
5081 }
5082 }
5083
compute_samples(int mask,short * output,int num_c,float ** data,int d_offset,int len)5084 static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
5085 {
5086 #define BUFFER_SIZE 32
5087 float buffer[BUFFER_SIZE];
5088 int i,j,o,n = BUFFER_SIZE;
5089 check_endianness();
5090 for (o = 0; o < len; o += BUFFER_SIZE) {
5091 memset(buffer, 0, sizeof(buffer));
5092 if (o + n > len) n = len - o;
5093 for (j=0; j < num_c; ++j) {
5094 if (channel_position[num_c][j] & mask) {
5095 for (i=0; i < n; ++i)
5096 buffer[i] += data[j][d_offset+o+i];
5097 }
5098 }
5099 for (i=0; i < n; ++i) {
5100 FASTDEF(temp);
5101 int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5102 if ((unsigned int) (v + 32768) > 65535)
5103 v = v < 0 ? -32768 : 32767;
5104 output[o+i] = v;
5105 }
5106 }
5107 }
5108
compute_stereo_samples(short * output,int num_c,float ** data,int d_offset,int len)5109 static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
5110 {
5111 #define BUFFER_SIZE 32
5112 float buffer[BUFFER_SIZE];
5113 int i,j,o,n = BUFFER_SIZE >> 1;
5114 // o is the offset in the source data
5115 check_endianness();
5116 for (o = 0; o < len; o += BUFFER_SIZE >> 1) {
5117 // o2 is the offset in the output data
5118 int o2 = o << 1;
5119 memset(buffer, 0, sizeof(buffer));
5120 if (o + n > len) n = len - o;
5121 for (j=0; j < num_c; ++j) {
5122 int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);
5123 if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) {
5124 for (i=0; i < n; ++i) {
5125 buffer[i*2+0] += data[j][d_offset+o+i];
5126 buffer[i*2+1] += data[j][d_offset+o+i];
5127 }
5128 } else if (m == PLAYBACK_LEFT) {
5129 for (i=0; i < n; ++i) {
5130 buffer[i*2+0] += data[j][d_offset+o+i];
5131 }
5132 } else if (m == PLAYBACK_RIGHT) {
5133 for (i=0; i < n; ++i) {
5134 buffer[i*2+1] += data[j][d_offset+o+i];
5135 }
5136 }
5137 }
5138 for (i=0; i < (n<<1); ++i) {
5139 FASTDEF(temp);
5140 int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5141 if ((unsigned int) (v + 32768) > 65535)
5142 v = v < 0 ? -32768 : 32767;
5143 output[o2+i] = v;
5144 }
5145 }
5146 }
5147
convert_samples_short(int buf_c,short ** buffer,int b_offset,int data_c,float ** data,int d_offset,int samples)5148 static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
5149 {
5150 int i;
5151 if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5152 static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
5153 for (i=0; i < buf_c; ++i)
5154 compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples);
5155 } else {
5156 int limit = buf_c < data_c ? buf_c : data_c;
5157 for (i=0; i < limit; ++i)
5158 copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
5159 for ( ; i < buf_c; ++i)
5160 memset(buffer[i]+b_offset, 0, sizeof(short) * samples);
5161 }
5162 }
5163
stb_vorbis_get_frame_short(stb_vorbis * f,int num_c,short ** buffer,int num_samples)5164 int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
5165 {
5166 float **output;
5167 int len = stb_vorbis_get_frame_float(f, NULL, &output);
5168 if (len > num_samples) len = num_samples;
5169 if (len)
5170 convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len);
5171 return len;
5172 }
5173
convert_channels_short_interleaved(int buf_c,short * buffer,int data_c,float ** data,int d_offset,int len)5174 static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
5175 {
5176 int i;
5177 check_endianness();
5178 if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5179 assert(buf_c == 2);
5180 for (i=0; i < buf_c; ++i)
5181 compute_stereo_samples(buffer, data_c, data, d_offset, len);
5182 } else {
5183 int limit = buf_c < data_c ? buf_c : data_c;
5184 int j;
5185 for (j=0; j < len; ++j) {
5186 for (i=0; i < limit; ++i) {
5187 FASTDEF(temp);
5188 float f = data[i][d_offset+j];
5189 int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15);
5190 if ((unsigned int) (v + 32768) > 65535)
5191 v = v < 0 ? -32768 : 32767;
5192 *buffer++ = v;
5193 }
5194 for ( ; i < buf_c; ++i)
5195 *buffer++ = 0;
5196 }
5197 }
5198 }
5199
stb_vorbis_get_frame_short_interleaved(stb_vorbis * f,int num_c,short * buffer,int num_shorts)5200 int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
5201 {
5202 float **output;
5203 int len;
5204 if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
5205 len = stb_vorbis_get_frame_float(f, NULL, &output);
5206 if (len) {
5207 if (len*num_c > num_shorts) len = num_shorts / num_c;
5208 convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len);
5209 }
5210 return len;
5211 }
5212
stb_vorbis_get_samples_short_interleaved(stb_vorbis * f,int channels,short * buffer,int num_shorts)5213 int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
5214 {
5215 float **outputs;
5216 int len = num_shorts / channels;
5217 int n=0;
5218 int z = f->channels;
5219 if (z > channels) z = channels;
5220 while (n < len) {
5221 int k = f->channel_buffer_end - f->channel_buffer_start;
5222 if (n+k >= len) k = len - n;
5223 if (k)
5224 convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5225 buffer += k*channels;
5226 n += k;
5227 f->channel_buffer_start += k;
5228 if (n == len) break;
5229 if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5230 }
5231 return n;
5232 }
5233
stb_vorbis_get_samples_short(stb_vorbis * f,int channels,short ** buffer,int len)5234 int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
5235 {
5236 float **outputs;
5237 int n=0;
5238 int z = f->channels;
5239 if (z > channels) z = channels;
5240 while (n < len) {
5241 int k = f->channel_buffer_end - f->channel_buffer_start;
5242 if (n+k >= len) k = len - n;
5243 if (k)
5244 convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5245 n += k;
5246 f->channel_buffer_start += k;
5247 if (n == len) break;
5248 if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5249 }
5250 return n;
5251 }
5252
5253 #ifndef STB_VORBIS_NO_STDIO
stb_vorbis_decode_filename(const char * filename,int * channels,int * sample_rate,short ** output)5254 int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output)
5255 {
5256 int data_len, offset, total, limit, error;
5257 short *data;
5258 stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
5259 if (v == NULL) return -1;
5260 limit = v->channels * 4096;
5261 *channels = v->channels;
5262 if (sample_rate)
5263 *sample_rate = v->sample_rate;
5264 offset = data_len = 0;
5265 total = limit;
5266 data = (short *) malloc(total * sizeof(*data));
5267 if (data == NULL) {
5268 stb_vorbis_close(v);
5269 return -2;
5270 }
5271 for (;;) {
5272 int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5273 if (n == 0) break;
5274 data_len += n;
5275 offset += n * v->channels;
5276 if (offset + limit > total) {
5277 short *data2;
5278 total *= 2;
5279 data2 = (short *) realloc(data, total * sizeof(*data));
5280 if (data2 == NULL) {
5281 free(data);
5282 stb_vorbis_close(v);
5283 return -2;
5284 }
5285 data = data2;
5286 }
5287 }
5288 *output = data;
5289 stb_vorbis_close(v);
5290 return data_len;
5291 }
5292 #endif // NO_STDIO
5293
stb_vorbis_decode_memory(const uint8 * mem,int len,int * channels,int * sample_rate,short ** output)5294 int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output)
5295 {
5296 int data_len, offset, total, limit, error;
5297 short *data;
5298 stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
5299 if (v == NULL) return -1;
5300 limit = v->channels * 4096;
5301 *channels = v->channels;
5302 if (sample_rate)
5303 *sample_rate = v->sample_rate;
5304 offset = data_len = 0;
5305 total = limit;
5306 data = (short *) malloc(total * sizeof(*data));
5307 if (data == NULL) {
5308 stb_vorbis_close(v);
5309 return -2;
5310 }
5311 for (;;) {
5312 int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5313 if (n == 0) break;
5314 data_len += n;
5315 offset += n * v->channels;
5316 if (offset + limit > total) {
5317 short *data2;
5318 total *= 2;
5319 data2 = (short *) realloc(data, total * sizeof(*data));
5320 if (data2 == NULL) {
5321 free(data);
5322 stb_vorbis_close(v);
5323 return -2;
5324 }
5325 data = data2;
5326 }
5327 }
5328 *output = data;
5329 stb_vorbis_close(v);
5330 return data_len;
5331 }
5332 #endif // STB_VORBIS_NO_INTEGER_CONVERSION
5333
stb_vorbis_get_samples_float_interleaved(stb_vorbis * f,int channels,float * buffer,int num_floats)5334 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
5335 {
5336 float **outputs;
5337 int len = num_floats / channels;
5338 int n=0;
5339 int z = f->channels;
5340 if (z > channels) z = channels;
5341 while (n < len) {
5342 int i,j;
5343 int k = f->channel_buffer_end - f->channel_buffer_start;
5344 if (n+k >= len) k = len - n;
5345 for (j=0; j < k; ++j) {
5346 for (i=0; i < z; ++i)
5347 *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
5348 for ( ; i < channels; ++i)
5349 *buffer++ = 0;
5350 }
5351 n += k;
5352 f->channel_buffer_start += k;
5353 if (n == len)
5354 break;
5355 if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5356 break;
5357 }
5358 return n;
5359 }
5360
stb_vorbis_get_samples_float(stb_vorbis * f,int channels,float ** buffer,int num_samples)5361 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
5362 {
5363 float **outputs;
5364 int n=0;
5365 int z = f->channels;
5366 if (z > channels) z = channels;
5367 while (n < num_samples) {
5368 int i;
5369 int k = f->channel_buffer_end - f->channel_buffer_start;
5370 if (n+k >= num_samples) k = num_samples - n;
5371 if (k) {
5372 for (i=0; i < z; ++i)
5373 memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k);
5374 for ( ; i < channels; ++i)
5375 memset(buffer[i]+n, 0, sizeof(float) * k);
5376 }
5377 n += k;
5378 f->channel_buffer_start += k;
5379 if (n == num_samples)
5380 break;
5381 if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5382 break;
5383 }
5384 return n;
5385 }
5386 #endif // STB_VORBIS_NO_PULLDATA_API
5387
5388 /* Version history
5389 1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
5390 1.11 - 2017-07-23 - fix MinGW compilation
5391 1.10 - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
5392 1.09 - 2016-04-04 - back out 'avoid discarding last frame' fix from previous version
5393 1.08 - 2016-04-02 - fixed multiple warnings; fix setup memory leaks;
5394 avoid discarding last frame of audio data
5395 1.07 - 2015-01-16 - fixed some warnings, fix mingw, const-correct API
5396 some more crash fixes when out of memory or with corrupt files
5397 1.06 - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
5398 some crash fixes when out of memory or with corrupt files
5399 1.05 - 2015-04-19 - don't define __forceinline if it's redundant
5400 1.04 - 2014-08-27 - fix missing const-correct case in API
5401 1.03 - 2014-08-07 - Warning fixes
5402 1.02 - 2014-07-09 - Declare qsort compare function _cdecl on windows
5403 1.01 - 2014-06-18 - fix stb_vorbis_get_samples_float
5404 1.0 - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in multichannel
5405 (API change) report sample rate for decode-full-file funcs
5406 0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5407 0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5408 0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5409 0.99993 - remove assert that fired on legal files with empty tables
5410 0.99992 - rewind-to-start
5411 0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5412 0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5413 0.9998 - add a full-decode function with a memory source
5414 0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5415 0.9996 - query length of vorbis stream in samples/seconds
5416 0.9995 - bugfix to another optimization that only happened in certain files
5417 0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5418 0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5419 0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5420 0.9991 - performance improvement of IMDCT
5421 0.999 - (should have been 0.9990) performance improvement of IMDCT
5422 0.998 - no-CRT support from Casey Muratori
5423 0.997 - bugfixes for bugs found by Terje Mathisen
5424 0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5425 0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5426 0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5427 0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5428 0.992 - fixes for MinGW warning
5429 0.991 - turn fast-float-conversion on by default
5430 0.990 - fix push-mode seek recovery if you seek into the headers
5431 0.98b - fix to bad release of 0.98
5432 0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5433 0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5434 0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5435 0.95 - clamping code for 16-bit functions
5436 0.94 - not publically released
5437 0.93 - fixed all-zero-floor case (was decoding garbage)
5438 0.92 - fixed a memory leak
5439 0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5440 0.90 - first public release
5441 */
5442
5443 #endif // STB_VORBIS_HEADER_ONLY
5444
5445
5446 /*
5447 ------------------------------------------------------------------------------
5448 This software is available under 2 licenses -- choose whichever you prefer.
5449 ------------------------------------------------------------------------------
5450 ALTERNATIVE A - MIT License
5451 Copyright (c) 2017 Sean Barrett
5452 Permission is hereby granted, free of charge, to any person obtaining a copy of
5453 this software and associated documentation files (the "Software"), to deal in
5454 the Software without restriction, including without limitation the rights to
5455 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
5456 of the Software, and to permit persons to whom the Software is furnished to do
5457 so, subject to the following conditions:
5458 The above copyright notice and this permission notice shall be included in all
5459 copies or substantial portions of the Software.
5460 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5461 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5462 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5463 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
5464 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
5465 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
5466 SOFTWARE.
5467 ------------------------------------------------------------------------------
5468 ALTERNATIVE B - Public Domain (www.unlicense.org)
5469 This is free and unencumbered software released into the public domain.
5470 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
5471 software, either in source code form or as a compiled binary, for any purpose,
5472 commercial or non-commercial, and by any means.
5473 In jurisdictions that recognize copyright laws, the author or authors of this
5474 software dedicate any and all copyright interest in the software to the public
5475 domain. We make this dedication for the benefit of the public at large and to
5476 the detriment of our heirs and successors. We intend this dedication to be an
5477 overt act of relinquishment in perpetuity of all present and future rights to
5478 this software under copyright law.
5479 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5480 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5481 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5482 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
5483 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
5484 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
5485 ------------------------------------------------------------------------------
5486 */
5487