1 // Ogg Vorbis audio decoder - v1.20 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking implementation
7 // sponsored by Phillip Bennefall, Marc Andersen, Aaron Baker,
8 // Elias Software, Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 // See end of file for license information.
13 //
14 // Limitations:
15 //
16 // - floor 0 not supported (used in old ogg vorbis files pre-2004)
17 // - lossless sample-truncation at beginning ignored
18 // - cannot concatenate multiple vorbis streams
19 // - sample positions are 32-bit, limiting seekable 192Khz
20 // files to around 6 hours (Ogg supports 64-bit)
21 //
22 // Feature contributors:
23 // Dougall Johnson (sample-exact seeking)
24 //
25 // Bugfix/warning contributors:
26 // Terje Mathisen Niklas Frykholm Andy Hill
27 // Casey Muratori John Bolton Gargaj
28 // Laurent Gomila Marc LeBlanc Ronny Chevalier
29 // Bernhard Wodo Evan Balster github:alxprd
30 // Tom Beaumont Ingo Leitgeb Nicolas Guillemot
31 // Phillip Bennefall Rohit Thiago Goulart
32 // github:manxorist saga musix github:infatum
33 // Timur Gagiev Maxwell Koo Peter Waller
34 // github:audinowho Dougall Johnson David Reid
35 // github:Clownacy Pedro J. Estebanez Remi Verschelde
36 //
37 // Partial history:
38 // 1.20 - 2020-07-11 - several small fixes
39 // 1.19 - 2020-02-05 - warnings
40 // 1.18 - 2020-02-02 - fix seek bugs; parse header comments; misc warnings etc.
41 // 1.17 - 2019-07-08 - fix CVE-2019-13217..CVE-2019-13223 (by ForAllSecure)
42 // 1.16 - 2019-03-04 - fix warnings
43 // 1.15 - 2019-02-07 - explicit failure if Ogg Skeleton data is found
44 // 1.14 - 2018-02-11 - delete bogus dealloca usage
45 // 1.13 - 2018-01-29 - fix truncation of last frame (hopefully)
46 // 1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
47 // 1.11 - 2017-07-23 - fix MinGW compilation
48 // 1.10 - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
49 // 1.09 - 2016-04-04 - back out 'truncation of last frame' fix from previous version
50 // 1.08 - 2016-04-02 - warnings; setup memory leaks; truncation of last frame
51 // 1.07 - 2015-01-16 - fixes for crashes on invalid files; warning fixes; const
52 // 1.06 - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
53 // some crash fixes when out of memory or with corrupt files
54 // fix some inappropriately signed shifts
55 // 1.05 - 2015-04-19 - don't define __forceinline if it's redundant
56 // 1.04 - 2014-08-27 - fix missing const-correct case in API
57 // 1.03 - 2014-08-07 - warning fixes
58 // 1.02 - 2014-07-09 - declare qsort comparison as explicitly _cdecl in Windows
59 // 1.01 - 2014-06-18 - fix stb_vorbis_get_samples_float (interleaved was correct)
60 // 1.0 - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
61 // (API change) report sample rate for decode-full-file funcs
62 //
63 // See end of file for full version history.
64
65
66 //////////////////////////////////////////////////////////////////////////////
67 //
68 // HEADER BEGINS HERE
69 //
70
71 #ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
72 #define STB_VORBIS_INCLUDE_STB_VORBIS_H
73
74 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
75 #define STB_VORBIS_NO_STDIO 1
76 #endif
77
78 #if 0 /* FAudio change! */
79 #ifndef STB_VORBIS_NO_STDIO
80 #include <stdio.h>
81 #endif
82 #endif
83
84 #ifdef __cplusplus
85 extern "C" {
86 #endif
87
88 /////////// THREAD SAFETY
89
90 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
91 // them from multiple threads at the same time. However, you can have multiple
92 // stb_vorbis* handles and decode from them independently in multiple thrads.
93
94
95 /////////// MEMORY ALLOCATION
96
97 // normally stb_vorbis uses malloc() to allocate memory at startup,
98 // and alloca() to allocate temporary memory during a frame on the
99 // stack. (Memory consumption will depend on the amount of setup
100 // data in the file and how you set the compile flags for speed
101 // vs. size. In my test files the maximal-size usage is ~150KB.)
102 //
103 // You can modify the wrapper functions in the source (setup_malloc,
104 // setup_temp_malloc, temp_malloc) to change this behavior, or you
105 // can use a simpler allocation model: you pass in a buffer from
106 // which stb_vorbis will allocate _all_ its memory (including the
107 // temp memory). "open" may fail with a VORBIS_outofmem if you
108 // do not pass in enough data; there is no way to determine how
109 // much you do need except to succeed (at which point you can
110 // query get_info to find the exact amount required. yes I know
111 // this is lame).
112 //
113 // If you pass in a non-NULL buffer of the type below, allocation
114 // will occur from it as described above. Otherwise just pass NULL
115 // to use malloc()/alloca()
116
117 typedef struct
118 {
119 char *alloc_buffer;
120 int alloc_buffer_length_in_bytes;
121 } stb_vorbis_alloc;
122
123
124 /////////// FUNCTIONS USEABLE WITH ALL INPUT MODES
125
126 typedef struct stb_vorbis stb_vorbis;
127
128 typedef struct
129 {
130 unsigned int sample_rate;
131 int channels;
132
133 unsigned int setup_memory_required;
134 unsigned int setup_temp_memory_required;
135 unsigned int temp_memory_required;
136
137 int max_frame_size;
138 } stb_vorbis_info;
139
140 typedef struct
141 {
142 char *vendor;
143
144 int comment_list_length;
145 char **comment_list;
146 } stb_vorbis_comment;
147
148 // get general information about the file
149 FAUDIOAPI stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
150
151 // get ogg comments
152 FAUDIOAPI stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f);
153
154 // get the last error detected (clears it, too)
155 FAUDIOAPI int stb_vorbis_get_error(stb_vorbis *f);
156
157 // close an ogg vorbis file and free all memory in use
158 FAUDIOAPI void stb_vorbis_close(stb_vorbis *f);
159
160 // this function returns the offset (in samples) from the beginning of the
161 // file that will be returned by the next decode, if it is known, or -1
162 // otherwise. after a flush_pushdata() call, this may take a while before
163 // it becomes valid again.
164 // NOT WORKING YET after a seek with PULLDATA API
165 FAUDIOAPI int stb_vorbis_get_sample_offset(stb_vorbis *f);
166
167 // returns the current seek point within the file, or offset from the beginning
168 // of the memory buffer. In pushdata mode it returns 0.
169 FAUDIOAPI unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
170
171 /////////// PUSHDATA API
172
173 #ifndef STB_VORBIS_NO_PUSHDATA_API
174
175 // this API allows you to get blocks of data from any source and hand
176 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
177 // you how much it used, and you have to give it the rest next time;
178 // and stb_vorbis may not have enough data to work with and you will
179 // need to give it the same data again PLUS more. Note that the Vorbis
180 // specification does not bound the size of an individual frame.
181
182 extern stb_vorbis *stb_vorbis_open_pushdata(
183 const unsigned char * datablock, int datablock_length_in_bytes,
184 int *datablock_memory_consumed_in_bytes,
185 int *error,
186 const stb_vorbis_alloc *alloc_buffer);
187 // create a vorbis decoder by passing in the initial data block containing
188 // the ogg&vorbis headers (you don't need to do parse them, just provide
189 // the first N bytes of the file--you're told if it's not enough, see below)
190 // on success, returns an stb_vorbis *, does not set error, returns the amount of
191 // data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
192 // on failure, returns NULL on error and sets *error, does not change *datablock_memory_consumed
193 // if returns NULL and *error is VORBIS_need_more_data, then the input block was
194 // incomplete and you need to pass in a larger block from the start of the file
195
196 extern int stb_vorbis_decode_frame_pushdata(
197 stb_vorbis *f,
198 const unsigned char *datablock, int datablock_length_in_bytes,
199 int *channels, // place to write number of float * buffers
200 float ***output, // place to write float ** array of float * buffers
201 int *samples // place to write number of output samples
202 );
203 // decode a frame of audio sample data if possible from the passed-in data block
204 //
205 // return value: number of bytes we used from datablock
206 //
207 // possible cases:
208 // 0 bytes used, 0 samples output (need more data)
209 // N bytes used, 0 samples output (resynching the stream, keep going)
210 // N bytes used, M samples output (one frame of data)
211 // note that after opening a file, you will ALWAYS get one N-bytes,0-sample
212 // frame, because Vorbis always "discards" the first frame.
213 //
214 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
215 // instead only datablock_length_in_bytes-3 or less. This is because it wants
216 // to avoid missing parts of a page header if they cross a datablock boundary,
217 // without writing state-machiney code to record a partial detection.
218 //
219 // The number of channels returned are stored in *channels (which can be
220 // NULL--it is always the same as the number of channels reported by
221 // get_info). *output will contain an array of float* buffers, one per
222 // channel. In other words, (*output)[0][0] contains the first sample from
223 // the first channel, and (*output)[1][0] contains the first sample from
224 // the second channel.
225
226 extern void stb_vorbis_flush_pushdata(stb_vorbis *f);
227 // inform stb_vorbis that your next datablock will not be contiguous with
228 // previous ones (e.g. you've seeked in the data); future attempts to decode
229 // frames will cause stb_vorbis to resynchronize (as noted above), and
230 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
231 // will begin decoding the _next_ frame.
232 //
233 // if you want to seek using pushdata, you need to seek in your file, then
234 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
235 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
236 // if you don't like the result, seek your file again and repeat.
237 #endif
238
239
240 ////////// PULLING INPUT API
241
242 #ifndef STB_VORBIS_NO_PULLDATA_API
243 // This API assumes stb_vorbis is allowed to pull data from a source--
244 // either a block of memory containing the _entire_ vorbis stream, or a
245 // FILE * that you or it create, or possibly some other reading mechanism
246 // if you go modify the source to replace the FILE * case with some kind
247 // of callback to your code. (But if you don't support seeking, you may
248 // just want to go ahead and use pushdata.)
249
250 #if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
251 extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
252 #endif
253 #if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
254 extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
255 #endif
256 // decode an entire file and output the data interleaved into a malloc()ed
257 // buffer stored in *output. The return value is the number of samples
258 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
259 // When you're done with it, just free() the pointer returned in *output.
260
261 FAUDIOAPI stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
262 int *error, const stb_vorbis_alloc *alloc_buffer);
263 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
264 // this must be the entire stream!). on failure, returns NULL and sets *error
265
266 #ifndef STB_VORBIS_NO_STDIO
267 FAUDIOAPI stb_vorbis * stb_vorbis_open_filename(const char *filename,
268 int *error, const stb_vorbis_alloc *alloc_buffer);
269 // create an ogg vorbis decoder from a filename via fopen(). on failure,
270 // returns NULL and sets *error (possibly to VORBIS_file_open_failure).
271
272 FAUDIOAPI stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
273 int *error, const stb_vorbis_alloc *alloc_buffer);
274 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
275 // the _current_ seek point (ftell). on failure, returns NULL and sets *error.
276 // note that stb_vorbis must "own" this stream; if you seek it in between
277 // calls to stb_vorbis, it will become confused. Moreover, if you attempt to
278 // perform stb_vorbis_seek_*() operations on this file, it will assume it
279 // owns the _entire_ rest of the file after the start point. Use the next
280 // function, stb_vorbis_open_file_section(), to limit it.
281
282 FAUDIOAPI stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
283 int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
284 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
285 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
286 // on failure, returns NULL and sets *error. note that stb_vorbis must "own"
287 // this stream; if you seek it in between calls to stb_vorbis, it will become
288 // confused.
289 #endif
290
291 FAUDIOAPI int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
292 FAUDIOAPI int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
293 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
294 // after calling seek_frame(), the next call to get_frame_*() will include
295 // the specified sample. after calling stb_vorbis_seek(), the next call to
296 // stb_vorbis_get_samples_* will start with the specified sample. If you
297 // do not need to seek to EXACTLY the target sample when using get_samples_*,
298 // you can also use seek_frame().
299
300 FAUDIOAPI int stb_vorbis_seek_start(stb_vorbis *f);
301 // this function is equivalent to stb_vorbis_seek(f,0)
302
303 FAUDIOAPI unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
304 FAUDIOAPI float stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
305 // these functions return the total length of the vorbis stream
306
307 FAUDIOAPI int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
308 // decode the next frame and return the number of samples. the number of
309 // channels returned are stored in *channels (which can be NULL--it is always
310 // the same as the number of channels reported by get_info). *output will
311 // contain an array of float* buffers, one per channel. These outputs will
312 // be overwritten on the next call to stb_vorbis_get_frame_*.
313 //
314 // You generally should not intermix calls to stb_vorbis_get_frame_*()
315 // and stb_vorbis_get_samples_*(), since the latter calls the former.
316
317 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
318 extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
319 extern int stb_vorbis_get_frame_short (stb_vorbis *f, int num_c, short **buffer, int num_samples);
320 #endif
321 // decode the next frame and return the number of *samples* per channel.
322 // Note that for interleaved data, you pass in the number of shorts (the
323 // size of your array), but the return value is the number of samples per
324 // channel, not the total number of samples.
325 //
326 // The data is coerced to the number of channels you request according to the
327 // channel coercion rules (see below). You must pass in the size of your
328 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
329 // The maximum buffer size needed can be gotten from get_info(); however,
330 // the Vorbis I specification implies an absolute maximum of 4096 samples
331 // per channel.
332
333 // Channel coercion rules:
334 // Let M be the number of channels requested, and N the number of channels present,
335 // and Cn be the nth channel; let stereo L be the sum of all L and center channels,
336 // and stereo R be the sum of all R and center channels (channel assignment from the
337 // vorbis spec).
338 // M N output
339 // 1 k sum(Ck) for all k
340 // 2 * stereo L, stereo R
341 // k l k > l, the first l channels, then 0s
342 // k l k <= l, the first k channels
343 // Note that this is not _good_ surround etc. mixing at all! It's just so
344 // you get something useful.
345
346 FAUDIOAPI int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
347 FAUDIOAPI int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
348 // gets num_samples samples, not necessarily on a frame boundary--this requires
349 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
350 // Returns the number of samples stored per channel; it may be less than requested
351 // at the end of the file. If there are no more samples in the file, returns 0.
352
353 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
354 extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
355 extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
356 #endif
357 // gets num_samples samples, not necessarily on a frame boundary--this requires
358 // buffering so you have to supply the buffers. Applies the coercion rules above
359 // to produce 'channels' channels. Returns the number of samples stored per channel;
360 // it may be less than requested at the end of the file. If there are no more
361 // samples in the file, returns 0.
362
363 #endif
364
365 //////// ERROR CODES
366
367 enum STBVorbisError
368 {
369 VORBIS__no_error,
370
371 VORBIS_need_more_data=1, // not a real error
372
373 VORBIS_invalid_api_mixing, // can't mix API modes
374 VORBIS_outofmem, // not enough memory
375 VORBIS_feature_not_supported, // uses floor 0
376 VORBIS_too_many_channels, // STB_VORBIS_MAX_CHANNELS is too small
377 VORBIS_file_open_failure, // fopen() failed
378 VORBIS_seek_without_length, // can't seek in unknown-length file
379
380 VORBIS_unexpected_eof=10, // file is truncated?
381 VORBIS_seek_invalid, // seek past EOF
382
383 // decoding errors (corrupt/invalid stream) -- you probably
384 // don't care about the exact details of these
385
386 // vorbis errors:
387 VORBIS_invalid_setup=20,
388 VORBIS_invalid_stream,
389
390 // ogg errors:
391 VORBIS_missing_capture_pattern=30,
392 VORBIS_invalid_stream_structure_version,
393 VORBIS_continued_packet_flag_invalid,
394 VORBIS_incorrect_stream_serial_number,
395 VORBIS_invalid_first_page,
396 VORBIS_bad_packet_type,
397 VORBIS_cant_find_last_page,
398 VORBIS_seek_failed,
399 VORBIS_ogg_skeleton_not_supported
400 };
401
402
403 #ifdef __cplusplus
404 }
405 #endif
406
407 #endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
408 //
409 // HEADER ENDS HERE
410 //
411 //////////////////////////////////////////////////////////////////////////////
412
413 #ifndef STB_VORBIS_HEADER_ONLY
414
415 // global configuration settings (e.g. set these in the project/makefile),
416 // or just set them in this file at the top (although ideally the first few
417 // should be visible when the header file is compiled too, although it's not
418 // crucial)
419
420 // STB_VORBIS_NO_PUSHDATA_API
421 // does not compile the code for the various stb_vorbis_*_pushdata()
422 // functions
423 // #define STB_VORBIS_NO_PUSHDATA_API
424
425 // STB_VORBIS_NO_PULLDATA_API
426 // does not compile the code for the non-pushdata APIs
427 // #define STB_VORBIS_NO_PULLDATA_API
428
429 // STB_VORBIS_NO_STDIO
430 // does not compile the code for the APIs that use FILE *s internally
431 // or externally (implied by STB_VORBIS_NO_PULLDATA_API)
432 // #define STB_VORBIS_NO_STDIO
433
434 // STB_VORBIS_NO_INTEGER_CONVERSION
435 // does not compile the code for converting audio sample data from
436 // float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
437 // #define STB_VORBIS_NO_INTEGER_CONVERSION
438
439 // STB_VORBIS_NO_FAST_SCALED_FLOAT
440 // does not use a fast float-to-int trick to accelerate float-to-int on
441 // most platforms which requires endianness be defined correctly.
442 //#define STB_VORBIS_NO_FAST_SCALED_FLOAT
443
444
445 // STB_VORBIS_MAX_CHANNELS [number]
446 // globally define this to the maximum number of channels you need.
447 // The spec does not put a restriction on channels except that
448 // the count is stored in a byte, so 255 is the hard limit.
449 // Reducing this saves about 16 bytes per value, so using 16 saves
450 // (255-16)*16 or around 4KB. Plus anything other memory usage
451 // I forgot to account for. Can probably go as low as 8 (7.1 audio),
452 // 6 (5.1 audio), or 2 (stereo only).
453 #ifndef STB_VORBIS_MAX_CHANNELS
454 #define STB_VORBIS_MAX_CHANNELS 16 // enough for anyone?
455 #endif
456
457 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
458 // after a flush_pushdata(), stb_vorbis begins scanning for the
459 // next valid page, without backtracking. when it finds something
460 // that looks like a page, it streams through it and verifies its
461 // CRC32. Should that validation fail, it keeps scanning. But it's
462 // possible that _while_ streaming through to check the CRC32 of
463 // one candidate page, it sees another candidate page. This #define
464 // determines how many "overlapping" candidate pages it can search
465 // at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
466 // garbage pages could be as big as 64KB, but probably average ~16KB.
467 // So don't hose ourselves by scanning an apparent 64KB page and
468 // missing a ton of real ones in the interim; so minimum of 2
469 #ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
470 #define STB_VORBIS_PUSHDATA_CRC_COUNT 4
471 #endif
472
473 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
474 // sets the log size of the huffman-acceleration table. Maximum
475 // supported value is 24. with larger numbers, more decodings are O(1),
476 // but the table size is larger so worse cache missing, so you'll have
477 // to probe (and try multiple ogg vorbis files) to find the sweet spot.
478 #ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
479 #define STB_VORBIS_FAST_HUFFMAN_LENGTH 10
480 #endif
481
482 // STB_VORBIS_FAST_BINARY_LENGTH [number]
483 // sets the log size of the binary-search acceleration table. this
484 // is used in similar fashion to the fast-huffman size to set initial
485 // parameters for the binary search
486
487 // STB_VORBIS_FAST_HUFFMAN_INT
488 // The fast huffman tables are much more efficient if they can be
489 // stored as 16-bit results instead of 32-bit results. This restricts
490 // the codebooks to having only 65535 possible outcomes, though.
491 // (At least, accelerated by the huffman table.)
492 #ifndef STB_VORBIS_FAST_HUFFMAN_INT
493 #define STB_VORBIS_FAST_HUFFMAN_SHORT
494 #endif
495
496 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
497 // If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
498 // back on binary searching for the correct one. This requires storing
499 // extra tables with the huffman codes in sorted order. Defining this
500 // symbol trades off space for speed by forcing a linear search in the
501 // non-fast case, except for "sparse" codebooks.
502 // #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
503
504 // STB_VORBIS_DIVIDES_IN_RESIDUE
505 // stb_vorbis precomputes the result of the scalar residue decoding
506 // that would otherwise require a divide per chunk. you can trade off
507 // space for time by defining this symbol.
508 // #define STB_VORBIS_DIVIDES_IN_RESIDUE
509
510 // STB_VORBIS_DIVIDES_IN_CODEBOOK
511 // vorbis VQ codebooks can be encoded two ways: with every case explicitly
512 // stored, or with all elements being chosen from a small range of values,
513 // and all values possible in all elements. By default, stb_vorbis expands
514 // this latter kind out to look like the former kind for ease of decoding,
515 // because otherwise an integer divide-per-vector-element is required to
516 // unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
517 // trade off storage for speed.
518 //#define STB_VORBIS_DIVIDES_IN_CODEBOOK
519
520 #ifdef STB_VORBIS_CODEBOOK_SHORTS
521 #error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
522 #endif
523
524 // STB_VORBIS_DIVIDE_TABLE
525 // this replaces small integer divides in the floor decode loop with
526 // table lookups. made less than 1% difference, so disabled by default.
527
528 // STB_VORBIS_NO_INLINE_DECODE
529 // disables the inlining of the scalar codebook fast-huffman decode.
530 // might save a little codespace; useful for debugging
531 // #define STB_VORBIS_NO_INLINE_DECODE
532
533 // STB_VORBIS_NO_DEFER_FLOOR
534 // Normally we only decode the floor without synthesizing the actual
535 // full curve. We can instead synthesize the curve immediately. This
536 // requires more memory and is very likely slower, so I don't think
537 // you'd ever want to do it except for debugging.
538 // #define STB_VORBIS_NO_DEFER_FLOOR
539
540
541
542
543 //////////////////////////////////////////////////////////////////////////////
544
545 #ifdef STB_VORBIS_NO_PULLDATA_API
546 #define STB_VORBIS_NO_INTEGER_CONVERSION
547 #define STB_VORBIS_NO_STDIO
548 #endif
549
550 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
551 #define STB_VORBIS_NO_STDIO 1
552 #endif
553
554 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
555 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
556
557 // only need endianness for fast-float-to-int, which we don't
558 // use for pushdata
559
560 #ifndef STB_VORBIS_BIG_ENDIAN
561 #define STB_VORBIS_ENDIAN 0
562 #else
563 #define STB_VORBIS_ENDIAN 1
564 #endif
565
566 #endif
567 #endif
568
569
570 #if 0 /* FAudio change! */
571 #ifndef STB_VORBIS_NO_STDIO
572 #include <stdio.h>
573 #endif
574
575 #ifndef STB_VORBIS_NO_CRT
576 #include <stdlib.h>
577 #include <string.h>
578 #include <assert.h>
579 #include <math.h>
580
581 // find definition of alloca if it's not in stdlib.h:
582 #if defined(_MSC_VER) || defined(__MINGW32__)
583 #include <malloc.h>
584 #endif
585 #if defined(__linux__) || defined(__linux) || defined(__EMSCRIPTEN__) || defined(__NEWLIB__)
586 #include <alloca.h>
587 #endif
588 #else // STB_VORBIS_NO_CRT
589 #define NULL 0
590 #define malloc(s) 0
591 #define free(s) ((void) 0)
592 #define realloc(s) 0
593 #endif // STB_VORBIS_NO_CRT
594 #endif
595
596 #include <limits.h>
597
598 #ifdef __MINGW32__
599 // eff you mingw:
600 // "fixed":
601 // http://sourceforge.net/p/mingw-w64/mailman/message/32882927/
602 // "no that broke the build, reverted, who cares about C":
603 // http://sourceforge.net/p/mingw-w64/mailman/message/32890381/
604 #ifdef __forceinline
605 #undef __forceinline
606 #endif
607 #define __forceinline
608 #ifndef alloca
609 #define alloca __builtin_alloca
610 #endif
611 #elif !defined(_MSC_VER)
612 #if __GNUC__
613 #define __forceinline inline
614 #else
615 #define __forceinline
616 #endif
617 #endif
618
619 #if STB_VORBIS_MAX_CHANNELS > 256
620 #error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
621 #endif
622
623 #if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
624 #error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
625 #endif
626
627
628 #if 0
629 #include <crtdbg.h>
630 #define CHECK(f) _CrtIsValidHeapPointer(f->channel_buffers[1])
631 #else
632 #define CHECK(f) ((void) 0)
633 #endif
634
635 #define MAX_BLOCKSIZE_LOG 13 // from specification
636 #define MAX_BLOCKSIZE (1 << MAX_BLOCKSIZE_LOG)
637
638
639 typedef unsigned char uint8;
640 typedef signed char int8;
641 typedef unsigned short uint16;
642 typedef signed short int16;
643 typedef unsigned int uint32;
644 typedef signed int int32;
645
646 #ifndef TRUE
647 #define TRUE 1
648 #define FALSE 0
649 #endif
650
651 typedef float codetype;
652
653 // @NOTE
654 //
655 // Some arrays below are tagged "//varies", which means it's actually
656 // a variable-sized piece of data, but rather than malloc I assume it's
657 // small enough it's better to just allocate it all together with the
658 // main thing
659 //
660 // Most of the variables are specified with the smallest size I could pack
661 // them into. It might give better performance to make them all full-sized
662 // integers. It should be safe to freely rearrange the structures or change
663 // the sizes larger--nothing relies on silently truncating etc., nor the
664 // order of variables.
665
666 #define FAST_HUFFMAN_TABLE_SIZE (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
667 #define FAST_HUFFMAN_TABLE_MASK (FAST_HUFFMAN_TABLE_SIZE - 1)
668
669 typedef struct
670 {
671 int dimensions, entries;
672 uint8 *codeword_lengths;
673 float minimum_value;
674 float delta_value;
675 uint8 value_bits;
676 uint8 lookup_type;
677 uint8 sequence_p;
678 uint8 sparse;
679 uint32 lookup_values;
680 codetype *multiplicands;
681 uint32 *codewords;
682 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
683 int16 fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
684 #else
685 int32 fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
686 #endif
687 uint32 *sorted_codewords;
688 int *sorted_values;
689 int sorted_entries;
690 } Codebook;
691
692 typedef struct
693 {
694 uint8 order;
695 uint16 rate;
696 uint16 bark_map_size;
697 uint8 amplitude_bits;
698 uint8 amplitude_offset;
699 uint8 number_of_books;
700 uint8 book_list[16]; // varies
701 } Floor0;
702
703 typedef struct
704 {
705 uint8 partitions;
706 uint8 partition_class_list[32]; // varies
707 uint8 class_dimensions[16]; // varies
708 uint8 class_subclasses[16]; // varies
709 uint8 class_masterbooks[16]; // varies
710 int16 subclass_books[16][8]; // varies
711 uint16 Xlist[31*8+2]; // varies
712 uint8 sorted_order[31*8+2];
713 uint8 neighbors[31*8+2][2];
714 uint8 floor1_multiplier;
715 uint8 rangebits;
716 int values;
717 } Floor1;
718
719 typedef union
720 {
721 Floor0 floor0;
722 Floor1 floor1;
723 } Floor;
724
725 typedef struct
726 {
727 uint32 begin, end;
728 uint32 part_size;
729 uint8 classifications;
730 uint8 classbook;
731 uint8 **classdata;
732 int16 (*residue_books)[8];
733 } Residue;
734
735 typedef struct
736 {
737 uint8 magnitude;
738 uint8 angle;
739 uint8 mux;
740 } MappingChannel;
741
742 typedef struct
743 {
744 uint16 coupling_steps;
745 MappingChannel *chan;
746 uint8 submaps;
747 uint8 submap_floor[15]; // varies
748 uint8 submap_residue[15]; // varies
749 } Mapping;
750
751 typedef struct
752 {
753 uint8 blockflag;
754 uint8 mapping;
755 uint16 windowtype;
756 uint16 transformtype;
757 } Mode;
758
759 typedef struct
760 {
761 uint32 goal_crc; // expected crc if match
762 int bytes_left; // bytes left in packet
763 uint32 crc_so_far; // running crc
764 int bytes_done; // bytes processed in _current_ chunk
765 uint32 sample_loc; // granule pos encoded in page
766 } CRCscan;
767
768 typedef struct
769 {
770 uint32 page_start, page_end;
771 uint32 last_decoded_sample;
772 } ProbedPage;
773
774 struct stb_vorbis
775 {
776 // user-accessible info
777 unsigned int sample_rate;
778 int channels;
779
780 unsigned int setup_memory_required;
781 unsigned int temp_memory_required;
782 unsigned int setup_temp_memory_required;
783
784 char *vendor;
785 int comment_list_length;
786 char **comment_list;
787
788 // input config
789 #ifndef STB_VORBIS_NO_STDIO
790 FILE *f;
791 uint32 f_start;
792 int close_on_free;
793 #endif
794
795 uint8 *stream;
796 uint8 *stream_start;
797 uint8 *stream_end;
798
799 uint32 stream_len;
800
801 uint8 push_mode;
802
803 // the page to seek to when seeking to start, may be zero
804 uint32 first_audio_page_offset;
805
806 // p_first is the page on which the first audio packet ends
807 // (but not necessarily the page on which it starts)
808 ProbedPage p_first, p_last;
809
810 // memory management
811 stb_vorbis_alloc alloc;
812 int setup_offset;
813 int temp_offset;
814
815 // run-time results
816 int eof;
817 enum STBVorbisError error;
818
819 // user-useful data
820
821 // header info
822 int blocksize[2];
823 int blocksize_0, blocksize_1;
824 int codebook_count;
825 Codebook *codebooks;
826 int floor_count;
827 uint16 floor_types[64]; // varies
828 Floor *floor_config;
829 int residue_count;
830 uint16 residue_types[64]; // varies
831 Residue *residue_config;
832 int mapping_count;
833 Mapping *mapping;
834 int mode_count;
835 Mode mode_config[64]; // varies
836
837 uint32 total_samples;
838
839 // decode buffer
840 float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
841 float *outputs [STB_VORBIS_MAX_CHANNELS];
842
843 float *previous_window[STB_VORBIS_MAX_CHANNELS];
844 int previous_length;
845
846 #ifndef STB_VORBIS_NO_DEFER_FLOOR
847 int16 *finalY[STB_VORBIS_MAX_CHANNELS];
848 #else
849 float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
850 #endif
851
852 uint32 current_loc; // sample location of next frame to decode
853 int current_loc_valid;
854
855 // per-blocksize precomputed data
856
857 // twiddle factors
858 float *A[2],*B[2],*C[2];
859 float *window[2];
860 uint16 *bit_reverse[2];
861
862 // current page/packet/segment streaming info
863 uint32 serial; // stream serial number for verification
864 int last_page;
865 int segment_count;
866 uint8 segments[255];
867 uint8 page_flag;
868 uint8 bytes_in_seg;
869 uint8 first_decode;
870 int next_seg;
871 int last_seg; // flag that we're on the last segment
872 int last_seg_which; // what was the segment number of the last seg?
873 uint32 acc;
874 int valid_bits;
875 int packet_bytes;
876 int end_seg_with_known_loc;
877 uint32 known_loc_for_packet;
878 int discard_samples_deferred;
879 uint32 samples_output;
880
881 // push mode scanning
882 int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
883 #ifndef STB_VORBIS_NO_PUSHDATA_API
884 CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
885 #endif
886
887 // sample-access
888 int channel_buffer_start;
889 int channel_buffer_end;
890 };
891
892 #if defined(STB_VORBIS_NO_PUSHDATA_API)
893 #define IS_PUSH_MODE(f) FALSE
894 #elif defined(STB_VORBIS_NO_PULLDATA_API)
895 #define IS_PUSH_MODE(f) TRUE
896 #else
897 #define IS_PUSH_MODE(f) ((f)->push_mode)
898 #endif
899
900 typedef struct stb_vorbis vorb;
901
error(vorb * f,enum STBVorbisError e)902 static int error(vorb *f, enum STBVorbisError e)
903 {
904 f->error = e;
905 if (!f->eof && e != VORBIS_need_more_data) {
906 f->error=e; // breakpoint for debugging
907 }
908 return 0;
909 }
910
911
912 // these functions are used for allocating temporary memory
913 // while decoding. if you can afford the stack space, use
914 // alloca(); otherwise, provide a temp buffer and it will
915 // allocate out of those.
916
917 #define array_size_required(count,size) (count*(sizeof(void *)+(size)))
918
919 #define temp_alloc(f,size) (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : FAudio_alloca(size))
920 #define temp_free(f,p) FAudio_dealloca(p)
921 #define temp_alloc_save(f) ((f)->temp_offset)
922 #define temp_alloc_restore(f,p) ((f)->temp_offset = (p))
923
924 #define temp_block_array(f,count,size) make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
925
926 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
make_block_array(void * mem,int count,int size)927 static void *make_block_array(void *mem, int count, int size)
928 {
929 int i;
930 void ** p = (void **) mem;
931 char *q = (char *) (p + count);
932 for (i=0; i < count; ++i) {
933 p[i] = q;
934 q += size;
935 }
936 return p;
937 }
938
939 // FIXME: https://github.com/nothings/stb/pull/1005 -flibit
940 static char zeromalloc = 0;
941
setup_malloc(vorb * f,int sz)942 static void *setup_malloc(vorb *f, int sz)
943 {
944 sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
945 f->setup_memory_required += sz;
946 if (f->alloc.alloc_buffer) {
947 void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
948 if (f->setup_offset + sz > f->temp_offset) return NULL;
949 f->setup_offset += sz;
950 return p;
951 }
952 return sz ? malloc(sz) : &zeromalloc;
953 }
954
setup_free(vorb * f,void * p)955 static void setup_free(vorb *f, void *p)
956 {
957 if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack
958 if (p != &zeromalloc)
959 free(p);
960 }
961
setup_temp_malloc(vorb * f,int sz)962 static void *setup_temp_malloc(vorb *f, int sz)
963 {
964 sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
965 if (f->alloc.alloc_buffer) {
966 if (f->temp_offset - sz < f->setup_offset) return NULL;
967 f->temp_offset -= sz;
968 return (char *) f->alloc.alloc_buffer + f->temp_offset;
969 }
970 return malloc(sz);
971 }
972
setup_temp_free(vorb * f,void * p,int sz)973 static void setup_temp_free(vorb *f, void *p, int sz)
974 {
975 if (f->alloc.alloc_buffer) {
976 f->temp_offset += (sz+7)&~7;
977 return;
978 }
979 free(p);
980 }
981
982 #define CRC32_POLY 0x04c11db7 // from spec
983
984 static uint32 crc_table[256];
crc32_init(void)985 static void crc32_init(void)
986 {
987 int i,j;
988 uint32 s;
989 for(i=0; i < 256; i++) {
990 for (s=(uint32) i << 24, j=0; j < 8; ++j)
991 s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
992 crc_table[i] = s;
993 }
994 }
995
crc32_update(uint32 crc,uint8 byte)996 static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
997 {
998 return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
999 }
1000
1001
1002 // used in setup, and for huffman that doesn't go fast path
bit_reverse(unsigned int n)1003 static unsigned int bit_reverse(unsigned int n)
1004 {
1005 n = ((n & 0xAAAAAAAA) >> 1) | ((n & 0x55555555) << 1);
1006 n = ((n & 0xCCCCCCCC) >> 2) | ((n & 0x33333333) << 2);
1007 n = ((n & 0xF0F0F0F0) >> 4) | ((n & 0x0F0F0F0F) << 4);
1008 n = ((n & 0xFF00FF00) >> 8) | ((n & 0x00FF00FF) << 8);
1009 return (n >> 16) | (n << 16);
1010 }
1011
square(float x)1012 static float square(float x)
1013 {
1014 return x*x;
1015 }
1016
1017 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
1018 // as required by the specification. fast(?) implementation from stb.h
1019 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
ilog(int32 n)1020 static int ilog(int32 n)
1021 {
1022 static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
1023
1024 if (n < 0) return 0; // signed n returns 0
1025
1026 // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
1027 if (n < (1 << 14))
1028 if (n < (1 << 4)) return 0 + log2_4[n ];
1029 else if (n < (1 << 9)) return 5 + log2_4[n >> 5];
1030 else return 10 + log2_4[n >> 10];
1031 else if (n < (1 << 24))
1032 if (n < (1 << 19)) return 15 + log2_4[n >> 15];
1033 else return 20 + log2_4[n >> 20];
1034 else if (n < (1 << 29)) return 25 + log2_4[n >> 25];
1035 else return 30 + log2_4[n >> 30];
1036 }
1037
1038 #ifndef M_PI
1039 #define M_PI 3.14159265358979323846264f // from CRC
1040 #endif
1041
1042 // code length assigned to a value with no huffman encoding
1043 #define NO_CODE 255
1044
1045 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
1046 //
1047 // these functions are only called at setup, and only a few times
1048 // per file
1049
float32_unpack(uint32 x)1050 static float float32_unpack(uint32 x)
1051 {
1052 // from the specification
1053 uint32 mantissa = x & 0x1fffff;
1054 uint32 sign = x & 0x80000000;
1055 uint32 exp = (x & 0x7fe00000) >> 21;
1056 double res = sign ? -(double)mantissa : (double)mantissa;
1057 return (float) ldexp((float)res, exp-788);
1058 }
1059
1060
1061 // zlib & jpeg huffman tables assume that the output symbols
1062 // can either be arbitrarily arranged, or have monotonically
1063 // increasing frequencies--they rely on the lengths being sorted;
1064 // this makes for a very simple generation algorithm.
1065 // vorbis allows a huffman table with non-sorted lengths. This
1066 // requires a more sophisticated construction, since symbols in
1067 // order do not map to huffman codes "in order".
add_entry(Codebook * c,uint32 huff_code,int symbol,int count,int len,uint32 * values)1068 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
1069 {
1070 if (!c->sparse) {
1071 c->codewords [symbol] = huff_code;
1072 } else {
1073 c->codewords [count] = huff_code;
1074 c->codeword_lengths[count] = len;
1075 values [count] = symbol;
1076 }
1077 }
1078
compute_codewords(Codebook * c,uint8 * len,int n,uint32 * values)1079 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
1080 {
1081 int i,k,m=0;
1082 uint32 available[32];
1083
1084 memset(available, 0, sizeof(available));
1085 // find the first entry
1086 for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
1087 if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
1088 // add to the list
1089 add_entry(c, 0, k, m++, len[k], values);
1090 // add all available leaves
1091 for (i=1; i <= len[k]; ++i)
1092 available[i] = 1U << (32-i);
1093 // note that the above code treats the first case specially,
1094 // but it's really the same as the following code, so they
1095 // could probably be combined (except the initial code is 0,
1096 // and I use 0 in available[] to mean 'empty')
1097 for (i=k+1; i < n; ++i) {
1098 uint32 res;
1099 int z = len[i], y;
1100 if (z == NO_CODE) continue;
1101 // find lowest available leaf (should always be earliest,
1102 // which is what the specification calls for)
1103 // note that this property, and the fact we can never have
1104 // more than one free leaf at a given level, isn't totally
1105 // trivial to prove, but it seems true and the assert never
1106 // fires, so!
1107 while (z > 0 && !available[z]) --z;
1108 if (z == 0) { return FALSE; }
1109 res = available[z];
1110 assert(z >= 0 && z < 32);
1111 available[z] = 0;
1112 add_entry(c, bit_reverse(res), i, m++, len[i], values);
1113 // propagate availability up the tree
1114 if (z != len[i]) {
1115 assert(len[i] >= 0 && len[i] < 32);
1116 for (y=len[i]; y > z; --y) {
1117 assert(available[y] == 0);
1118 available[y] = res + (1 << (32-y));
1119 }
1120 }
1121 }
1122 return TRUE;
1123 }
1124
1125 // accelerated huffman table allows fast O(1) match of all symbols
1126 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
compute_accelerated_huffman(Codebook * c)1127 static void compute_accelerated_huffman(Codebook *c)
1128 {
1129 int i, len;
1130 for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
1131 c->fast_huffman[i] = -1;
1132
1133 len = c->sparse ? c->sorted_entries : c->entries;
1134 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
1135 if (len > 32767) len = 32767; // largest possible value we can encode!
1136 #endif
1137 for (i=0; i < len; ++i) {
1138 if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
1139 uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
1140 // set table entries for all bit combinations in the higher bits
1141 while (z < FAST_HUFFMAN_TABLE_SIZE) {
1142 c->fast_huffman[z] = i;
1143 z += 1 << c->codeword_lengths[i];
1144 }
1145 }
1146 }
1147 }
1148
1149 #ifdef _MSC_VER
1150 #define STBV_CDECL __cdecl
1151 #else
1152 #define STBV_CDECL
1153 #endif
1154
uint32_compare(const void * p,const void * q)1155 static int STBV_CDECL uint32_compare(const void *p, const void *q)
1156 {
1157 uint32 x = * (uint32 *) p;
1158 uint32 y = * (uint32 *) q;
1159 return x < y ? -1 : x > y;
1160 }
1161
include_in_sort(Codebook * c,uint8 len)1162 static int include_in_sort(Codebook *c, uint8 len)
1163 {
1164 if (c->sparse) { assert(len != NO_CODE); return TRUE; }
1165 if (len == NO_CODE) return FALSE;
1166 if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
1167 return FALSE;
1168 }
1169
1170 // if the fast table above doesn't work, we want to binary
1171 // search them... need to reverse the bits
compute_sorted_huffman(Codebook * c,uint8 * lengths,uint32 * values)1172 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
1173 {
1174 int i, len;
1175 // build a list of all the entries
1176 // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
1177 // this is kind of a frivolous optimization--I don't see any performance improvement,
1178 // but it's like 4 extra lines of code, so.
1179 if (!c->sparse) {
1180 int k = 0;
1181 for (i=0; i < c->entries; ++i)
1182 if (include_in_sort(c, lengths[i]))
1183 c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
1184 assert(k == c->sorted_entries);
1185 } else {
1186 for (i=0; i < c->sorted_entries; ++i)
1187 c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
1188 }
1189
1190 qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
1191 c->sorted_codewords[c->sorted_entries] = 0xffffffff;
1192
1193 len = c->sparse ? c->sorted_entries : c->entries;
1194 // now we need to indicate how they correspond; we could either
1195 // #1: sort a different data structure that says who they correspond to
1196 // #2: for each sorted entry, search the original list to find who corresponds
1197 // #3: for each original entry, find the sorted entry
1198 // #1 requires extra storage, #2 is slow, #3 can use binary search!
1199 for (i=0; i < len; ++i) {
1200 int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
1201 if (include_in_sort(c,huff_len)) {
1202 uint32 code = bit_reverse(c->codewords[i]);
1203 int x=0, n=c->sorted_entries;
1204 while (n > 1) {
1205 // invariant: sc[x] <= code < sc[x+n]
1206 int m = x + (n >> 1);
1207 if (c->sorted_codewords[m] <= code) {
1208 x = m;
1209 n -= (n>>1);
1210 } else {
1211 n >>= 1;
1212 }
1213 }
1214 assert(c->sorted_codewords[x] == code);
1215 if (c->sparse) {
1216 c->sorted_values[x] = values[i];
1217 c->codeword_lengths[x] = huff_len;
1218 } else {
1219 c->sorted_values[x] = i;
1220 }
1221 }
1222 }
1223 }
1224
1225 // only run while parsing the header (3 times)
vorbis_validate(uint8 * data)1226 static int vorbis_validate(uint8 *data)
1227 {
1228 static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
1229 return memcmp(data, vorbis, 6) == 0;
1230 }
1231
1232 // called from setup only, once per code book
1233 // (formula implied by specification)
lookup1_values(int entries,int dim)1234 static int lookup1_values(int entries, int dim)
1235 {
1236 int r = (int) floor(exp((float) log((float) entries) / dim));
1237 if ((int) floor(pow((float) r+1, dim)) <= entries) // (int) cast for MinGW warning;
1238 ++r; // floor() to avoid _ftol() when non-CRT
1239 if (pow((float) r+1, dim) <= entries)
1240 return -1;
1241 if ((int) floor(pow((float) r, dim)) > entries)
1242 return -1;
1243 return r;
1244 }
1245
1246 // called twice per file
compute_twiddle_factors(int n,float * A,float * B,float * C)1247 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
1248 {
1249 int n4 = n >> 2, n8 = n >> 3;
1250 int k,k2;
1251
1252 for (k=k2=0; k < n4; ++k,k2+=2) {
1253 A[k2 ] = (float) cos(4*k*M_PI/n);
1254 A[k2+1] = (float) -sin(4*k*M_PI/n);
1255 B[k2 ] = (float) cos((k2+1)*M_PI/n/2) * 0.5f;
1256 B[k2+1] = (float) sin((k2+1)*M_PI/n/2) * 0.5f;
1257 }
1258 for (k=k2=0; k < n8; ++k,k2+=2) {
1259 C[k2 ] = (float) cos(2*(k2+1)*M_PI/n);
1260 C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
1261 }
1262 }
1263
compute_window(int n,float * window)1264 static void compute_window(int n, float *window)
1265 {
1266 int n2 = n >> 1, i;
1267 for (i=0; i < n2; ++i)
1268 window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
1269 }
1270
compute_bitreverse(int n,uint16 * rev)1271 static void compute_bitreverse(int n, uint16 *rev)
1272 {
1273 int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
1274 int i, n8 = n >> 3;
1275 for (i=0; i < n8; ++i)
1276 rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
1277 }
1278
init_blocksize(vorb * f,int b,int n)1279 static int init_blocksize(vorb *f, int b, int n)
1280 {
1281 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
1282 f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1283 f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1284 f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
1285 if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
1286 compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
1287 f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1288 if (!f->window[b]) return error(f, VORBIS_outofmem);
1289 compute_window(n, f->window[b]);
1290 f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
1291 if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
1292 compute_bitreverse(n, f->bit_reverse[b]);
1293 return TRUE;
1294 }
1295
neighbors(uint16 * x,int n,int * plow,int * phigh)1296 static void neighbors(uint16 *x, int n, int *plow, int *phigh)
1297 {
1298 int low = -1;
1299 int high = 65536;
1300 int i;
1301 for (i=0; i < n; ++i) {
1302 if (x[i] > low && x[i] < x[n]) { *plow = i; low = x[i]; }
1303 if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
1304 }
1305 }
1306
1307 // this has been repurposed so y is now the original index instead of y
1308 typedef struct
1309 {
1310 uint16 x,id;
1311 } stbv__floor_ordering;
1312
point_compare(const void * p,const void * q)1313 static int STBV_CDECL point_compare(const void *p, const void *q)
1314 {
1315 stbv__floor_ordering *a = (stbv__floor_ordering *) p;
1316 stbv__floor_ordering *b = (stbv__floor_ordering *) q;
1317 return a->x < b->x ? -1 : a->x > b->x;
1318 }
1319
1320 //
1321 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
1322
1323
1324 #if defined(STB_VORBIS_NO_STDIO)
1325 #define USE_MEMORY(z) TRUE
1326 #else
1327 #define USE_MEMORY(z) ((z)->stream)
1328 #endif
1329
get8(vorb * z)1330 static uint8 get8(vorb *z)
1331 {
1332 if (USE_MEMORY(z)) {
1333 if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
1334 return *z->stream++;
1335 }
1336
1337 #ifndef STB_VORBIS_NO_STDIO
1338 {
1339 /* FAudio change! */
1340 int8 c;
1341 if (fread(&c, 1, 1, z->f) != 1) { z->eof = TRUE; return 0; }
1342 return c;
1343 }
1344 #endif
1345 }
1346
get32(vorb * f)1347 static uint32 get32(vorb *f)
1348 {
1349 uint32 x;
1350 x = get8(f);
1351 x += get8(f) << 8;
1352 x += get8(f) << 16;
1353 x += (uint32) get8(f) << 24;
1354 return x;
1355 }
1356
getn(vorb * z,uint8 * data,int n)1357 static int getn(vorb *z, uint8 *data, int n)
1358 {
1359 if (USE_MEMORY(z)) {
1360 if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
1361 memcpy(data, z->stream, n);
1362 z->stream += n;
1363 return 1;
1364 }
1365
1366 #ifndef STB_VORBIS_NO_STDIO
1367 if (fread(data, n, 1, z->f) == 1)
1368 return 1;
1369 else {
1370 z->eof = 1;
1371 return 0;
1372 }
1373 #endif
1374 }
1375
skip(vorb * z,int n)1376 static void skip(vorb *z, int n)
1377 {
1378 if (USE_MEMORY(z)) {
1379 z->stream += n;
1380 if (z->stream >= z->stream_end) z->eof = 1;
1381 return;
1382 }
1383 #ifndef STB_VORBIS_NO_STDIO
1384 {
1385 long x = ftell(z->f);
1386 fseek(z->f, x+n, SEEK_SET);
1387 }
1388 #endif
1389 }
1390
set_file_offset(stb_vorbis * f,unsigned int loc)1391 static int set_file_offset(stb_vorbis *f, unsigned int loc)
1392 {
1393 #ifndef STB_VORBIS_NO_PUSHDATA_API
1394 if (f->push_mode) return 0;
1395 #endif
1396 f->eof = 0;
1397 if (USE_MEMORY(f)) {
1398 if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
1399 f->stream = f->stream_end;
1400 f->eof = 1;
1401 return 0;
1402 } else {
1403 f->stream = f->stream_start + loc;
1404 return 1;
1405 }
1406 }
1407 #ifndef STB_VORBIS_NO_STDIO
1408 if (loc + f->f_start < loc || loc >= 0x80000000) {
1409 loc = 0x7fffffff;
1410 f->eof = 1;
1411 } else {
1412 loc += f->f_start;
1413 }
1414 if (fseek(f->f, loc, SEEK_SET) != -1) /* FAudio change! */
1415 return 1;
1416 f->eof = 1;
1417 fseek(f->f, f->f_start, SEEK_END);
1418 return 0;
1419 #endif
1420 }
1421
1422
1423 static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
1424
capture_pattern(vorb * f)1425 static int capture_pattern(vorb *f)
1426 {
1427 if (0x4f != get8(f)) return FALSE;
1428 if (0x67 != get8(f)) return FALSE;
1429 if (0x67 != get8(f)) return FALSE;
1430 if (0x53 != get8(f)) return FALSE;
1431 return TRUE;
1432 }
1433
1434 #define PAGEFLAG_continued_packet 1
1435 #define PAGEFLAG_first_page 2
1436 #define PAGEFLAG_last_page 4
1437
start_page_no_capturepattern(vorb * f)1438 static int start_page_no_capturepattern(vorb *f)
1439 {
1440 uint32 loc0,loc1,n;
1441 if (f->first_decode && !IS_PUSH_MODE(f)) {
1442 f->p_first.page_start = stb_vorbis_get_file_offset(f) - 4;
1443 }
1444 // stream structure version
1445 if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
1446 // header flag
1447 f->page_flag = get8(f);
1448 // absolute granule position
1449 loc0 = get32(f);
1450 loc1 = get32(f);
1451 // @TODO: validate loc0,loc1 as valid positions?
1452 // stream serial number -- vorbis doesn't interleave, so discard
1453 get32(f);
1454 //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
1455 // page sequence number
1456 n = get32(f);
1457 f->last_page = n;
1458 // CRC32
1459 get32(f);
1460 // page_segments
1461 f->segment_count = get8(f);
1462 if (!getn(f, f->segments, f->segment_count))
1463 return error(f, VORBIS_unexpected_eof);
1464 // assume we _don't_ know any the sample position of any segments
1465 f->end_seg_with_known_loc = -2;
1466 if (loc0 != ~0U || loc1 != ~0U) {
1467 int i;
1468 // determine which packet is the last one that will complete
1469 for (i=f->segment_count-1; i >= 0; --i)
1470 if (f->segments[i] < 255)
1471 break;
1472 // 'i' is now the index of the _last_ segment of a packet that ends
1473 if (i >= 0) {
1474 f->end_seg_with_known_loc = i;
1475 f->known_loc_for_packet = loc0;
1476 }
1477 }
1478 if (f->first_decode) {
1479 int i,len;
1480 len = 0;
1481 for (i=0; i < f->segment_count; ++i)
1482 len += f->segments[i];
1483 len += 27 + f->segment_count;
1484 f->p_first.page_end = f->p_first.page_start + len;
1485 f->p_first.last_decoded_sample = loc0;
1486 }
1487 f->next_seg = 0;
1488 return TRUE;
1489 }
1490
start_page(vorb * f)1491 static int start_page(vorb *f)
1492 {
1493 if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
1494 return start_page_no_capturepattern(f);
1495 }
1496
start_packet(vorb * f)1497 static int start_packet(vorb *f)
1498 {
1499 while (f->next_seg == -1) {
1500 if (!start_page(f)) return FALSE;
1501 if (f->page_flag & PAGEFLAG_continued_packet)
1502 return error(f, VORBIS_continued_packet_flag_invalid);
1503 }
1504 f->last_seg = FALSE;
1505 f->valid_bits = 0;
1506 f->packet_bytes = 0;
1507 f->bytes_in_seg = 0;
1508 // f->next_seg is now valid
1509 return TRUE;
1510 }
1511
maybe_start_packet(vorb * f)1512 static int maybe_start_packet(vorb *f)
1513 {
1514 if (f->next_seg == -1) {
1515 int x = get8(f);
1516 if (f->eof) return FALSE; // EOF at page boundary is not an error!
1517 if (0x4f != x ) return error(f, VORBIS_missing_capture_pattern);
1518 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1519 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1520 if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1521 if (!start_page_no_capturepattern(f)) return FALSE;
1522 if (f->page_flag & PAGEFLAG_continued_packet) {
1523 // set up enough state that we can read this packet if we want,
1524 // e.g. during recovery
1525 f->last_seg = FALSE;
1526 f->bytes_in_seg = 0;
1527 return error(f, VORBIS_continued_packet_flag_invalid);
1528 }
1529 }
1530 return start_packet(f);
1531 }
1532
next_segment(vorb * f)1533 static int next_segment(vorb *f)
1534 {
1535 int len;
1536 if (f->last_seg) return 0;
1537 if (f->next_seg == -1) {
1538 f->last_seg_which = f->segment_count-1; // in case start_page fails
1539 if (!start_page(f)) { f->last_seg = 1; return 0; }
1540 if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1541 }
1542 len = f->segments[f->next_seg++];
1543 if (len < 255) {
1544 f->last_seg = TRUE;
1545 f->last_seg_which = f->next_seg-1;
1546 }
1547 if (f->next_seg >= f->segment_count)
1548 f->next_seg = -1;
1549 assert(f->bytes_in_seg == 0);
1550 f->bytes_in_seg = len;
1551 return len;
1552 }
1553
1554 #define EOP (-1)
1555 #define INVALID_BITS (-1)
1556
get8_packet_raw(vorb * f)1557 static int get8_packet_raw(vorb *f)
1558 {
1559 if (!f->bytes_in_seg) { // CLANG!
1560 if (f->last_seg) return EOP;
1561 else if (!next_segment(f)) return EOP;
1562 }
1563 assert(f->bytes_in_seg > 0);
1564 --f->bytes_in_seg;
1565 ++f->packet_bytes;
1566 return get8(f);
1567 }
1568
get8_packet(vorb * f)1569 static int get8_packet(vorb *f)
1570 {
1571 int x = get8_packet_raw(f);
1572 f->valid_bits = 0;
1573 return x;
1574 }
1575
get32_packet(vorb * f)1576 static int get32_packet(vorb *f)
1577 {
1578 uint32 x;
1579 x = get8_packet(f);
1580 x += get8_packet(f) << 8;
1581 x += get8_packet(f) << 16;
1582 x += (uint32) get8_packet(f) << 24;
1583 return x;
1584 }
1585
flush_packet(vorb * f)1586 static void flush_packet(vorb *f)
1587 {
1588 while (get8_packet_raw(f) != EOP);
1589 }
1590
1591 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1592 // as the huffman decoder?
get_bits(vorb * f,int n)1593 static uint32 get_bits(vorb *f, int n)
1594 {
1595 uint32 z;
1596
1597 if (f->valid_bits < 0) return 0;
1598 if (f->valid_bits < n) {
1599 if (n > 24) {
1600 // the accumulator technique below would not work correctly in this case
1601 z = get_bits(f, 24);
1602 z += get_bits(f, n-24) << 24;
1603 return z;
1604 }
1605 if (f->valid_bits == 0) f->acc = 0;
1606 while (f->valid_bits < n) {
1607 int z = get8_packet_raw(f);
1608 if (z == EOP) {
1609 f->valid_bits = INVALID_BITS;
1610 return 0;
1611 }
1612 f->acc += z << f->valid_bits;
1613 f->valid_bits += 8;
1614 }
1615 }
1616
1617 assert(f->valid_bits >= n);
1618 z = f->acc & ((1 << n)-1);
1619 f->acc >>= n;
1620 f->valid_bits -= n;
1621 return z;
1622 }
1623
1624 // @OPTIMIZE: primary accumulator for huffman
1625 // expand the buffer to as many bits as possible without reading off end of packet
1626 // it might be nice to allow f->valid_bits and f->acc to be stored in registers,
1627 // e.g. cache them locally and decode locally
prep_huffman(vorb * f)1628 static __forceinline void prep_huffman(vorb *f)
1629 {
1630 if (f->valid_bits <= 24) {
1631 if (f->valid_bits == 0) f->acc = 0;
1632 do {
1633 int z;
1634 if (f->last_seg && !f->bytes_in_seg) return;
1635 z = get8_packet_raw(f);
1636 if (z == EOP) return;
1637 f->acc += (unsigned) z << f->valid_bits;
1638 f->valid_bits += 8;
1639 } while (f->valid_bits <= 24);
1640 }
1641 }
1642
1643 enum
1644 {
1645 VORBIS_packet_id = 1,
1646 VORBIS_packet_comment = 3,
1647 VORBIS_packet_setup = 5
1648 };
1649
codebook_decode_scalar_raw(vorb * f,Codebook * c)1650 static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
1651 {
1652 int i;
1653 prep_huffman(f);
1654
1655 if (c->codewords == NULL && c->sorted_codewords == NULL)
1656 return -1;
1657
1658 // cases to use binary search: sorted_codewords && !c->codewords
1659 // sorted_codewords && c->entries > 8
1660 if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
1661 // binary search
1662 uint32 code = bit_reverse(f->acc);
1663 int x=0, n=c->sorted_entries, len;
1664
1665 while (n > 1) {
1666 // invariant: sc[x] <= code < sc[x+n]
1667 int m = x + (n >> 1);
1668 if (c->sorted_codewords[m] <= code) {
1669 x = m;
1670 n -= (n>>1);
1671 } else {
1672 n >>= 1;
1673 }
1674 }
1675 // x is now the sorted index
1676 if (!c->sparse) x = c->sorted_values[x];
1677 // x is now sorted index if sparse, or symbol otherwise
1678 len = c->codeword_lengths[x];
1679 if (f->valid_bits >= len) {
1680 f->acc >>= len;
1681 f->valid_bits -= len;
1682 return x;
1683 }
1684
1685 f->valid_bits = 0;
1686 return -1;
1687 }
1688
1689 // if small, linear search
1690 assert(!c->sparse);
1691 for (i=0; i < c->entries; ++i) {
1692 if (c->codeword_lengths[i] == NO_CODE) continue;
1693 if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
1694 if (f->valid_bits >= c->codeword_lengths[i]) {
1695 f->acc >>= c->codeword_lengths[i];
1696 f->valid_bits -= c->codeword_lengths[i];
1697 return i;
1698 }
1699 f->valid_bits = 0;
1700 return -1;
1701 }
1702 }
1703
1704 error(f, VORBIS_invalid_stream);
1705 f->valid_bits = 0;
1706 return -1;
1707 }
1708
1709 #ifndef STB_VORBIS_NO_INLINE_DECODE
1710
1711 #define DECODE_RAW(var, f,c) \
1712 if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) \
1713 prep_huffman(f); \
1714 var = f->acc & FAST_HUFFMAN_TABLE_MASK; \
1715 var = c->fast_huffman[var]; \
1716 if (var >= 0) { \
1717 int n = c->codeword_lengths[var]; \
1718 f->acc >>= n; \
1719 f->valid_bits -= n; \
1720 if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
1721 } else { \
1722 var = codebook_decode_scalar_raw(f,c); \
1723 }
1724
1725 #else
1726
codebook_decode_scalar(vorb * f,Codebook * c)1727 static int codebook_decode_scalar(vorb *f, Codebook *c)
1728 {
1729 int i;
1730 if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
1731 prep_huffman(f);
1732 // fast huffman table lookup
1733 i = f->acc & FAST_HUFFMAN_TABLE_MASK;
1734 i = c->fast_huffman[i];
1735 if (i >= 0) {
1736 f->acc >>= c->codeword_lengths[i];
1737 f->valid_bits -= c->codeword_lengths[i];
1738 if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
1739 return i;
1740 }
1741 return codebook_decode_scalar_raw(f,c);
1742 }
1743
1744 #define DECODE_RAW(var,f,c) var = codebook_decode_scalar(f,c);
1745
1746 #endif
1747
1748 #define DECODE(var,f,c) \
1749 DECODE_RAW(var,f,c) \
1750 if (c->sparse) var = c->sorted_values[var];
1751
1752 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1753 #define DECODE_VQ(var,f,c) DECODE_RAW(var,f,c)
1754 #else
1755 #define DECODE_VQ(var,f,c) DECODE(var,f,c)
1756 #endif
1757
1758
1759
1760
1761
1762
1763 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
1764 // where we avoid one addition
1765 #define CODEBOOK_ELEMENT(c,off) (c->multiplicands[off])
1766 #define CODEBOOK_ELEMENT_FAST(c,off) (c->multiplicands[off])
1767 #define CODEBOOK_ELEMENT_BASE(c) (0)
1768
codebook_decode_start(vorb * f,Codebook * c)1769 static int codebook_decode_start(vorb *f, Codebook *c)
1770 {
1771 int z = -1;
1772
1773 // type 0 is only legal in a scalar context
1774 if (c->lookup_type == 0)
1775 error(f, VORBIS_invalid_stream);
1776 else {
1777 DECODE_VQ(z,f,c);
1778 if (c->sparse) assert(z < c->sorted_entries);
1779 if (z < 0) { // check for EOP
1780 if (!f->bytes_in_seg)
1781 if (f->last_seg)
1782 return z;
1783 error(f, VORBIS_invalid_stream);
1784 }
1785 }
1786 return z;
1787 }
1788
codebook_decode(vorb * f,Codebook * c,float * output,int len)1789 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1790 {
1791 int i,z = codebook_decode_start(f,c);
1792 if (z < 0) return FALSE;
1793 if (len > c->dimensions) len = c->dimensions;
1794
1795 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1796 if (c->lookup_type == 1) {
1797 float last = CODEBOOK_ELEMENT_BASE(c);
1798 int div = 1;
1799 for (i=0; i < len; ++i) {
1800 int off = (z / div) % c->lookup_values;
1801 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1802 output[i] += val;
1803 if (c->sequence_p) last = val + c->minimum_value;
1804 div *= c->lookup_values;
1805 }
1806 return TRUE;
1807 }
1808 #endif
1809
1810 z *= c->dimensions;
1811 if (c->sequence_p) {
1812 float last = CODEBOOK_ELEMENT_BASE(c);
1813 for (i=0; i < len; ++i) {
1814 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1815 output[i] += val;
1816 last = val + c->minimum_value;
1817 }
1818 } else {
1819 float last = CODEBOOK_ELEMENT_BASE(c);
1820 for (i=0; i < len; ++i) {
1821 output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1822 }
1823 }
1824
1825 return TRUE;
1826 }
1827
codebook_decode_step(vorb * f,Codebook * c,float * output,int len,int step)1828 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1829 {
1830 int i,z = codebook_decode_start(f,c);
1831 float last = CODEBOOK_ELEMENT_BASE(c);
1832 if (z < 0) return FALSE;
1833 if (len > c->dimensions) len = c->dimensions;
1834
1835 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1836 if (c->lookup_type == 1) {
1837 int div = 1;
1838 for (i=0; i < len; ++i) {
1839 int off = (z / div) % c->lookup_values;
1840 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1841 output[i*step] += val;
1842 if (c->sequence_p) last = val;
1843 div *= c->lookup_values;
1844 }
1845 return TRUE;
1846 }
1847 #endif
1848
1849 z *= c->dimensions;
1850 for (i=0; i < len; ++i) {
1851 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1852 output[i*step] += val;
1853 if (c->sequence_p) last = val;
1854 }
1855
1856 return TRUE;
1857 }
1858
codebook_decode_deinterleave_repeat(vorb * f,Codebook * c,float ** outputs,int ch,int * c_inter_p,int * p_inter_p,int len,int total_decode)1859 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1860 {
1861 int c_inter = *c_inter_p;
1862 int p_inter = *p_inter_p;
1863 int i,z, effective = c->dimensions;
1864
1865 // type 0 is only legal in a scalar context
1866 if (c->lookup_type == 0) return error(f, VORBIS_invalid_stream);
1867
1868 while (total_decode > 0) {
1869 float last = CODEBOOK_ELEMENT_BASE(c);
1870 DECODE_VQ(z,f,c);
1871 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1872 assert(!c->sparse || z < c->sorted_entries);
1873 #endif
1874 if (z < 0) {
1875 if (!f->bytes_in_seg)
1876 if (f->last_seg) return FALSE;
1877 return error(f, VORBIS_invalid_stream);
1878 }
1879
1880 // if this will take us off the end of the buffers, stop short!
1881 // we check by computing the length of the virtual interleaved
1882 // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1883 // and the length we'll be using (effective)
1884 if (c_inter + p_inter*ch + effective > len * ch) {
1885 effective = len*ch - (p_inter*ch - c_inter);
1886 }
1887
1888 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1889 if (c->lookup_type == 1) {
1890 int div = 1;
1891 for (i=0; i < effective; ++i) {
1892 int off = (z / div) % c->lookup_values;
1893 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1894 if (outputs[c_inter])
1895 outputs[c_inter][p_inter] += val;
1896 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1897 if (c->sequence_p) last = val;
1898 div *= c->lookup_values;
1899 }
1900 } else
1901 #endif
1902 {
1903 z *= c->dimensions;
1904 if (c->sequence_p) {
1905 for (i=0; i < effective; ++i) {
1906 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1907 if (outputs[c_inter])
1908 outputs[c_inter][p_inter] += val;
1909 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1910 last = val;
1911 }
1912 } else {
1913 for (i=0; i < effective; ++i) {
1914 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1915 if (outputs[c_inter])
1916 outputs[c_inter][p_inter] += val;
1917 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1918 }
1919 }
1920 }
1921
1922 total_decode -= effective;
1923 }
1924 *c_inter_p = c_inter;
1925 *p_inter_p = p_inter;
1926 return TRUE;
1927 }
1928
predict_point(int x,int x0,int x1,int y0,int y1)1929 static int predict_point(int x, int x0, int x1, int y0, int y1)
1930 {
1931 int dy = y1 - y0;
1932 int adx = x1 - x0;
1933 // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
1934 int err = abs(dy) * (x - x0);
1935 int off = err / adx;
1936 return dy < 0 ? y0 - off : y0 + off;
1937 }
1938
1939 // the following table is block-copied from the specification
1940 static float inverse_db_table[256] =
1941 {
1942 1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
1943 1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
1944 1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
1945 2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
1946 2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
1947 3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
1948 4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
1949 6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
1950 7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
1951 1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
1952 1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
1953 1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
1954 2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
1955 2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
1956 3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
1957 4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
1958 5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
1959 7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
1960 9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
1961 1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
1962 1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
1963 2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
1964 2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
1965 3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
1966 4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
1967 5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
1968 7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
1969 9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
1970 0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
1971 0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
1972 0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
1973 0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
1974 0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
1975 0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
1976 0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
1977 0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
1978 0.00092223983f, 0.00098217216f, 0.0010459992f, 0.0011139742f,
1979 0.0011863665f, 0.0012634633f, 0.0013455702f, 0.0014330129f,
1980 0.0015261382f, 0.0016253153f, 0.0017309374f, 0.0018434235f,
1981 0.0019632195f, 0.0020908006f, 0.0022266726f, 0.0023713743f,
1982 0.0025254795f, 0.0026895994f, 0.0028643847f, 0.0030505286f,
1983 0.0032487691f, 0.0034598925f, 0.0036847358f, 0.0039241906f,
1984 0.0041792066f, 0.0044507950f, 0.0047400328f, 0.0050480668f,
1985 0.0053761186f, 0.0057254891f, 0.0060975636f, 0.0064938176f,
1986 0.0069158225f, 0.0073652516f, 0.0078438871f, 0.0083536271f,
1987 0.0088964928f, 0.009474637f, 0.010090352f, 0.010746080f,
1988 0.011444421f, 0.012188144f, 0.012980198f, 0.013823725f,
1989 0.014722068f, 0.015678791f, 0.016697687f, 0.017782797f,
1990 0.018938423f, 0.020169149f, 0.021479854f, 0.022875735f,
1991 0.024362330f, 0.025945531f, 0.027631618f, 0.029427276f,
1992 0.031339626f, 0.033376252f, 0.035545228f, 0.037855157f,
1993 0.040315199f, 0.042935108f, 0.045725273f, 0.048696758f,
1994 0.051861348f, 0.055231591f, 0.058820850f, 0.062643361f,
1995 0.066714279f, 0.071049749f, 0.075666962f, 0.080584227f,
1996 0.085821044f, 0.091398179f, 0.097337747f, 0.10366330f,
1997 0.11039993f, 0.11757434f, 0.12521498f, 0.13335215f,
1998 0.14201813f, 0.15124727f, 0.16107617f, 0.17154380f,
1999 0.18269168f, 0.19456402f, 0.20720788f, 0.22067342f,
2000 0.23501402f, 0.25028656f, 0.26655159f, 0.28387361f,
2001 0.30232132f, 0.32196786f, 0.34289114f, 0.36517414f,
2002 0.38890521f, 0.41417847f, 0.44109412f, 0.46975890f,
2003 0.50028648f, 0.53279791f, 0.56742212f, 0.60429640f,
2004 0.64356699f, 0.68538959f, 0.72993007f, 0.77736504f,
2005 0.82788260f, 0.88168307f, 0.9389798f, 1.0f
2006 };
2007
2008
2009 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
2010 // note that you must produce bit-identical output to decode correctly;
2011 // this specific sequence of operations is specified in the spec (it's
2012 // drawing integer-quantized frequency-space lines that the encoder
2013 // expects to be exactly the same)
2014 // ... also, isn't the whole point of Bresenham's algorithm to NOT
2015 // have to divide in the setup? sigh.
2016 #ifndef STB_VORBIS_NO_DEFER_FLOOR
2017 #define LINE_OP(a,b) a *= b
2018 #else
2019 #define LINE_OP(a,b) a = b
2020 #endif
2021
2022 #ifdef STB_VORBIS_DIVIDE_TABLE
2023 #define DIVTAB_NUMER 32
2024 #define DIVTAB_DENOM 64
2025 int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
2026 #endif
2027
draw_line(float * output,int x0,int y0,int x1,int y1,int n)2028 static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
2029 {
2030 int dy = y1 - y0;
2031 int adx = x1 - x0;
2032 int ady = abs(dy);
2033 int base;
2034 int x=x0,y=y0;
2035 int err = 0;
2036 int sy;
2037
2038 #ifdef STB_VORBIS_DIVIDE_TABLE
2039 if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
2040 if (dy < 0) {
2041 base = -integer_divide_table[ady][adx];
2042 sy = base-1;
2043 } else {
2044 base = integer_divide_table[ady][adx];
2045 sy = base+1;
2046 }
2047 } else {
2048 base = dy / adx;
2049 if (dy < 0)
2050 sy = base - 1;
2051 else
2052 sy = base+1;
2053 }
2054 #else
2055 base = dy / adx;
2056 if (dy < 0)
2057 sy = base - 1;
2058 else
2059 sy = base+1;
2060 #endif
2061 ady -= abs(base) * adx;
2062 if (x1 > n) x1 = n;
2063 if (x < x1) {
2064 LINE_OP(output[x], inverse_db_table[y&255]);
2065 for (++x; x < x1; ++x) {
2066 err += ady;
2067 if (err >= adx) {
2068 err -= adx;
2069 y += sy;
2070 } else
2071 y += base;
2072 LINE_OP(output[x], inverse_db_table[y&255]);
2073 }
2074 }
2075 }
2076
residue_decode(vorb * f,Codebook * book,float * target,int offset,int n,int rtype)2077 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
2078 {
2079 int k;
2080 if (rtype == 0) {
2081 int step = n / book->dimensions;
2082 for (k=0; k < step; ++k)
2083 if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
2084 return FALSE;
2085 } else {
2086 for (k=0; k < n; ) {
2087 if (!codebook_decode(f, book, target+offset, n-k))
2088 return FALSE;
2089 k += book->dimensions;
2090 offset += book->dimensions;
2091 }
2092 }
2093 return TRUE;
2094 }
2095
2096 // n is 1/2 of the blocksize --
2097 // specification: "Correct per-vector decode length is [n]/2"
decode_residue(vorb * f,float * residue_buffers[],int ch,int n,int rn,uint8 * do_not_decode)2098 static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
2099 {
2100 int i,j,pass;
2101 Residue *r = f->residue_config + rn;
2102 int rtype = f->residue_types[rn];
2103 int c = r->classbook;
2104 int classwords = f->codebooks[c].dimensions;
2105 unsigned int actual_size = rtype == 2 ? n*2 : n;
2106 unsigned int limit_r_begin = (r->begin < actual_size ? r->begin : actual_size);
2107 unsigned int limit_r_end = (r->end < actual_size ? r->end : actual_size);
2108 int n_read = limit_r_end - limit_r_begin;
2109 int part_read = n_read / r->part_size;
2110 int temp_alloc_point = temp_alloc_save(f);
2111 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2112 uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
2113 #else
2114 int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
2115 #endif
2116
2117 CHECK(f);
2118
2119 for (i=0; i < ch; ++i)
2120 if (!do_not_decode[i])
2121 memset(residue_buffers[i], 0, sizeof(float) * n);
2122
2123 if (rtype == 2 && ch != 1) {
2124 for (j=0; j < ch; ++j)
2125 if (!do_not_decode[j])
2126 break;
2127 if (j == ch)
2128 goto done;
2129
2130 for (pass=0; pass < 8; ++pass) {
2131 int pcount = 0, class_set = 0;
2132 if (ch == 2) {
2133 while (pcount < part_read) {
2134 int z = r->begin + pcount*r->part_size;
2135 int c_inter = (z & 1), p_inter = z>>1;
2136 if (pass == 0) {
2137 Codebook *c = f->codebooks+r->classbook;
2138 int q;
2139 DECODE(q,f,c);
2140 if (q == EOP) goto done;
2141 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2142 part_classdata[0][class_set] = r->classdata[q];
2143 #else
2144 for (i=classwords-1; i >= 0; --i) {
2145 classifications[0][i+pcount] = q % r->classifications;
2146 q /= r->classifications;
2147 }
2148 #endif
2149 }
2150 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2151 int z = r->begin + pcount*r->part_size;
2152 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2153 int c = part_classdata[0][class_set][i];
2154 #else
2155 int c = classifications[0][pcount];
2156 #endif
2157 int b = r->residue_books[c][pass];
2158 if (b >= 0) {
2159 Codebook *book = f->codebooks + b;
2160 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
2161 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2162 goto done;
2163 #else
2164 // saves 1%
2165 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2166 goto done;
2167 #endif
2168 } else {
2169 z += r->part_size;
2170 c_inter = z & 1;
2171 p_inter = z >> 1;
2172 }
2173 }
2174 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2175 ++class_set;
2176 #endif
2177 }
2178 } else if (ch > 2) {
2179 while (pcount < part_read) {
2180 int z = r->begin + pcount*r->part_size;
2181 int c_inter = z % ch, p_inter = z/ch;
2182 if (pass == 0) {
2183 Codebook *c = f->codebooks+r->classbook;
2184 int q;
2185 DECODE(q,f,c);
2186 if (q == EOP) goto done;
2187 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2188 part_classdata[0][class_set] = r->classdata[q];
2189 #else
2190 for (i=classwords-1; i >= 0; --i) {
2191 classifications[0][i+pcount] = q % r->classifications;
2192 q /= r->classifications;
2193 }
2194 #endif
2195 }
2196 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2197 int z = r->begin + pcount*r->part_size;
2198 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2199 int c = part_classdata[0][class_set][i];
2200 #else
2201 int c = classifications[0][pcount];
2202 #endif
2203 int b = r->residue_books[c][pass];
2204 if (b >= 0) {
2205 Codebook *book = f->codebooks + b;
2206 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2207 goto done;
2208 } else {
2209 z += r->part_size;
2210 c_inter = z % ch;
2211 p_inter = z / ch;
2212 }
2213 }
2214 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2215 ++class_set;
2216 #endif
2217 }
2218 }
2219 }
2220 goto done;
2221 }
2222 CHECK(f);
2223
2224 for (pass=0; pass < 8; ++pass) {
2225 int pcount = 0, class_set=0;
2226 while (pcount < part_read) {
2227 if (pass == 0) {
2228 for (j=0; j < ch; ++j) {
2229 if (!do_not_decode[j]) {
2230 Codebook *c = f->codebooks+r->classbook;
2231 int temp;
2232 DECODE(temp,f,c);
2233 if (temp == EOP) goto done;
2234 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2235 part_classdata[j][class_set] = r->classdata[temp];
2236 #else
2237 for (i=classwords-1; i >= 0; --i) {
2238 classifications[j][i+pcount] = temp % r->classifications;
2239 temp /= r->classifications;
2240 }
2241 #endif
2242 }
2243 }
2244 }
2245 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2246 for (j=0; j < ch; ++j) {
2247 if (!do_not_decode[j]) {
2248 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2249 int c = part_classdata[j][class_set][i];
2250 #else
2251 int c = classifications[j][pcount];
2252 #endif
2253 int b = r->residue_books[c][pass];
2254 if (b >= 0) {
2255 float *target = residue_buffers[j];
2256 int offset = r->begin + pcount * r->part_size;
2257 int n = r->part_size;
2258 Codebook *book = f->codebooks + b;
2259 if (!residue_decode(f, book, target, offset, n, rtype))
2260 goto done;
2261 }
2262 }
2263 }
2264 }
2265 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2266 ++class_set;
2267 #endif
2268 }
2269 }
2270 done:
2271 CHECK(f);
2272 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2273 temp_free(f,part_classdata);
2274 #else
2275 temp_free(f,classifications);
2276 #endif
2277 temp_alloc_restore(f,temp_alloc_point);
2278 }
2279
2280
2281 #if 0
2282 // slow way for debugging
2283 void inverse_mdct_slow(float *buffer, int n)
2284 {
2285 int i,j;
2286 int n2 = n >> 1;
2287 float *x = (float *) malloc(sizeof(*x) * n2);
2288 memcpy(x, buffer, sizeof(*x) * n2);
2289 for (i=0; i < n; ++i) {
2290 float acc = 0;
2291 for (j=0; j < n2; ++j)
2292 // formula from paper:
2293 //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2294 // formula from wikipedia
2295 //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2296 // these are equivalent, except the formula from the paper inverts the multiplier!
2297 // however, what actually works is NO MULTIPLIER!?!
2298 //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2299 acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2300 buffer[i] = acc;
2301 }
2302 free(x);
2303 }
2304 #elif 0
2305 // same as above, but just barely able to run in real time on modern machines
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2306 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2307 {
2308 float mcos[16384];
2309 int i,j;
2310 int n2 = n >> 1, nmask = (n << 2) -1;
2311 float *x = (float *) malloc(sizeof(*x) * n2);
2312 memcpy(x, buffer, sizeof(*x) * n2);
2313 for (i=0; i < 4*n; ++i)
2314 mcos[i] = (float) cos(M_PI / 2 * i / n);
2315
2316 for (i=0; i < n; ++i) {
2317 float acc = 0;
2318 for (j=0; j < n2; ++j)
2319 acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
2320 buffer[i] = acc;
2321 }
2322 free(x);
2323 }
2324 #elif 0
2325 // transform to use a slow dct-iv; this is STILL basically trivial,
2326 // but only requires half as many ops
dct_iv_slow(float * buffer,int n)2327 void dct_iv_slow(float *buffer, int n)
2328 {
2329 float mcos[16384];
2330 float x[2048];
2331 int i,j;
2332 int n2 = n >> 1, nmask = (n << 3) - 1;
2333 memcpy(x, buffer, sizeof(*x) * n);
2334 for (i=0; i < 8*n; ++i)
2335 mcos[i] = (float) cos(M_PI / 4 * i / n);
2336 for (i=0; i < n; ++i) {
2337 float acc = 0;
2338 for (j=0; j < n; ++j)
2339 acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
2340 buffer[i] = acc;
2341 }
2342 }
2343
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2344 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2345 {
2346 int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
2347 float temp[4096];
2348
2349 memcpy(temp, buffer, n2 * sizeof(float));
2350 dct_iv_slow(temp, n2); // returns -c'-d, a-b'
2351
2352 for (i=0; i < n4 ; ++i) buffer[i] = temp[i+n4]; // a-b'
2353 for ( ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1]; // b-a', c+d'
2354 for ( ; i < n ; ++i) buffer[i] = -temp[i - n3_4]; // c'+d
2355 }
2356 #endif
2357
2358 #ifndef LIBVORBIS_MDCT
2359 #define LIBVORBIS_MDCT 0
2360 #endif
2361
2362 #if LIBVORBIS_MDCT
2363 // directly call the vorbis MDCT using an interface documented
2364 // by Jeff Roberts... useful for performance comparison
2365 typedef struct
2366 {
2367 int n;
2368 int log2n;
2369
2370 float *trig;
2371 int *bitrev;
2372
2373 float scale;
2374 } mdct_lookup;
2375
2376 extern void mdct_init(mdct_lookup *lookup, int n);
2377 extern void mdct_clear(mdct_lookup *l);
2378 extern void mdct_backward(mdct_lookup *init, float *in, float *out);
2379
2380 mdct_lookup M1,M2;
2381
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2382 void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2383 {
2384 mdct_lookup *M;
2385 if (M1.n == n) M = &M1;
2386 else if (M2.n == n) M = &M2;
2387 else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
2388 else {
2389 if (M2.n) __asm int 3;
2390 mdct_init(&M2, n);
2391 M = &M2;
2392 }
2393
2394 mdct_backward(M, buffer, buffer);
2395 }
2396 #endif
2397
2398
2399 // the following were split out into separate functions while optimizing;
2400 // they could be pushed back up but eh. __forceinline showed no change;
2401 // they're probably already being inlined.
imdct_step3_iter0_loop(int n,float * e,int i_off,int k_off,float * A)2402 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
2403 {
2404 float *ee0 = e + i_off;
2405 float *ee2 = ee0 + k_off;
2406 int i;
2407
2408 assert((n & 3) == 0);
2409 for (i=(n>>2); i > 0; --i) {
2410 float k00_20, k01_21;
2411 k00_20 = ee0[ 0] - ee2[ 0];
2412 k01_21 = ee0[-1] - ee2[-1];
2413 ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
2414 ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
2415 ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
2416 ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
2417 A += 8;
2418
2419 k00_20 = ee0[-2] - ee2[-2];
2420 k01_21 = ee0[-3] - ee2[-3];
2421 ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
2422 ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
2423 ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
2424 ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
2425 A += 8;
2426
2427 k00_20 = ee0[-4] - ee2[-4];
2428 k01_21 = ee0[-5] - ee2[-5];
2429 ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
2430 ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
2431 ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
2432 ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
2433 A += 8;
2434
2435 k00_20 = ee0[-6] - ee2[-6];
2436 k01_21 = ee0[-7] - ee2[-7];
2437 ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
2438 ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
2439 ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
2440 ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
2441 A += 8;
2442 ee0 -= 8;
2443 ee2 -= 8;
2444 }
2445 }
2446
imdct_step3_inner_r_loop(int lim,float * e,int d0,int k_off,float * A,int k1)2447 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
2448 {
2449 int i;
2450 float k00_20, k01_21;
2451
2452 float *e0 = e + d0;
2453 float *e2 = e0 + k_off;
2454
2455 for (i=lim >> 2; i > 0; --i) {
2456 k00_20 = e0[-0] - e2[-0];
2457 k01_21 = e0[-1] - e2[-1];
2458 e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
2459 e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
2460 e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
2461 e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
2462
2463 A += k1;
2464
2465 k00_20 = e0[-2] - e2[-2];
2466 k01_21 = e0[-3] - e2[-3];
2467 e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
2468 e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
2469 e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
2470 e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
2471
2472 A += k1;
2473
2474 k00_20 = e0[-4] - e2[-4];
2475 k01_21 = e0[-5] - e2[-5];
2476 e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
2477 e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
2478 e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
2479 e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
2480
2481 A += k1;
2482
2483 k00_20 = e0[-6] - e2[-6];
2484 k01_21 = e0[-7] - e2[-7];
2485 e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
2486 e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
2487 e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
2488 e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
2489
2490 e0 -= 8;
2491 e2 -= 8;
2492
2493 A += k1;
2494 }
2495 }
2496
imdct_step3_inner_s_loop(int n,float * e,int i_off,int k_off,float * A,int a_off,int k0)2497 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
2498 {
2499 int i;
2500 float A0 = A[0];
2501 float A1 = A[0+1];
2502 float A2 = A[0+a_off];
2503 float A3 = A[0+a_off+1];
2504 float A4 = A[0+a_off*2+0];
2505 float A5 = A[0+a_off*2+1];
2506 float A6 = A[0+a_off*3+0];
2507 float A7 = A[0+a_off*3+1];
2508
2509 float k00,k11;
2510
2511 float *ee0 = e +i_off;
2512 float *ee2 = ee0+k_off;
2513
2514 for (i=n; i > 0; --i) {
2515 k00 = ee0[ 0] - ee2[ 0];
2516 k11 = ee0[-1] - ee2[-1];
2517 ee0[ 0] = ee0[ 0] + ee2[ 0];
2518 ee0[-1] = ee0[-1] + ee2[-1];
2519 ee2[ 0] = (k00) * A0 - (k11) * A1;
2520 ee2[-1] = (k11) * A0 + (k00) * A1;
2521
2522 k00 = ee0[-2] - ee2[-2];
2523 k11 = ee0[-3] - ee2[-3];
2524 ee0[-2] = ee0[-2] + ee2[-2];
2525 ee0[-3] = ee0[-3] + ee2[-3];
2526 ee2[-2] = (k00) * A2 - (k11) * A3;
2527 ee2[-3] = (k11) * A2 + (k00) * A3;
2528
2529 k00 = ee0[-4] - ee2[-4];
2530 k11 = ee0[-5] - ee2[-5];
2531 ee0[-4] = ee0[-4] + ee2[-4];
2532 ee0[-5] = ee0[-5] + ee2[-5];
2533 ee2[-4] = (k00) * A4 - (k11) * A5;
2534 ee2[-5] = (k11) * A4 + (k00) * A5;
2535
2536 k00 = ee0[-6] - ee2[-6];
2537 k11 = ee0[-7] - ee2[-7];
2538 ee0[-6] = ee0[-6] + ee2[-6];
2539 ee0[-7] = ee0[-7] + ee2[-7];
2540 ee2[-6] = (k00) * A6 - (k11) * A7;
2541 ee2[-7] = (k11) * A6 + (k00) * A7;
2542
2543 ee0 -= k0;
2544 ee2 -= k0;
2545 }
2546 }
2547
iter_54(float * z)2548 static __forceinline void iter_54(float *z)
2549 {
2550 float k00,k11,k22,k33;
2551 float y0,y1,y2,y3;
2552
2553 k00 = z[ 0] - z[-4];
2554 y0 = z[ 0] + z[-4];
2555 y2 = z[-2] + z[-6];
2556 k22 = z[-2] - z[-6];
2557
2558 z[-0] = y0 + y2; // z0 + z4 + z2 + z6
2559 z[-2] = y0 - y2; // z0 + z4 - z2 - z6
2560
2561 // done with y0,y2
2562
2563 k33 = z[-3] - z[-7];
2564
2565 z[-4] = k00 + k33; // z0 - z4 + z3 - z7
2566 z[-6] = k00 - k33; // z0 - z4 - z3 + z7
2567
2568 // done with k33
2569
2570 k11 = z[-1] - z[-5];
2571 y1 = z[-1] + z[-5];
2572 y3 = z[-3] + z[-7];
2573
2574 z[-1] = y1 + y3; // z1 + z5 + z3 + z7
2575 z[-3] = y1 - y3; // z1 + z5 - z3 - z7
2576 z[-5] = k11 - k22; // z1 - z5 + z2 - z6
2577 z[-7] = k11 + k22; // z1 - z5 - z2 + z6
2578 }
2579
imdct_step3_inner_s_loop_ld654(int n,float * e,int i_off,float * A,int base_n)2580 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
2581 {
2582 int a_off = base_n >> 3;
2583 float A2 = A[0+a_off];
2584 float *z = e + i_off;
2585 float *base = z - 16 * n;
2586
2587 while (z > base) {
2588 float k00,k11;
2589
2590 k00 = z[-0] - z[-8];
2591 k11 = z[-1] - z[-9];
2592 z[-0] = z[-0] + z[-8];
2593 z[-1] = z[-1] + z[-9];
2594 z[-8] = k00;
2595 z[-9] = k11 ;
2596
2597 k00 = z[ -2] - z[-10];
2598 k11 = z[ -3] - z[-11];
2599 z[ -2] = z[ -2] + z[-10];
2600 z[ -3] = z[ -3] + z[-11];
2601 z[-10] = (k00+k11) * A2;
2602 z[-11] = (k11-k00) * A2;
2603
2604 k00 = z[-12] - z[ -4]; // reverse to avoid a unary negation
2605 k11 = z[ -5] - z[-13];
2606 z[ -4] = z[ -4] + z[-12];
2607 z[ -5] = z[ -5] + z[-13];
2608 z[-12] = k11;
2609 z[-13] = k00;
2610
2611 k00 = z[-14] - z[ -6]; // reverse to avoid a unary negation
2612 k11 = z[ -7] - z[-15];
2613 z[ -6] = z[ -6] + z[-14];
2614 z[ -7] = z[ -7] + z[-15];
2615 z[-14] = (k00+k11) * A2;
2616 z[-15] = (k00-k11) * A2;
2617
2618 iter_54(z);
2619 iter_54(z-8);
2620 z -= 16;
2621 }
2622 }
2623
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2624 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2625 {
2626 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2627 int ld;
2628 // @OPTIMIZE: reduce register pressure by using fewer variables?
2629 int save_point = temp_alloc_save(f);
2630 float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
2631 float *u=NULL,*v=NULL;
2632 // twiddle factors
2633 float *A = f->A[blocktype];
2634
2635 // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2636 // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
2637
2638 // kernel from paper
2639
2640
2641 // merged:
2642 // copy and reflect spectral data
2643 // step 0
2644
2645 // note that it turns out that the items added together during
2646 // this step are, in fact, being added to themselves (as reflected
2647 // by step 0). inexplicable inefficiency! this became obvious
2648 // once I combined the passes.
2649
2650 // so there's a missing 'times 2' here (for adding X to itself).
2651 // this propagates through linearly to the end, where the numbers
2652 // are 1/2 too small, and need to be compensated for.
2653
2654 {
2655 float *d,*e, *AA, *e_stop;
2656 d = &buf2[n2-2];
2657 AA = A;
2658 e = &buffer[0];
2659 e_stop = &buffer[n2];
2660 while (e != e_stop) {
2661 d[1] = (e[0] * AA[0] - e[2]*AA[1]);
2662 d[0] = (e[0] * AA[1] + e[2]*AA[0]);
2663 d -= 2;
2664 AA += 2;
2665 e += 4;
2666 }
2667
2668 e = &buffer[n2-3];
2669 while (d >= buf2) {
2670 d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
2671 d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
2672 d -= 2;
2673 AA += 2;
2674 e -= 4;
2675 }
2676 }
2677
2678 // now we use symbolic names for these, so that we can
2679 // possibly swap their meaning as we change which operations
2680 // are in place
2681
2682 u = buffer;
2683 v = buf2;
2684
2685 // step 2 (paper output is w, now u)
2686 // this could be in place, but the data ends up in the wrong
2687 // place... _somebody_'s got to swap it, so this is nominated
2688 {
2689 float *AA = &A[n2-8];
2690 float *d0,*d1, *e0, *e1;
2691
2692 e0 = &v[n4];
2693 e1 = &v[0];
2694
2695 d0 = &u[n4];
2696 d1 = &u[0];
2697
2698 while (AA >= A) {
2699 float v40_20, v41_21;
2700
2701 v41_21 = e0[1] - e1[1];
2702 v40_20 = e0[0] - e1[0];
2703 d0[1] = e0[1] + e1[1];
2704 d0[0] = e0[0] + e1[0];
2705 d1[1] = v41_21*AA[4] - v40_20*AA[5];
2706 d1[0] = v40_20*AA[4] + v41_21*AA[5];
2707
2708 v41_21 = e0[3] - e1[3];
2709 v40_20 = e0[2] - e1[2];
2710 d0[3] = e0[3] + e1[3];
2711 d0[2] = e0[2] + e1[2];
2712 d1[3] = v41_21*AA[0] - v40_20*AA[1];
2713 d1[2] = v40_20*AA[0] + v41_21*AA[1];
2714
2715 AA -= 8;
2716
2717 d0 += 4;
2718 d1 += 4;
2719 e0 += 4;
2720 e1 += 4;
2721 }
2722 }
2723
2724 // step 3
2725 ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2726
2727 // optimized step 3:
2728
2729 // the original step3 loop can be nested r inside s or s inside r;
2730 // it's written originally as s inside r, but this is dumb when r
2731 // iterates many times, and s few. So I have two copies of it and
2732 // switch between them halfway.
2733
2734 // this is iteration 0 of step 3
2735 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2736 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2737
2738 // this is iteration 1 of step 3
2739 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2740 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2741 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2742 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2743
2744 l=2;
2745 for (; l < (ld-3)>>1; ++l) {
2746 int k0 = n >> (l+2), k0_2 = k0>>1;
2747 int lim = 1 << (l+1);
2748 int i;
2749 for (i=0; i < lim; ++i)
2750 imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2751 }
2752
2753 for (; l < ld-6; ++l) {
2754 int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2755 int rlim = n >> (l+6), r;
2756 int lim = 1 << (l+1);
2757 int i_off;
2758 float *A0 = A;
2759 i_off = n2-1;
2760 for (r=rlim; r > 0; --r) {
2761 imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2762 A0 += k1*4;
2763 i_off -= 8;
2764 }
2765 }
2766
2767 // iterations with count:
2768 // ld-6,-5,-4 all interleaved together
2769 // the big win comes from getting rid of needless flops
2770 // due to the constants on pass 5 & 4 being all 1 and 0;
2771 // combining them to be simultaneous to improve cache made little difference
2772 imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2773
2774 // output is u
2775
2776 // step 4, 5, and 6
2777 // cannot be in-place because of step 5
2778 {
2779 uint16 *bitrev = f->bit_reverse[blocktype];
2780 // weirdly, I'd have thought reading sequentially and writing
2781 // erratically would have been better than vice-versa, but in
2782 // fact that's not what my testing showed. (That is, with
2783 // j = bitreverse(i), do you read i and write j, or read j and write i.)
2784
2785 float *d0 = &v[n4-4];
2786 float *d1 = &v[n2-4];
2787 while (d0 >= v) {
2788 int k4;
2789
2790 k4 = bitrev[0];
2791 d1[3] = u[k4+0];
2792 d1[2] = u[k4+1];
2793 d0[3] = u[k4+2];
2794 d0[2] = u[k4+3];
2795
2796 k4 = bitrev[1];
2797 d1[1] = u[k4+0];
2798 d1[0] = u[k4+1];
2799 d0[1] = u[k4+2];
2800 d0[0] = u[k4+3];
2801
2802 d0 -= 4;
2803 d1 -= 4;
2804 bitrev += 2;
2805 }
2806 }
2807 // (paper output is u, now v)
2808
2809
2810 // data must be in buf2
2811 assert(v == buf2);
2812
2813 // step 7 (paper output is v, now v)
2814 // this is now in place
2815 {
2816 float *C = f->C[blocktype];
2817 float *d, *e;
2818
2819 d = v;
2820 e = v + n2 - 4;
2821
2822 while (d < e) {
2823 float a02,a11,b0,b1,b2,b3;
2824
2825 a02 = d[0] - e[2];
2826 a11 = d[1] + e[3];
2827
2828 b0 = C[1]*a02 + C[0]*a11;
2829 b1 = C[1]*a11 - C[0]*a02;
2830
2831 b2 = d[0] + e[ 2];
2832 b3 = d[1] - e[ 3];
2833
2834 d[0] = b2 + b0;
2835 d[1] = b3 + b1;
2836 e[2] = b2 - b0;
2837 e[3] = b1 - b3;
2838
2839 a02 = d[2] - e[0];
2840 a11 = d[3] + e[1];
2841
2842 b0 = C[3]*a02 + C[2]*a11;
2843 b1 = C[3]*a11 - C[2]*a02;
2844
2845 b2 = d[2] + e[ 0];
2846 b3 = d[3] - e[ 1];
2847
2848 d[2] = b2 + b0;
2849 d[3] = b3 + b1;
2850 e[0] = b2 - b0;
2851 e[1] = b1 - b3;
2852
2853 C += 4;
2854 d += 4;
2855 e -= 4;
2856 }
2857 }
2858
2859 // data must be in buf2
2860
2861
2862 // step 8+decode (paper output is X, now buffer)
2863 // this generates pairs of data a la 8 and pushes them directly through
2864 // the decode kernel (pushing rather than pulling) to avoid having
2865 // to make another pass later
2866
2867 // this cannot POSSIBLY be in place, so we refer to the buffers directly
2868
2869 {
2870 float *d0,*d1,*d2,*d3;
2871
2872 float *B = f->B[blocktype] + n2 - 8;
2873 float *e = buf2 + n2 - 8;
2874 d0 = &buffer[0];
2875 d1 = &buffer[n2-4];
2876 d2 = &buffer[n2];
2877 d3 = &buffer[n-4];
2878 while (e >= v) {
2879 float p0,p1,p2,p3;
2880
2881 p3 = e[6]*B[7] - e[7]*B[6];
2882 p2 = -e[6]*B[6] - e[7]*B[7];
2883
2884 d0[0] = p3;
2885 d1[3] = - p3;
2886 d2[0] = p2;
2887 d3[3] = p2;
2888
2889 p1 = e[4]*B[5] - e[5]*B[4];
2890 p0 = -e[4]*B[4] - e[5]*B[5];
2891
2892 d0[1] = p1;
2893 d1[2] = - p1;
2894 d2[1] = p0;
2895 d3[2] = p0;
2896
2897 p3 = e[2]*B[3] - e[3]*B[2];
2898 p2 = -e[2]*B[2] - e[3]*B[3];
2899
2900 d0[2] = p3;
2901 d1[1] = - p3;
2902 d2[2] = p2;
2903 d3[1] = p2;
2904
2905 p1 = e[0]*B[1] - e[1]*B[0];
2906 p0 = -e[0]*B[0] - e[1]*B[1];
2907
2908 d0[3] = p1;
2909 d1[0] = - p1;
2910 d2[3] = p0;
2911 d3[0] = p0;
2912
2913 B -= 8;
2914 e -= 8;
2915 d0 += 4;
2916 d2 += 4;
2917 d1 -= 4;
2918 d3 -= 4;
2919 }
2920 }
2921
2922 temp_free(f,buf2);
2923 temp_alloc_restore(f,save_point);
2924 }
2925
2926 #if 0
2927 // this is the original version of the above code, if you want to optimize it from scratch
2928 void inverse_mdct_naive(float *buffer, int n)
2929 {
2930 float s;
2931 float A[1 << 12], B[1 << 12], C[1 << 11];
2932 int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2933 int n3_4 = n - n4, ld;
2934 // how can they claim this only uses N words?!
2935 // oh, because they're only used sparsely, whoops
2936 float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];
2937 // set up twiddle factors
2938
2939 for (k=k2=0; k < n4; ++k,k2+=2) {
2940 A[k2 ] = (float) cos(4*k*M_PI/n);
2941 A[k2+1] = (float) -sin(4*k*M_PI/n);
2942 B[k2 ] = (float) cos((k2+1)*M_PI/n/2);
2943 B[k2+1] = (float) sin((k2+1)*M_PI/n/2);
2944 }
2945 for (k=k2=0; k < n8; ++k,k2+=2) {
2946 C[k2 ] = (float) cos(2*(k2+1)*M_PI/n);
2947 C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
2948 }
2949
2950 // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2951 // Note there are bugs in that pseudocode, presumably due to them attempting
2952 // to rename the arrays nicely rather than representing the way their actual
2953 // implementation bounces buffers back and forth. As a result, even in the
2954 // "some formulars corrected" version, a direct implementation fails. These
2955 // are noted below as "paper bug".
2956
2957 // copy and reflect spectral data
2958 for (k=0; k < n2; ++k) u[k] = buffer[k];
2959 for ( ; k < n ; ++k) u[k] = -buffer[n - k - 1];
2960 // kernel from paper
2961 // step 1
2962 for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
2963 v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2] - (u[k4+2] - u[n-k4-3])*A[k2+1];
2964 v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
2965 }
2966 // step 2
2967 for (k=k4=0; k < n8; k+=1, k4+=4) {
2968 w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
2969 w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
2970 w[k4+3] = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
2971 w[k4+1] = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
2972 }
2973 // step 3
2974 ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2975 for (l=0; l < ld-3; ++l) {
2976 int k0 = n >> (l+2), k1 = 1 << (l+3);
2977 int rlim = n >> (l+4), r4, r;
2978 int s2lim = 1 << (l+2), s2;
2979 for (r=r4=0; r < rlim; r4+=4,++r) {
2980 for (s2=0; s2 < s2lim; s2+=2) {
2981 u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
2982 u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
2983 u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
2984 - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
2985 u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
2986 + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
2987 }
2988 }
2989 if (l+1 < ld-3) {
2990 // paper bug: ping-ponging of u&w here is omitted
2991 memcpy(w, u, sizeof(u));
2992 }
2993 }
2994
2995 // step 4
2996 for (i=0; i < n8; ++i) {
2997 int j = bit_reverse(i) >> (32-ld+3);
2998 assert(j < n8);
2999 if (i == j) {
3000 // paper bug: original code probably swapped in place; if copying,
3001 // need to directly copy in this case
3002 int i8 = i << 3;
3003 v[i8+1] = u[i8+1];
3004 v[i8+3] = u[i8+3];
3005 v[i8+5] = u[i8+5];
3006 v[i8+7] = u[i8+7];
3007 } else if (i < j) {
3008 int i8 = i << 3, j8 = j << 3;
3009 v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
3010 v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
3011 v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
3012 v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
3013 }
3014 }
3015 // step 5
3016 for (k=0; k < n2; ++k) {
3017 w[k] = v[k*2+1];
3018 }
3019 // step 6
3020 for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
3021 u[n-1-k2] = w[k4];
3022 u[n-2-k2] = w[k4+1];
3023 u[n3_4 - 1 - k2] = w[k4+2];
3024 u[n3_4 - 2 - k2] = w[k4+3];
3025 }
3026 // step 7
3027 for (k=k2=0; k < n8; ++k, k2 += 2) {
3028 v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3029 v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3030 v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3031 v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3032 }
3033 // step 8
3034 for (k=k2=0; k < n4; ++k,k2 += 2) {
3035 X[k] = v[k2+n2]*B[k2 ] + v[k2+1+n2]*B[k2+1];
3036 X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2 ];
3037 }
3038
3039 // decode kernel to output
3040 // determined the following value experimentally
3041 // (by first figuring out what made inverse_mdct_slow work); then matching that here
3042 // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
3043 s = 0.5; // theoretically would be n4
3044
3045 // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
3046 // so it needs to use the "old" B values to behave correctly, or else
3047 // set s to 1.0 ]]]
3048 for (i=0; i < n4 ; ++i) buffer[i] = s * X[i+n4];
3049 for ( ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
3050 for ( ; i < n ; ++i) buffer[i] = -s * X[i - n3_4];
3051 }
3052 #endif
3053
get_window(vorb * f,int len)3054 static float *get_window(vorb *f, int len)
3055 {
3056 len <<= 1;
3057 if (len == f->blocksize_0) return f->window[0];
3058 if (len == f->blocksize_1) return f->window[1];
3059 return NULL;
3060 }
3061
3062 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3063 typedef int16 YTYPE;
3064 #else
3065 typedef int YTYPE;
3066 #endif
do_floor(vorb * f,Mapping * map,int i,int n,float * target,YTYPE * finalY,uint8 * step2_flag)3067 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
3068 {
3069 int n2 = n >> 1;
3070 int s = map->chan[i].mux, floor;
3071 floor = map->submap_floor[s];
3072 if (f->floor_types[floor] == 0) {
3073 return error(f, VORBIS_invalid_stream);
3074 } else {
3075 Floor1 *g = &f->floor_config[floor].floor1;
3076 int j,q;
3077 int lx = 0, ly = finalY[0] * g->floor1_multiplier;
3078 for (q=1; q < g->values; ++q) {
3079 j = g->sorted_order[q];
3080 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3081 if (finalY[j] >= 0)
3082 #else
3083 if (step2_flag[j])
3084 #endif
3085 {
3086 int hy = finalY[j] * g->floor1_multiplier;
3087 int hx = g->Xlist[j];
3088 if (lx != hx)
3089 draw_line(target, lx,ly, hx,hy, n2);
3090 CHECK(f);
3091 lx = hx, ly = hy;
3092 }
3093 }
3094 if (lx < n2) {
3095 // optimization of: draw_line(target, lx,ly, n,ly, n2);
3096 for (j=lx; j < n2; ++j)
3097 LINE_OP(target[j], inverse_db_table[ly]);
3098 CHECK(f);
3099 }
3100 }
3101 return TRUE;
3102 }
3103
3104 // The meaning of "left" and "right"
3105 //
3106 // For a given frame:
3107 // we compute samples from 0..n
3108 // window_center is n/2
3109 // we'll window and mix the samples from left_start to left_end with data from the previous frame
3110 // all of the samples from left_end to right_start can be output without mixing; however,
3111 // this interval is 0-length except when transitioning between short and long frames
3112 // all of the samples from right_start to right_end need to be mixed with the next frame,
3113 // which we don't have, so those get saved in a buffer
3114 // frame N's right_end-right_start, the number of samples to mix with the next frame,
3115 // has to be the same as frame N+1's left_end-left_start (which they are by
3116 // construction)
3117
vorbis_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)3118 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
3119 {
3120 Mode *m;
3121 int i, n, prev, next, window_center;
3122 f->channel_buffer_start = f->channel_buffer_end = 0;
3123
3124 retry:
3125 if (f->eof) return FALSE;
3126 if (!maybe_start_packet(f))
3127 return FALSE;
3128 // check packet type
3129 if (get_bits(f,1) != 0) {
3130 if (IS_PUSH_MODE(f))
3131 return error(f,VORBIS_bad_packet_type);
3132 while (EOP != get8_packet(f));
3133 goto retry;
3134 }
3135
3136 if (f->alloc.alloc_buffer)
3137 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3138
3139 i = get_bits(f, ilog(f->mode_count-1));
3140 if (i == EOP) return FALSE;
3141 if (i >= f->mode_count) return FALSE;
3142 *mode = i;
3143 m = f->mode_config + i;
3144 if (m->blockflag) {
3145 n = f->blocksize_1;
3146 prev = get_bits(f,1);
3147 next = get_bits(f,1);
3148 } else {
3149 prev = next = 0;
3150 n = f->blocksize_0;
3151 }
3152
3153 // WINDOWING
3154
3155 window_center = n >> 1;
3156 if (m->blockflag && !prev) {
3157 *p_left_start = (n - f->blocksize_0) >> 2;
3158 *p_left_end = (n + f->blocksize_0) >> 2;
3159 } else {
3160 *p_left_start = 0;
3161 *p_left_end = window_center;
3162 }
3163 if (m->blockflag && !next) {
3164 *p_right_start = (n*3 - f->blocksize_0) >> 2;
3165 *p_right_end = (n*3 + f->blocksize_0) >> 2;
3166 } else {
3167 *p_right_start = window_center;
3168 *p_right_end = n;
3169 }
3170
3171 return TRUE;
3172 }
3173
vorbis_decode_packet_rest(vorb * f,int * len,Mode * m,int left_start,int left_end,int right_start,int right_end,int * p_left)3174 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
3175 {
3176 Mapping *map;
3177 int i,j,k,n,n2;
3178 int zero_channel[256];
3179 int really_zero_channel[256];
3180
3181 // WINDOWING
3182
3183 n = f->blocksize[m->blockflag];
3184 map = &f->mapping[m->mapping];
3185
3186 // FLOORS
3187 n2 = n >> 1;
3188
3189 CHECK(f);
3190
3191 for (i=0; i < f->channels; ++i) {
3192 int s = map->chan[i].mux, floor;
3193 zero_channel[i] = FALSE;
3194 floor = map->submap_floor[s];
3195 if (f->floor_types[floor] == 0) {
3196 return error(f, VORBIS_invalid_stream);
3197 } else {
3198 Floor1 *g = &f->floor_config[floor].floor1;
3199 if (get_bits(f, 1)) {
3200 short *finalY;
3201 uint8 step2_flag[256];
3202 static int range_list[4] = { 256, 128, 86, 64 };
3203 int range = range_list[g->floor1_multiplier-1];
3204 int offset = 2;
3205 finalY = f->finalY[i];
3206 finalY[0] = get_bits(f, ilog(range)-1);
3207 finalY[1] = get_bits(f, ilog(range)-1);
3208 for (j=0; j < g->partitions; ++j) {
3209 int pclass = g->partition_class_list[j];
3210 int cdim = g->class_dimensions[pclass];
3211 int cbits = g->class_subclasses[pclass];
3212 int csub = (1 << cbits)-1;
3213 int cval = 0;
3214 if (cbits) {
3215 Codebook *c = f->codebooks + g->class_masterbooks[pclass];
3216 DECODE(cval,f,c);
3217 }
3218 for (k=0; k < cdim; ++k) {
3219 int book = g->subclass_books[pclass][cval & csub];
3220 cval = cval >> cbits;
3221 if (book >= 0) {
3222 int temp;
3223 Codebook *c = f->codebooks + book;
3224 DECODE(temp,f,c);
3225 finalY[offset++] = temp;
3226 } else
3227 finalY[offset++] = 0;
3228 }
3229 }
3230 if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
3231 step2_flag[0] = step2_flag[1] = 1;
3232 for (j=2; j < g->values; ++j) {
3233 int low, high, pred, highroom, lowroom, room, val;
3234 low = g->neighbors[j][0];
3235 high = g->neighbors[j][1];
3236 //neighbors(g->Xlist, j, &low, &high);
3237 pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
3238 val = finalY[j];
3239 highroom = range - pred;
3240 lowroom = pred;
3241 if (highroom < lowroom)
3242 room = highroom * 2;
3243 else
3244 room = lowroom * 2;
3245 if (val) {
3246 step2_flag[low] = step2_flag[high] = 1;
3247 step2_flag[j] = 1;
3248 if (val >= room)
3249 if (highroom > lowroom)
3250 finalY[j] = val - lowroom + pred;
3251 else
3252 finalY[j] = pred - val + highroom - 1;
3253 else
3254 if (val & 1)
3255 finalY[j] = pred - ((val+1)>>1);
3256 else
3257 finalY[j] = pred + (val>>1);
3258 } else {
3259 step2_flag[j] = 0;
3260 finalY[j] = pred;
3261 }
3262 }
3263
3264 #ifdef STB_VORBIS_NO_DEFER_FLOOR
3265 do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
3266 #else
3267 // defer final floor computation until _after_ residue
3268 for (j=0; j < g->values; ++j) {
3269 if (!step2_flag[j])
3270 finalY[j] = -1;
3271 }
3272 #endif
3273 } else {
3274 error:
3275 zero_channel[i] = TRUE;
3276 }
3277 // So we just defer everything else to later
3278
3279 // at this point we've decoded the floor into buffer
3280 }
3281 }
3282 CHECK(f);
3283 // at this point we've decoded all floors
3284
3285 if (f->alloc.alloc_buffer)
3286 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3287
3288 // re-enable coupled channels if necessary
3289 memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
3290 for (i=0; i < map->coupling_steps; ++i)
3291 if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
3292 zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
3293 }
3294
3295 CHECK(f);
3296 // RESIDUE DECODE
3297 for (i=0; i < map->submaps; ++i) {
3298 float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
3299 int r;
3300 uint8 do_not_decode[256];
3301 int ch = 0;
3302 for (j=0; j < f->channels; ++j) {
3303 if (map->chan[j].mux == i) {
3304 if (zero_channel[j]) {
3305 do_not_decode[ch] = TRUE;
3306 residue_buffers[ch] = NULL;
3307 } else {
3308 do_not_decode[ch] = FALSE;
3309 residue_buffers[ch] = f->channel_buffers[j];
3310 }
3311 ++ch;
3312 }
3313 }
3314 r = map->submap_residue[i];
3315 decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
3316 }
3317
3318 if (f->alloc.alloc_buffer)
3319 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3320 CHECK(f);
3321
3322 // INVERSE COUPLING
3323 for (i = map->coupling_steps-1; i >= 0; --i) {
3324 int n2 = n >> 1;
3325 float *m = f->channel_buffers[map->chan[i].magnitude];
3326 float *a = f->channel_buffers[map->chan[i].angle ];
3327 for (j=0; j < n2; ++j) {
3328 float a2,m2;
3329 if (m[j] > 0)
3330 if (a[j] > 0)
3331 m2 = m[j], a2 = m[j] - a[j];
3332 else
3333 a2 = m[j], m2 = m[j] + a[j];
3334 else
3335 if (a[j] > 0)
3336 m2 = m[j], a2 = m[j] + a[j];
3337 else
3338 a2 = m[j], m2 = m[j] - a[j];
3339 m[j] = m2;
3340 a[j] = a2;
3341 }
3342 }
3343 CHECK(f);
3344
3345 // finish decoding the floors
3346 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3347 for (i=0; i < f->channels; ++i) {
3348 if (really_zero_channel[i]) {
3349 memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3350 } else {
3351 do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
3352 }
3353 }
3354 #else
3355 for (i=0; i < f->channels; ++i) {
3356 if (really_zero_channel[i]) {
3357 memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3358 } else {
3359 for (j=0; j < n2; ++j)
3360 f->channel_buffers[i][j] *= f->floor_buffers[i][j];
3361 }
3362 }
3363 #endif
3364
3365 // INVERSE MDCT
3366 CHECK(f);
3367 for (i=0; i < f->channels; ++i)
3368 inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
3369 CHECK(f);
3370
3371 // this shouldn't be necessary, unless we exited on an error
3372 // and want to flush to get to the next packet
3373 flush_packet(f);
3374
3375 if (f->first_decode) {
3376 // assume we start so first non-discarded sample is sample 0
3377 // this isn't to spec, but spec would require us to read ahead
3378 // and decode the size of all current frames--could be done,
3379 // but presumably it's not a commonly used feature
3380 f->current_loc = -n2; // start of first frame is positioned for discard
3381 // we might have to discard samples "from" the next frame too,
3382 // if we're lapping a large block then a small at the start?
3383 f->discard_samples_deferred = n - right_end;
3384 f->current_loc_valid = TRUE;
3385 f->first_decode = FALSE;
3386 } else if (f->discard_samples_deferred) {
3387 if (f->discard_samples_deferred >= right_start - left_start) {
3388 f->discard_samples_deferred -= (right_start - left_start);
3389 left_start = right_start;
3390 *p_left = left_start;
3391 } else {
3392 left_start += f->discard_samples_deferred;
3393 *p_left = left_start;
3394 f->discard_samples_deferred = 0;
3395 }
3396 } else if (f->previous_length == 0 && f->current_loc_valid) {
3397 // we're recovering from a seek... that means we're going to discard
3398 // the samples from this packet even though we know our position from
3399 // the last page header, so we need to update the position based on
3400 // the discarded samples here
3401 // but wait, the code below is going to add this in itself even
3402 // on a discard, so we don't need to do it here...
3403 }
3404
3405 // check if we have ogg information about the sample # for this packet
3406 if (f->last_seg_which == f->end_seg_with_known_loc) {
3407 // if we have a valid current loc, and this is final:
3408 if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
3409 uint32 current_end = f->known_loc_for_packet;
3410 // then let's infer the size of the (probably) short final frame
3411 if (current_end < f->current_loc + (right_end-left_start)) {
3412 if (current_end < f->current_loc) {
3413 // negative truncation, that's impossible!
3414 *len = 0;
3415 } else {
3416 *len = current_end - f->current_loc;
3417 }
3418 *len += left_start; // this doesn't seem right, but has no ill effect on my test files
3419 if (*len > right_end) *len = right_end; // this should never happen
3420 f->current_loc += *len;
3421 return TRUE;
3422 }
3423 }
3424 // otherwise, just set our sample loc
3425 // guess that the ogg granule pos refers to the _middle_ of the
3426 // last frame?
3427 // set f->current_loc to the position of left_start
3428 f->current_loc = f->known_loc_for_packet - (n2-left_start);
3429 f->current_loc_valid = TRUE;
3430 }
3431 if (f->current_loc_valid)
3432 f->current_loc += (right_start - left_start);
3433
3434 if (f->alloc.alloc_buffer)
3435 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3436 *len = right_end; // ignore samples after the window goes to 0
3437 CHECK(f);
3438
3439 return TRUE;
3440 }
3441
vorbis_decode_packet(vorb * f,int * len,int * p_left,int * p_right)3442 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
3443 {
3444 int mode, left_end, right_end;
3445 if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
3446 return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
3447 }
3448
vorbis_finish_frame(stb_vorbis * f,int len,int left,int right)3449 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
3450 {
3451 int prev,i,j;
3452 // we use right&left (the start of the right- and left-window sin()-regions)
3453 // to determine how much to return, rather than inferring from the rules
3454 // (same result, clearer code); 'left' indicates where our sin() window
3455 // starts, therefore where the previous window's right edge starts, and
3456 // therefore where to start mixing from the previous buffer. 'right'
3457 // indicates where our sin() ending-window starts, therefore that's where
3458 // we start saving, and where our returned-data ends.
3459
3460 // mixin from previous window
3461 if (f->previous_length) {
3462 int i,j, n = f->previous_length;
3463 float *w = get_window(f, n);
3464 if (w == NULL) return 0;
3465 for (i=0; i < f->channels; ++i) {
3466 for (j=0; j < n; ++j)
3467 f->channel_buffers[i][left+j] =
3468 f->channel_buffers[i][left+j]*w[ j] +
3469 f->previous_window[i][ j]*w[n-1-j];
3470 }
3471 }
3472
3473 prev = f->previous_length;
3474
3475 // last half of this data becomes previous window
3476 f->previous_length = len - right;
3477
3478 // @OPTIMIZE: could avoid this copy by double-buffering the
3479 // output (flipping previous_window with channel_buffers), but
3480 // then previous_window would have to be 2x as large, and
3481 // channel_buffers couldn't be temp mem (although they're NOT
3482 // currently temp mem, they could be (unless we want to level
3483 // performance by spreading out the computation))
3484 for (i=0; i < f->channels; ++i)
3485 for (j=0; right+j < len; ++j)
3486 f->previous_window[i][j] = f->channel_buffers[i][right+j];
3487
3488 if (!prev)
3489 // there was no previous packet, so this data isn't valid...
3490 // this isn't entirely true, only the would-have-overlapped data
3491 // isn't valid, but this seems to be what the spec requires
3492 return 0;
3493
3494 // truncate a short frame
3495 if (len < right) right = len;
3496
3497 f->samples_output += right-left;
3498
3499 return right - left;
3500 }
3501
vorbis_pump_first_frame(stb_vorbis * f)3502 static int vorbis_pump_first_frame(stb_vorbis *f)
3503 {
3504 int len, right, left, res;
3505 res = vorbis_decode_packet(f, &len, &left, &right);
3506 if (res)
3507 vorbis_finish_frame(f, len, left, right);
3508 return res;
3509 }
3510
3511 #ifndef STB_VORBIS_NO_PUSHDATA_API
is_whole_packet_present(stb_vorbis * f)3512 static int is_whole_packet_present(stb_vorbis *f)
3513 {
3514 // make sure that we have the packet available before continuing...
3515 // this requires a full ogg parse, but we know we can fetch from f->stream
3516
3517 // instead of coding this out explicitly, we could save the current read state,
3518 // read the next packet with get8() until end-of-packet, check f->eof, then
3519 // reset the state? but that would be slower, esp. since we'd have over 256 bytes
3520 // of state to restore (primarily the page segment table)
3521
3522 int s = f->next_seg, first = TRUE;
3523 uint8 *p = f->stream;
3524
3525 if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
3526 for (; s < f->segment_count; ++s) {
3527 p += f->segments[s];
3528 if (f->segments[s] < 255) // stop at first short segment
3529 break;
3530 }
3531 // either this continues, or it ends it...
3532 if (s == f->segment_count)
3533 s = -1; // set 'crosses page' flag
3534 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3535 first = FALSE;
3536 }
3537 for (; s == -1;) {
3538 uint8 *q;
3539 int n;
3540
3541 // check that we have the page header ready
3542 if (p + 26 >= f->stream_end) return error(f, VORBIS_need_more_data);
3543 // validate the page
3544 if (memcmp(p, ogg_page_header, 4)) return error(f, VORBIS_invalid_stream);
3545 if (p[4] != 0) return error(f, VORBIS_invalid_stream);
3546 if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
3547 if (f->previous_length)
3548 if ((p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3549 // if no previous length, we're resynching, so we can come in on a continued-packet,
3550 // which we'll just drop
3551 } else {
3552 if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3553 }
3554 n = p[26]; // segment counts
3555 q = p+27; // q points to segment table
3556 p = q + n; // advance past header
3557 // make sure we've read the segment table
3558 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3559 for (s=0; s < n; ++s) {
3560 p += q[s];
3561 if (q[s] < 255)
3562 break;
3563 }
3564 if (s == n)
3565 s = -1; // set 'crosses page' flag
3566 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3567 first = FALSE;
3568 }
3569 return TRUE;
3570 }
3571 #endif // !STB_VORBIS_NO_PUSHDATA_API
3572
start_decoder(vorb * f)3573 static int start_decoder(vorb *f)
3574 {
3575 uint8 header[6], x,y;
3576 int len,i,j,k, max_submaps = 0;
3577 int longest_floorlist=0;
3578
3579 // first page, first packet
3580 f->first_decode = TRUE;
3581
3582 if (!start_page(f)) return FALSE;
3583 // validate page flag
3584 if (!(f->page_flag & PAGEFLAG_first_page)) return error(f, VORBIS_invalid_first_page);
3585 if (f->page_flag & PAGEFLAG_last_page) return error(f, VORBIS_invalid_first_page);
3586 if (f->page_flag & PAGEFLAG_continued_packet) return error(f, VORBIS_invalid_first_page);
3587 // check for expected packet length
3588 if (f->segment_count != 1) return error(f, VORBIS_invalid_first_page);
3589 if (f->segments[0] != 30) {
3590 // check for the Ogg skeleton fishead identifying header to refine our error
3591 if (f->segments[0] == 64 &&
3592 getn(f, header, 6) &&
3593 header[0] == 'f' &&
3594 header[1] == 'i' &&
3595 header[2] == 's' &&
3596 header[3] == 'h' &&
3597 header[4] == 'e' &&
3598 header[5] == 'a' &&
3599 get8(f) == 'd' &&
3600 get8(f) == '\0') return error(f, VORBIS_ogg_skeleton_not_supported);
3601 else
3602 return error(f, VORBIS_invalid_first_page);
3603 }
3604
3605 // read packet
3606 // check packet header
3607 if (get8(f) != VORBIS_packet_id) return error(f, VORBIS_invalid_first_page);
3608 if (!getn(f, header, 6)) return error(f, VORBIS_unexpected_eof);
3609 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_first_page);
3610 // vorbis_version
3611 if (get32(f) != 0) return error(f, VORBIS_invalid_first_page);
3612 f->channels = get8(f); if (!f->channels) return error(f, VORBIS_invalid_first_page);
3613 if (f->channels > STB_VORBIS_MAX_CHANNELS) return error(f, VORBIS_too_many_channels);
3614 f->sample_rate = get32(f); if (!f->sample_rate) return error(f, VORBIS_invalid_first_page);
3615 get32(f); // bitrate_maximum
3616 get32(f); // bitrate_nominal
3617 get32(f); // bitrate_minimum
3618 x = get8(f);
3619 {
3620 int log0,log1;
3621 log0 = x & 15;
3622 log1 = x >> 4;
3623 f->blocksize_0 = 1 << log0;
3624 f->blocksize_1 = 1 << log1;
3625 if (log0 < 6 || log0 > 13) return error(f, VORBIS_invalid_setup);
3626 if (log1 < 6 || log1 > 13) return error(f, VORBIS_invalid_setup);
3627 if (log0 > log1) return error(f, VORBIS_invalid_setup);
3628 }
3629
3630 // framing_flag
3631 x = get8(f);
3632 if (!(x & 1)) return error(f, VORBIS_invalid_first_page);
3633
3634 // second packet!
3635 if (!start_page(f)) return FALSE;
3636
3637 if (!start_packet(f)) return FALSE;
3638
3639 if (!next_segment(f)) return FALSE;
3640
3641 if (get8_packet(f) != VORBIS_packet_comment) return error(f, VORBIS_invalid_setup);
3642 for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3643 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_setup);
3644 //file vendor
3645 len = get32_packet(f);
3646 f->vendor = (char*)setup_malloc(f, sizeof(char) * (len+1));
3647 if (f->vendor == NULL) return error(f, VORBIS_outofmem);
3648 for(i=0; i < len; ++i) {
3649 f->vendor[i] = get8_packet(f);
3650 }
3651 f->vendor[len] = (char)'\0';
3652 //user comments
3653 f->comment_list_length = get32_packet(f);
3654 f->comment_list = (char**)setup_malloc(f, sizeof(char*) * (f->comment_list_length));
3655 if (f->comment_list == NULL) return error(f, VORBIS_outofmem);
3656
3657 for(i=0; i < f->comment_list_length; ++i) {
3658 len = get32_packet(f);
3659 f->comment_list[i] = (char*)setup_malloc(f, sizeof(char) * (len+1));
3660 if (f->comment_list[i] == NULL) return error(f, VORBIS_outofmem);
3661
3662 for(j=0; j < len; ++j) {
3663 f->comment_list[i][j] = get8_packet(f);
3664 }
3665 f->comment_list[i][len] = (char)'\0';
3666 }
3667
3668 // framing_flag
3669 x = get8_packet(f);
3670 if (!(x & 1)) return error(f, VORBIS_invalid_setup);
3671
3672
3673 skip(f, f->bytes_in_seg);
3674 f->bytes_in_seg = 0;
3675
3676 do {
3677 len = next_segment(f);
3678 skip(f, len);
3679 f->bytes_in_seg = 0;
3680 } while (len);
3681
3682 // third packet!
3683 if (!start_packet(f)) return FALSE;
3684
3685 #ifndef STB_VORBIS_NO_PUSHDATA_API
3686 if (IS_PUSH_MODE(f)) {
3687 if (!is_whole_packet_present(f)) {
3688 // convert error in ogg header to write type
3689 if (f->error == VORBIS_invalid_stream)
3690 f->error = VORBIS_invalid_setup;
3691 return FALSE;
3692 }
3693 }
3694 #endif
3695
3696 crc32_init(); // always init it, to avoid multithread race conditions
3697
3698 if (get8_packet(f) != VORBIS_packet_setup) return error(f, VORBIS_invalid_setup);
3699 for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3700 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_setup);
3701
3702 // codebooks
3703
3704 f->codebook_count = get_bits(f,8) + 1;
3705 f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
3706 if (f->codebooks == NULL) return error(f, VORBIS_outofmem);
3707 memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
3708 for (i=0; i < f->codebook_count; ++i) {
3709 uint32 *values;
3710 int ordered, sorted_count;
3711 int total=0;
3712 uint8 *lengths;
3713 Codebook *c = f->codebooks+i;
3714 CHECK(f);
3715 x = get_bits(f, 8); if (x != 0x42) return error(f, VORBIS_invalid_setup);
3716 x = get_bits(f, 8); if (x != 0x43) return error(f, VORBIS_invalid_setup);
3717 x = get_bits(f, 8); if (x != 0x56) return error(f, VORBIS_invalid_setup);
3718 x = get_bits(f, 8);
3719 c->dimensions = (get_bits(f, 8)<<8) + x;
3720 x = get_bits(f, 8);
3721 y = get_bits(f, 8);
3722 c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
3723 ordered = get_bits(f,1);
3724 c->sparse = ordered ? 0 : get_bits(f,1);
3725
3726 if (c->dimensions == 0 && c->entries != 0) return error(f, VORBIS_invalid_setup);
3727
3728 if (c->sparse)
3729 lengths = (uint8 *) setup_temp_malloc(f, c->entries);
3730 else
3731 lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3732
3733 if (!lengths) return error(f, VORBIS_outofmem);
3734
3735 if (ordered) {
3736 int current_entry = 0;
3737 int current_length = get_bits(f,5) + 1;
3738 while (current_entry < c->entries) {
3739 int limit = c->entries - current_entry;
3740 int n = get_bits(f, ilog(limit));
3741 if (current_length >= 32) return error(f, VORBIS_invalid_setup);
3742 if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
3743 memset(lengths + current_entry, current_length, n);
3744 current_entry += n;
3745 ++current_length;
3746 }
3747 } else {
3748 for (j=0; j < c->entries; ++j) {
3749 int present = c->sparse ? get_bits(f,1) : 1;
3750 if (present) {
3751 lengths[j] = get_bits(f, 5) + 1;
3752 ++total;
3753 if (lengths[j] == 32)
3754 return error(f, VORBIS_invalid_setup);
3755 } else {
3756 lengths[j] = NO_CODE;
3757 }
3758 }
3759 }
3760
3761 if (c->sparse && total >= c->entries >> 2) {
3762 // convert sparse items to non-sparse!
3763 if (c->entries > (int) f->setup_temp_memory_required)
3764 f->setup_temp_memory_required = c->entries;
3765
3766 c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3767 if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem);
3768 memcpy(c->codeword_lengths, lengths, c->entries);
3769 setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
3770 lengths = c->codeword_lengths;
3771 c->sparse = 0;
3772 }
3773
3774 // compute the size of the sorted tables
3775 if (c->sparse) {
3776 sorted_count = total;
3777 } else {
3778 sorted_count = 0;
3779 #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
3780 for (j=0; j < c->entries; ++j)
3781 if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
3782 ++sorted_count;
3783 #endif
3784 }
3785
3786 c->sorted_entries = sorted_count;
3787 values = NULL;
3788
3789 CHECK(f);
3790 if (!c->sparse) {
3791 c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
3792 if (!c->codewords) return error(f, VORBIS_outofmem);
3793 } else {
3794 unsigned int size;
3795 if (c->sorted_entries) {
3796 c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries);
3797 if (!c->codeword_lengths) return error(f, VORBIS_outofmem);
3798 c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
3799 if (!c->codewords) return error(f, VORBIS_outofmem);
3800 values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
3801 if (!values) return error(f, VORBIS_outofmem);
3802 }
3803 size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
3804 if (size > f->setup_temp_memory_required)
3805 f->setup_temp_memory_required = size;
3806 }
3807
3808 if (!compute_codewords(c, lengths, c->entries, values)) {
3809 if (c->sparse) setup_temp_free(f, values, 0);
3810 return error(f, VORBIS_invalid_setup);
3811 }
3812
3813 if (c->sorted_entries) {
3814 // allocate an extra slot for sentinels
3815 c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
3816 if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem);
3817 // allocate an extra slot at the front so that c->sorted_values[-1] is defined
3818 // so that we can catch that case without an extra if
3819 c->sorted_values = ( int *) setup_malloc(f, sizeof(*c->sorted_values ) * (c->sorted_entries+1));
3820 if (c->sorted_values == NULL) return error(f, VORBIS_outofmem);
3821 ++c->sorted_values;
3822 c->sorted_values[-1] = -1;
3823 compute_sorted_huffman(c, lengths, values);
3824 }
3825
3826 if (c->sparse) {
3827 setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
3828 setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
3829 setup_temp_free(f, lengths, c->entries);
3830 c->codewords = NULL;
3831 }
3832
3833 compute_accelerated_huffman(c);
3834
3835 CHECK(f);
3836 c->lookup_type = get_bits(f, 4);
3837 if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
3838 if (c->lookup_type > 0) {
3839 uint16 *mults;
3840 c->minimum_value = float32_unpack(get_bits(f, 32));
3841 c->delta_value = float32_unpack(get_bits(f, 32));
3842 c->value_bits = get_bits(f, 4)+1;
3843 c->sequence_p = get_bits(f,1);
3844 if (c->lookup_type == 1) {
3845 int values = lookup1_values(c->entries, c->dimensions);
3846 if (values < 0) return error(f, VORBIS_invalid_setup);
3847 c->lookup_values = (uint32) values;
3848 } else {
3849 c->lookup_values = c->entries * c->dimensions;
3850 }
3851 if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup);
3852 mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
3853 if (mults == NULL) return error(f, VORBIS_outofmem);
3854 for (j=0; j < (int) c->lookup_values; ++j) {
3855 int q = get_bits(f, c->value_bits);
3856 if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
3857 mults[j] = q;
3858 }
3859
3860 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3861 if (c->lookup_type == 1) {
3862 int len, sparse = c->sparse;
3863 float last=0;
3864 // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3865 if (sparse) {
3866 if (c->sorted_entries == 0) goto skip;
3867 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
3868 } else
3869 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries * c->dimensions);
3870 if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3871 len = sparse ? c->sorted_entries : c->entries;
3872 for (j=0; j < len; ++j) {
3873 unsigned int z = sparse ? c->sorted_values[j] : j;
3874 unsigned int div=1;
3875 for (k=0; k < c->dimensions; ++k) {
3876 int off = (z / div) % c->lookup_values;
3877 float val = mults[off];
3878 val = mults[off]*c->delta_value + c->minimum_value + last;
3879 c->multiplicands[j*c->dimensions + k] = val;
3880 if (c->sequence_p)
3881 last = val;
3882 if (k+1 < c->dimensions) {
3883 if (div > UINT_MAX / (unsigned int) c->lookup_values) {
3884 setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
3885 return error(f, VORBIS_invalid_setup);
3886 }
3887 div *= c->lookup_values;
3888 }
3889 }
3890 }
3891 c->lookup_type = 2;
3892 }
3893 else
3894 #endif
3895 {
3896 float last=0;
3897 CHECK(f);
3898 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
3899 if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3900 for (j=0; j < (int) c->lookup_values; ++j) {
3901 float val = mults[j] * c->delta_value + c->minimum_value + last;
3902 c->multiplicands[j] = val;
3903 if (c->sequence_p)
3904 last = val;
3905 }
3906 }
3907 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3908 skip:;
3909 #endif
3910 setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values);
3911
3912 CHECK(f);
3913 }
3914 CHECK(f);
3915 }
3916
3917 // time domain transfers (notused)
3918
3919 x = get_bits(f, 6) + 1;
3920 for (i=0; i < x; ++i) {
3921 uint32 z = get_bits(f, 16);
3922 if (z != 0) return error(f, VORBIS_invalid_setup);
3923 }
3924
3925 // Floors
3926 f->floor_count = get_bits(f, 6)+1;
3927 f->floor_config = (Floor *) setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
3928 if (f->floor_config == NULL) return error(f, VORBIS_outofmem);
3929 for (i=0; i < f->floor_count; ++i) {
3930 f->floor_types[i] = get_bits(f, 16);
3931 if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
3932 if (f->floor_types[i] == 0) {
3933 Floor0 *g = &f->floor_config[i].floor0;
3934 g->order = get_bits(f,8);
3935 g->rate = get_bits(f,16);
3936 g->bark_map_size = get_bits(f,16);
3937 g->amplitude_bits = get_bits(f,6);
3938 g->amplitude_offset = get_bits(f,8);
3939 g->number_of_books = get_bits(f,4) + 1;
3940 for (j=0; j < g->number_of_books; ++j)
3941 g->book_list[j] = get_bits(f,8);
3942 return error(f, VORBIS_feature_not_supported);
3943 } else {
3944 stbv__floor_ordering p[31*8+2];
3945 Floor1 *g = &f->floor_config[i].floor1;
3946 int max_class = -1;
3947 g->partitions = get_bits(f, 5);
3948 for (j=0; j < g->partitions; ++j) {
3949 g->partition_class_list[j] = get_bits(f, 4);
3950 if (g->partition_class_list[j] > max_class)
3951 max_class = g->partition_class_list[j];
3952 }
3953 for (j=0; j <= max_class; ++j) {
3954 g->class_dimensions[j] = get_bits(f, 3)+1;
3955 g->class_subclasses[j] = get_bits(f, 2);
3956 if (g->class_subclasses[j]) {
3957 g->class_masterbooks[j] = get_bits(f, 8);
3958 if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3959 }
3960 for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
3961 g->subclass_books[j][k] = get_bits(f,8)-1;
3962 if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3963 }
3964 }
3965 g->floor1_multiplier = get_bits(f,2)+1;
3966 g->rangebits = get_bits(f,4);
3967 g->Xlist[0] = 0;
3968 g->Xlist[1] = 1 << g->rangebits;
3969 g->values = 2;
3970 for (j=0; j < g->partitions; ++j) {
3971 int c = g->partition_class_list[j];
3972 for (k=0; k < g->class_dimensions[c]; ++k) {
3973 g->Xlist[g->values] = get_bits(f, g->rangebits);
3974 ++g->values;
3975 }
3976 }
3977 // precompute the sorting
3978 for (j=0; j < g->values; ++j) {
3979 p[j].x = g->Xlist[j];
3980 p[j].id = j;
3981 }
3982 qsort(p, g->values, sizeof(p[0]), point_compare);
3983 for (j=0; j < g->values-1; ++j)
3984 if (p[j].x == p[j+1].x)
3985 return error(f, VORBIS_invalid_setup);
3986 for (j=0; j < g->values; ++j)
3987 g->sorted_order[j] = (uint8) p[j].id;
3988 // precompute the neighbors
3989 for (j=2; j < g->values; ++j) {
3990 int low = 0,hi = 0;
3991 neighbors(g->Xlist, j, &low,&hi);
3992 g->neighbors[j][0] = low;
3993 g->neighbors[j][1] = hi;
3994 }
3995
3996 if (g->values > longest_floorlist)
3997 longest_floorlist = g->values;
3998 }
3999 }
4000
4001 // Residue
4002 f->residue_count = get_bits(f, 6)+1;
4003 f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0]));
4004 if (f->residue_config == NULL) return error(f, VORBIS_outofmem);
4005 memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0]));
4006 for (i=0; i < f->residue_count; ++i) {
4007 uint8 residue_cascade[64];
4008 Residue *r = f->residue_config+i;
4009 f->residue_types[i] = get_bits(f, 16);
4010 if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
4011 r->begin = get_bits(f, 24);
4012 r->end = get_bits(f, 24);
4013 if (r->end < r->begin) return error(f, VORBIS_invalid_setup);
4014 r->part_size = get_bits(f,24)+1;
4015 r->classifications = get_bits(f,6)+1;
4016 r->classbook = get_bits(f,8);
4017 if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup);
4018 for (j=0; j < r->classifications; ++j) {
4019 uint8 high_bits=0;
4020 uint8 low_bits=get_bits(f,3);
4021 if (get_bits(f,1))
4022 high_bits = get_bits(f,5);
4023 residue_cascade[j] = high_bits*8 + low_bits;
4024 }
4025 r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
4026 if (r->residue_books == NULL) return error(f, VORBIS_outofmem);
4027 for (j=0; j < r->classifications; ++j) {
4028 for (k=0; k < 8; ++k) {
4029 if (residue_cascade[j] & (1 << k)) {
4030 r->residue_books[j][k] = get_bits(f, 8);
4031 if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
4032 } else {
4033 r->residue_books[j][k] = -1;
4034 }
4035 }
4036 }
4037 // precompute the classifications[] array to avoid inner-loop mod/divide
4038 // call it 'classdata' since we already have r->classifications
4039 r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
4040 if (!r->classdata) return error(f, VORBIS_outofmem);
4041 memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
4042 for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
4043 int classwords = f->codebooks[r->classbook].dimensions;
4044 int temp = j;
4045 r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
4046 if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem);
4047 for (k=classwords-1; k >= 0; --k) {
4048 r->classdata[j][k] = temp % r->classifications;
4049 temp /= r->classifications;
4050 }
4051 }
4052 }
4053
4054 f->mapping_count = get_bits(f,6)+1;
4055 f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
4056 if (f->mapping == NULL) return error(f, VORBIS_outofmem);
4057 memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping));
4058 for (i=0; i < f->mapping_count; ++i) {
4059 Mapping *m = f->mapping + i;
4060 int mapping_type = get_bits(f,16);
4061 if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
4062 m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
4063 if (m->chan == NULL) return error(f, VORBIS_outofmem);
4064 if (get_bits(f,1))
4065 m->submaps = get_bits(f,4)+1;
4066 else
4067 m->submaps = 1;
4068 if (m->submaps > max_submaps)
4069 max_submaps = m->submaps;
4070 if (get_bits(f,1)) {
4071 m->coupling_steps = get_bits(f,8)+1;
4072 if (m->coupling_steps > f->channels) return error(f, VORBIS_invalid_setup);
4073 for (k=0; k < m->coupling_steps; ++k) {
4074 m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
4075 m->chan[k].angle = get_bits(f, ilog(f->channels-1));
4076 if (m->chan[k].magnitude >= f->channels) return error(f, VORBIS_invalid_setup);
4077 if (m->chan[k].angle >= f->channels) return error(f, VORBIS_invalid_setup);
4078 if (m->chan[k].magnitude == m->chan[k].angle) return error(f, VORBIS_invalid_setup);
4079 }
4080 } else
4081 m->coupling_steps = 0;
4082
4083 // reserved field
4084 if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
4085 if (m->submaps > 1) {
4086 for (j=0; j < f->channels; ++j) {
4087 m->chan[j].mux = get_bits(f, 4);
4088 if (m->chan[j].mux >= m->submaps) return error(f, VORBIS_invalid_setup);
4089 }
4090 } else
4091 // @SPECIFICATION: this case is missing from the spec
4092 for (j=0; j < f->channels; ++j)
4093 m->chan[j].mux = 0;
4094
4095 for (j=0; j < m->submaps; ++j) {
4096 get_bits(f,8); // discard
4097 m->submap_floor[j] = get_bits(f,8);
4098 m->submap_residue[j] = get_bits(f,8);
4099 if (m->submap_floor[j] >= f->floor_count) return error(f, VORBIS_invalid_setup);
4100 if (m->submap_residue[j] >= f->residue_count) return error(f, VORBIS_invalid_setup);
4101 }
4102 }
4103
4104 // Modes
4105 f->mode_count = get_bits(f, 6)+1;
4106 for (i=0; i < f->mode_count; ++i) {
4107 Mode *m = f->mode_config+i;
4108 m->blockflag = get_bits(f,1);
4109 m->windowtype = get_bits(f,16);
4110 m->transformtype = get_bits(f,16);
4111 m->mapping = get_bits(f,8);
4112 if (m->windowtype != 0) return error(f, VORBIS_invalid_setup);
4113 if (m->transformtype != 0) return error(f, VORBIS_invalid_setup);
4114 if (m->mapping >= f->mapping_count) return error(f, VORBIS_invalid_setup);
4115 }
4116
4117 flush_packet(f);
4118
4119 f->previous_length = 0;
4120
4121 for (i=0; i < f->channels; ++i) {
4122 f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
4123 f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4124 f->finalY[i] = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
4125 if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem);
4126 memset(f->channel_buffers[i], 0, sizeof(float) * f->blocksize_1);
4127 #ifdef STB_VORBIS_NO_DEFER_FLOOR
4128 f->floor_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4129 if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem);
4130 #endif
4131 }
4132
4133 if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
4134 if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
4135 f->blocksize[0] = f->blocksize_0;
4136 f->blocksize[1] = f->blocksize_1;
4137
4138 #ifdef STB_VORBIS_DIVIDE_TABLE
4139 if (integer_divide_table[1][1]==0)
4140 for (i=0; i < DIVTAB_NUMER; ++i)
4141 for (j=1; j < DIVTAB_DENOM; ++j)
4142 integer_divide_table[i][j] = i / j;
4143 #endif
4144
4145 // compute how much temporary memory is needed
4146
4147 // 1.
4148 {
4149 uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
4150 uint32 classify_mem;
4151 int i,max_part_read=0;
4152 for (i=0; i < f->residue_count; ++i) {
4153 Residue *r = f->residue_config + i;
4154 unsigned int actual_size = f->blocksize_1 / 2;
4155 unsigned int limit_r_begin = r->begin < actual_size ? r->begin : actual_size;
4156 unsigned int limit_r_end = r->end < actual_size ? r->end : actual_size;
4157 int n_read = limit_r_end - limit_r_begin;
4158 int part_read = n_read / r->part_size;
4159 if (part_read > max_part_read)
4160 max_part_read = part_read;
4161 }
4162 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
4163 classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
4164 #else
4165 classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
4166 #endif
4167
4168 // maximum reasonable partition size is f->blocksize_1
4169
4170 f->temp_memory_required = classify_mem;
4171 if (imdct_mem > f->temp_memory_required)
4172 f->temp_memory_required = imdct_mem;
4173 }
4174
4175
4176 if (f->alloc.alloc_buffer) {
4177 assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
4178 // check if there's enough temp memory so we don't error later
4179 if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
4180 return error(f, VORBIS_outofmem);
4181 }
4182
4183 // @TODO: stb_vorbis_seek_start expects first_audio_page_offset to point to a page
4184 // without PAGEFLAG_continued_packet, so this either points to the first page, or
4185 // the page after the end of the headers. It might be cleaner to point to a page
4186 // in the middle of the headers, when that's the page where the first audio packet
4187 // starts, but we'd have to also correctly skip the end of any continued packet in
4188 // stb_vorbis_seek_start.
4189 if (f->next_seg == -1) {
4190 f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
4191 } else {
4192 f->first_audio_page_offset = 0;
4193 }
4194
4195 return TRUE;
4196 }
4197
vorbis_deinit(stb_vorbis * p)4198 static void vorbis_deinit(stb_vorbis *p)
4199 {
4200 int i,j;
4201
4202 setup_free(p, p->vendor);
4203 for (i=0; i < p->comment_list_length; ++i) {
4204 setup_free(p, p->comment_list[i]);
4205 }
4206 setup_free(p, p->comment_list);
4207
4208 if (p->residue_config) {
4209 for (i=0; i < p->residue_count; ++i) {
4210 Residue *r = p->residue_config+i;
4211 if (r->classdata) {
4212 for (j=0; j < p->codebooks[r->classbook].entries; ++j)
4213 setup_free(p, r->classdata[j]);
4214 setup_free(p, r->classdata);
4215 }
4216 setup_free(p, r->residue_books);
4217 }
4218 }
4219
4220 if (p->codebooks) {
4221 CHECK(p);
4222 for (i=0; i < p->codebook_count; ++i) {
4223 Codebook *c = p->codebooks + i;
4224 setup_free(p, c->codeword_lengths);
4225 setup_free(p, c->multiplicands);
4226 setup_free(p, c->codewords);
4227 setup_free(p, c->sorted_codewords);
4228 // c->sorted_values[-1] is the first entry in the array
4229 setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
4230 }
4231 setup_free(p, p->codebooks);
4232 }
4233 setup_free(p, p->floor_config);
4234 setup_free(p, p->residue_config);
4235 if (p->mapping) {
4236 for (i=0; i < p->mapping_count; ++i)
4237 setup_free(p, p->mapping[i].chan);
4238 setup_free(p, p->mapping);
4239 }
4240 CHECK(p);
4241 for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
4242 setup_free(p, p->channel_buffers[i]);
4243 setup_free(p, p->previous_window[i]);
4244 #ifdef STB_VORBIS_NO_DEFER_FLOOR
4245 setup_free(p, p->floor_buffers[i]);
4246 #endif
4247 setup_free(p, p->finalY[i]);
4248 }
4249 for (i=0; i < 2; ++i) {
4250 setup_free(p, p->A[i]);
4251 setup_free(p, p->B[i]);
4252 setup_free(p, p->C[i]);
4253 setup_free(p, p->window[i]);
4254 setup_free(p, p->bit_reverse[i]);
4255 }
4256 #ifndef STB_VORBIS_NO_STDIO
4257 if (p->close_on_free) fclose(p->f);
4258 #endif
4259 }
4260
stb_vorbis_close(stb_vorbis * p)4261 void stb_vorbis_close(stb_vorbis *p)
4262 {
4263 if (p == NULL) return;
4264 vorbis_deinit(p);
4265 setup_free(p,p);
4266 }
4267
vorbis_init(stb_vorbis * p,const stb_vorbis_alloc * z)4268 static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
4269 {
4270 memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start
4271 if (z) {
4272 p->alloc = *z;
4273 p->alloc.alloc_buffer_length_in_bytes &= ~7;
4274 p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
4275 }
4276 p->eof = 0;
4277 p->error = VORBIS__no_error;
4278 p->stream = NULL;
4279 p->codebooks = NULL;
4280 p->page_crc_tests = -1;
4281 #ifndef STB_VORBIS_NO_STDIO
4282 p->close_on_free = FALSE;
4283 p->f = NULL;
4284 #endif
4285 }
4286
stb_vorbis_get_sample_offset(stb_vorbis * f)4287 int stb_vorbis_get_sample_offset(stb_vorbis *f)
4288 {
4289 if (f->current_loc_valid)
4290 return f->current_loc;
4291 else
4292 return -1;
4293 }
4294
stb_vorbis_get_info(stb_vorbis * f)4295 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
4296 {
4297 stb_vorbis_info d;
4298 d.channels = f->channels;
4299 d.sample_rate = f->sample_rate;
4300 d.setup_memory_required = f->setup_memory_required;
4301 d.setup_temp_memory_required = f->setup_temp_memory_required;
4302 d.temp_memory_required = f->temp_memory_required;
4303 d.max_frame_size = f->blocksize_1 >> 1;
4304 return d;
4305 }
4306
stb_vorbis_get_comment(stb_vorbis * f)4307 stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f)
4308 {
4309 stb_vorbis_comment d;
4310 d.vendor = f->vendor;
4311 d.comment_list_length = f->comment_list_length;
4312 d.comment_list = f->comment_list;
4313 return d;
4314 }
4315
stb_vorbis_get_error(stb_vorbis * f)4316 int stb_vorbis_get_error(stb_vorbis *f)
4317 {
4318 int e = f->error;
4319 f->error = VORBIS__no_error;
4320 return e;
4321 }
4322
vorbis_alloc(stb_vorbis * f)4323 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
4324 {
4325 stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
4326 return p;
4327 }
4328
4329 #ifndef STB_VORBIS_NO_PUSHDATA_API
4330
stb_vorbis_flush_pushdata(stb_vorbis * f)4331 void stb_vorbis_flush_pushdata(stb_vorbis *f)
4332 {
4333 f->previous_length = 0;
4334 f->page_crc_tests = 0;
4335 f->discard_samples_deferred = 0;
4336 f->current_loc_valid = FALSE;
4337 f->first_decode = FALSE;
4338 f->samples_output = 0;
4339 f->channel_buffer_start = 0;
4340 f->channel_buffer_end = 0;
4341 }
4342
vorbis_search_for_page_pushdata(vorb * f,uint8 * data,int data_len)4343 static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
4344 {
4345 int i,n;
4346 for (i=0; i < f->page_crc_tests; ++i)
4347 f->scan[i].bytes_done = 0;
4348
4349 // if we have room for more scans, search for them first, because
4350 // they may cause us to stop early if their header is incomplete
4351 if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
4352 if (data_len < 4) return 0;
4353 data_len -= 3; // need to look for 4-byte sequence, so don't miss
4354 // one that straddles a boundary
4355 for (i=0; i < data_len; ++i) {
4356 if (data[i] == 0x4f) {
4357 if (0==memcmp(data+i, ogg_page_header, 4)) {
4358 int j,len;
4359 uint32 crc;
4360 // make sure we have the whole page header
4361 if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
4362 // only read up to this page start, so hopefully we'll
4363 // have the whole page header start next time
4364 data_len = i;
4365 break;
4366 }
4367 // ok, we have it all; compute the length of the page
4368 len = 27 + data[i+26];
4369 for (j=0; j < data[i+26]; ++j)
4370 len += data[i+27+j];
4371 // scan everything up to the embedded crc (which we must 0)
4372 crc = 0;
4373 for (j=0; j < 22; ++j)
4374 crc = crc32_update(crc, data[i+j]);
4375 // now process 4 0-bytes
4376 for ( ; j < 26; ++j)
4377 crc = crc32_update(crc, 0);
4378 // len is the total number of bytes we need to scan
4379 n = f->page_crc_tests++;
4380 f->scan[n].bytes_left = len-j;
4381 f->scan[n].crc_so_far = crc;
4382 f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24);
4383 // if the last frame on a page is continued to the next, then
4384 // we can't recover the sample_loc immediately
4385 if (data[i+27+data[i+26]-1] == 255)
4386 f->scan[n].sample_loc = ~0;
4387 else
4388 f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24);
4389 f->scan[n].bytes_done = i+j;
4390 if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT)
4391 break;
4392 // keep going if we still have room for more
4393 }
4394 }
4395 }
4396 }
4397
4398 for (i=0; i < f->page_crc_tests;) {
4399 uint32 crc;
4400 int j;
4401 int n = f->scan[i].bytes_done;
4402 int m = f->scan[i].bytes_left;
4403 if (m > data_len - n) m = data_len - n;
4404 // m is the bytes to scan in the current chunk
4405 crc = f->scan[i].crc_so_far;
4406 for (j=0; j < m; ++j)
4407 crc = crc32_update(crc, data[n+j]);
4408 f->scan[i].bytes_left -= m;
4409 f->scan[i].crc_so_far = crc;
4410 if (f->scan[i].bytes_left == 0) {
4411 // does it match?
4412 if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
4413 // Houston, we have page
4414 data_len = n+m; // consumption amount is wherever that scan ended
4415 f->page_crc_tests = -1; // drop out of page scan mode
4416 f->previous_length = 0; // decode-but-don't-output one frame
4417 f->next_seg = -1; // start a new page
4418 f->current_loc = f->scan[i].sample_loc; // set the current sample location
4419 // to the amount we'd have decoded had we decoded this page
4420 f->current_loc_valid = f->current_loc != ~0U;
4421 return data_len;
4422 }
4423 // delete entry
4424 f->scan[i] = f->scan[--f->page_crc_tests];
4425 } else {
4426 ++i;
4427 }
4428 }
4429
4430 return data_len;
4431 }
4432
4433 // return value: number of bytes we used
stb_vorbis_decode_frame_pushdata(stb_vorbis * f,const uint8 * data,int data_len,int * channels,float *** output,int * samples)4434 int stb_vorbis_decode_frame_pushdata(
4435 stb_vorbis *f, // the file we're decoding
4436 const uint8 *data, int data_len, // the memory available for decoding
4437 int *channels, // place to write number of float * buffers
4438 float ***output, // place to write float ** array of float * buffers
4439 int *samples // place to write number of output samples
4440 )
4441 {
4442 int i;
4443 int len,right,left;
4444
4445 if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4446
4447 if (f->page_crc_tests >= 0) {
4448 *samples = 0;
4449 return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len);
4450 }
4451
4452 f->stream = (uint8 *) data;
4453 f->stream_end = (uint8 *) data + data_len;
4454 f->error = VORBIS__no_error;
4455
4456 // check that we have the entire packet in memory
4457 if (!is_whole_packet_present(f)) {
4458 *samples = 0;
4459 return 0;
4460 }
4461
4462 if (!vorbis_decode_packet(f, &len, &left, &right)) {
4463 // save the actual error we encountered
4464 enum STBVorbisError error = f->error;
4465 if (error == VORBIS_bad_packet_type) {
4466 // flush and resynch
4467 f->error = VORBIS__no_error;
4468 while (get8_packet(f) != EOP)
4469 if (f->eof) break;
4470 *samples = 0;
4471 return (int) (f->stream - data);
4472 }
4473 if (error == VORBIS_continued_packet_flag_invalid) {
4474 if (f->previous_length == 0) {
4475 // we may be resynching, in which case it's ok to hit one
4476 // of these; just discard the packet
4477 f->error = VORBIS__no_error;
4478 while (get8_packet(f) != EOP)
4479 if (f->eof) break;
4480 *samples = 0;
4481 return (int) (f->stream - data);
4482 }
4483 }
4484 // if we get an error while parsing, what to do?
4485 // well, it DEFINITELY won't work to continue from where we are!
4486 stb_vorbis_flush_pushdata(f);
4487 // restore the error that actually made us bail
4488 f->error = error;
4489 *samples = 0;
4490 return 1;
4491 }
4492
4493 // success!
4494 len = vorbis_finish_frame(f, len, left, right);
4495 for (i=0; i < f->channels; ++i)
4496 f->outputs[i] = f->channel_buffers[i] + left;
4497
4498 if (channels) *channels = f->channels;
4499 *samples = len;
4500 *output = f->outputs;
4501 return (int) (f->stream - data);
4502 }
4503
stb_vorbis_open_pushdata(const unsigned char * data,int data_len,int * data_used,int * error,const stb_vorbis_alloc * alloc)4504 stb_vorbis *stb_vorbis_open_pushdata(
4505 const unsigned char *data, int data_len, // the memory available for decoding
4506 int *data_used, // only defined if result is not NULL
4507 int *error, const stb_vorbis_alloc *alloc)
4508 {
4509 stb_vorbis *f, p;
4510 vorbis_init(&p, alloc);
4511 p.stream = (uint8 *) data;
4512 p.stream_end = (uint8 *) data + data_len;
4513 p.push_mode = TRUE;
4514 if (!start_decoder(&p)) {
4515 if (p.eof)
4516 *error = VORBIS_need_more_data;
4517 else
4518 *error = p.error;
4519 return NULL;
4520 }
4521 f = vorbis_alloc(&p);
4522 if (f) {
4523 *f = p;
4524 *data_used = (int) (f->stream - data);
4525 *error = 0;
4526 return f;
4527 } else {
4528 vorbis_deinit(&p);
4529 return NULL;
4530 }
4531 }
4532 #endif // STB_VORBIS_NO_PUSHDATA_API
4533
stb_vorbis_get_file_offset(stb_vorbis * f)4534 unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
4535 {
4536 #ifndef STB_VORBIS_NO_PUSHDATA_API
4537 if (f->push_mode) return 0;
4538 #endif
4539 if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start);
4540 #ifndef STB_VORBIS_NO_STDIO
4541 return (unsigned int) (ftell(f->f) - f->f_start);
4542 #endif
4543 }
4544
4545 #ifndef STB_VORBIS_NO_PULLDATA_API
4546 //
4547 // DATA-PULLING API
4548 //
4549
vorbis_find_page(stb_vorbis * f,uint32 * end,uint32 * last)4550 static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
4551 {
4552 for(;;) {
4553 int n;
4554 if (f->eof) return 0;
4555 n = get8(f);
4556 if (n == 0x4f) { // page header candidate
4557 unsigned int retry_loc = stb_vorbis_get_file_offset(f);
4558 int i;
4559 // check if we're off the end of a file_section stream
4560 if (retry_loc - 25 > f->stream_len)
4561 return 0;
4562 // check the rest of the header
4563 for (i=1; i < 4; ++i)
4564 if (get8(f) != ogg_page_header[i])
4565 break;
4566 if (f->eof) return 0;
4567 if (i == 4) {
4568 uint8 header[27];
4569 uint32 i, crc, goal, len;
4570 for (i=0; i < 4; ++i)
4571 header[i] = ogg_page_header[i];
4572 for (; i < 27; ++i)
4573 header[i] = get8(f);
4574 if (f->eof) return 0;
4575 if (header[4] != 0) goto invalid;
4576 goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24);
4577 for (i=22; i < 26; ++i)
4578 header[i] = 0;
4579 crc = 0;
4580 for (i=0; i < 27; ++i)
4581 crc = crc32_update(crc, header[i]);
4582 len = 0;
4583 for (i=0; i < header[26]; ++i) {
4584 int s = get8(f);
4585 crc = crc32_update(crc, s);
4586 len += s;
4587 }
4588 if (len && f->eof) return 0;
4589 for (i=0; i < len; ++i)
4590 crc = crc32_update(crc, get8(f));
4591 // finished parsing probable page
4592 if (crc == goal) {
4593 // we could now check that it's either got the last
4594 // page flag set, OR it's followed by the capture
4595 // pattern, but I guess TECHNICALLY you could have
4596 // a file with garbage between each ogg page and recover
4597 // from it automatically? So even though that paranoia
4598 // might decrease the chance of an invalid decode by
4599 // another 2^32, not worth it since it would hose those
4600 // invalid-but-useful files?
4601 if (end)
4602 *end = stb_vorbis_get_file_offset(f);
4603 if (last) {
4604 if (header[5] & 0x04)
4605 *last = 1;
4606 else
4607 *last = 0;
4608 }
4609 set_file_offset(f, retry_loc-1);
4610 return 1;
4611 }
4612 }
4613 invalid:
4614 // not a valid page, so rewind and look for next one
4615 set_file_offset(f, retry_loc);
4616 }
4617 }
4618 }
4619
4620
4621 #define SAMPLE_unknown 0xffffffff
4622
4623 // seeking is implemented with a binary search, which narrows down the range to
4624 // 64K, before using a linear search (because finding the synchronization
4625 // pattern can be expensive, and the chance we'd find the end page again is
4626 // relatively high for small ranges)
4627 //
4628 // two initial interpolation-style probes are used at the start of the search
4629 // to try to bound either side of the binary search sensibly, while still
4630 // working in O(log n) time if they fail.
4631
get_seek_page_info(stb_vorbis * f,ProbedPage * z)4632 static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
4633 {
4634 uint8 header[27], lacing[255];
4635 int i,len;
4636
4637 // record where the page starts
4638 z->page_start = stb_vorbis_get_file_offset(f);
4639
4640 // parse the header
4641 getn(f, header, 27);
4642 if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
4643 return 0;
4644 getn(f, lacing, header[26]);
4645
4646 // determine the length of the payload
4647 len = 0;
4648 for (i=0; i < header[26]; ++i)
4649 len += lacing[i];
4650
4651 // this implies where the page ends
4652 z->page_end = z->page_start + 27 + header[26] + len;
4653
4654 // read the last-decoded sample out of the data
4655 z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
4656
4657 // restore file state to where we were
4658 set_file_offset(f, z->page_start);
4659 return 1;
4660 }
4661
4662 // rarely used function to seek back to the preceding page while finding the
4663 // start of a packet
go_to_page_before(stb_vorbis * f,unsigned int limit_offset)4664 static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
4665 {
4666 unsigned int previous_safe, end;
4667
4668 // now we want to seek back 64K from the limit
4669 if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset)
4670 previous_safe = limit_offset - 65536;
4671 else
4672 previous_safe = f->first_audio_page_offset;
4673
4674 set_file_offset(f, previous_safe);
4675
4676 while (vorbis_find_page(f, &end, NULL)) {
4677 if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
4678 return 1;
4679 set_file_offset(f, end);
4680 }
4681
4682 return 0;
4683 }
4684
4685 // implements the search logic for finding a page and starting decoding. if
4686 // the function succeeds, current_loc_valid will be true and current_loc will
4687 // be less than or equal to the provided sample number (the closer the
4688 // better).
seek_to_sample_coarse(stb_vorbis * f,uint32 sample_number)4689 static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
4690 {
4691 ProbedPage left, right, mid;
4692 int i, start_seg_with_known_loc, end_pos, page_start;
4693 uint32 delta, stream_length, padding, last_sample_limit;
4694 double offset = 0.0, bytes_per_sample = 0.0;
4695 int probe = 0;
4696
4697 // find the last page and validate the target sample
4698 stream_length = stb_vorbis_stream_length_in_samples(f);
4699 if (stream_length == 0) return error(f, VORBIS_seek_without_length);
4700 if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
4701
4702 // this is the maximum difference between the window-center (which is the
4703 // actual granule position value), and the right-start (which the spec
4704 // indicates should be the granule position (give or take one)).
4705 padding = ((f->blocksize_1 - f->blocksize_0) >> 2);
4706 if (sample_number < padding)
4707 last_sample_limit = 0;
4708 else
4709 last_sample_limit = sample_number - padding;
4710
4711 left = f->p_first;
4712 while (left.last_decoded_sample == ~0U) {
4713 // (untested) the first page does not have a 'last_decoded_sample'
4714 set_file_offset(f, left.page_end);
4715 if (!get_seek_page_info(f, &left)) goto error;
4716 }
4717
4718 right = f->p_last;
4719 assert(right.last_decoded_sample != ~0U);
4720
4721 // starting from the start is handled differently
4722 if (last_sample_limit <= left.last_decoded_sample) {
4723 if (stb_vorbis_seek_start(f)) {
4724 if (f->current_loc > sample_number)
4725 return error(f, VORBIS_seek_failed);
4726 return 1;
4727 }
4728 return 0;
4729 }
4730
4731 while (left.page_end != right.page_start) {
4732 assert(left.page_end < right.page_start);
4733 // search range in bytes
4734 delta = right.page_start - left.page_end;
4735 if (delta <= 65536) {
4736 // there's only 64K left to search - handle it linearly
4737 set_file_offset(f, left.page_end);
4738 } else {
4739 if (probe < 2) {
4740 if (probe == 0) {
4741 // first probe (interpolate)
4742 double data_bytes = right.page_end - left.page_start;
4743 bytes_per_sample = data_bytes / right.last_decoded_sample;
4744 offset = left.page_start + bytes_per_sample * (last_sample_limit - left.last_decoded_sample);
4745 } else {
4746 // second probe (try to bound the other side)
4747 double error = ((double) last_sample_limit - mid.last_decoded_sample) * bytes_per_sample;
4748 if (error >= 0 && error < 8000) error = 8000;
4749 if (error < 0 && error > -8000) error = -8000;
4750 offset += error * 2;
4751 }
4752
4753 // ensure the offset is valid
4754 if (offset < left.page_end)
4755 offset = left.page_end;
4756 if (offset > right.page_start - 65536)
4757 offset = right.page_start - 65536;
4758
4759 set_file_offset(f, (unsigned int) offset);
4760 } else {
4761 // binary search for large ranges (offset by 32K to ensure
4762 // we don't hit the right page)
4763 set_file_offset(f, left.page_end + (delta / 2) - 32768);
4764 }
4765
4766 if (!vorbis_find_page(f, NULL, NULL)) goto error;
4767 }
4768
4769 for (;;) {
4770 if (!get_seek_page_info(f, &mid)) goto error;
4771 if (mid.last_decoded_sample != ~0U) break;
4772 // (untested) no frames end on this page
4773 set_file_offset(f, mid.page_end);
4774 assert(mid.page_start < right.page_start);
4775 }
4776
4777 // if we've just found the last page again then we're in a tricky file,
4778 // and we're close enough (if it wasn't an interpolation probe).
4779 if (mid.page_start == right.page_start) {
4780 if (probe >= 2 || delta <= 65536)
4781 break;
4782 } else {
4783 if (last_sample_limit < mid.last_decoded_sample)
4784 right = mid;
4785 else
4786 left = mid;
4787 }
4788
4789 ++probe;
4790 }
4791
4792 // seek back to start of the last packet
4793 page_start = left.page_start;
4794 set_file_offset(f, page_start);
4795 if (!start_page(f)) return error(f, VORBIS_seek_failed);
4796 end_pos = f->end_seg_with_known_loc;
4797 assert(end_pos >= 0);
4798
4799 for (;;) {
4800 for (i = end_pos; i > 0; --i)
4801 if (f->segments[i-1] != 255)
4802 break;
4803
4804 start_seg_with_known_loc = i;
4805
4806 if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet))
4807 break;
4808
4809 // (untested) the final packet begins on an earlier page
4810 if (!go_to_page_before(f, page_start))
4811 goto error;
4812
4813 page_start = stb_vorbis_get_file_offset(f);
4814 if (!start_page(f)) goto error;
4815 end_pos = f->segment_count - 1;
4816 }
4817
4818 // prepare to start decoding
4819 f->current_loc_valid = FALSE;
4820 f->last_seg = FALSE;
4821 f->valid_bits = 0;
4822 f->packet_bytes = 0;
4823 f->bytes_in_seg = 0;
4824 f->previous_length = 0;
4825 f->next_seg = start_seg_with_known_loc;
4826
4827 for (i = 0; i < start_seg_with_known_loc; i++)
4828 skip(f, f->segments[i]);
4829
4830 // start decoding (optimizable - this frame is generally discarded)
4831 if (!vorbis_pump_first_frame(f))
4832 return 0;
4833 if (f->current_loc > sample_number)
4834 return error(f, VORBIS_seek_failed);
4835 return 1;
4836
4837 error:
4838 // try to restore the file to a valid state
4839 stb_vorbis_seek_start(f);
4840 return error(f, VORBIS_seek_failed);
4841 }
4842
4843 // the same as vorbis_decode_initial, but without advancing
peek_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)4844 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
4845 {
4846 int bits_read, bytes_read;
4847
4848 if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
4849 return 0;
4850
4851 // either 1 or 2 bytes were read, figure out which so we can rewind
4852 bits_read = 1 + ilog(f->mode_count-1);
4853 if (f->mode_config[*mode].blockflag)
4854 bits_read += 2;
4855 bytes_read = (bits_read + 7) / 8;
4856
4857 f->bytes_in_seg += bytes_read;
4858 f->packet_bytes -= bytes_read;
4859 skip(f, -bytes_read);
4860 if (f->next_seg == -1)
4861 f->next_seg = f->segment_count - 1;
4862 else
4863 f->next_seg--;
4864 f->valid_bits = 0;
4865
4866 return 1;
4867 }
4868
stb_vorbis_seek_frame(stb_vorbis * f,unsigned int sample_number)4869 int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
4870 {
4871 uint32 max_frame_samples;
4872
4873 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4874
4875 // fast page-level search
4876 if (!seek_to_sample_coarse(f, sample_number))
4877 return 0;
4878
4879 assert(f->current_loc_valid);
4880 assert(f->current_loc <= sample_number);
4881
4882 // linear search for the relevant packet
4883 max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2;
4884 while (f->current_loc < sample_number) {
4885 int left_start, left_end, right_start, right_end, mode, frame_samples;
4886 if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
4887 return error(f, VORBIS_seek_failed);
4888 // calculate the number of samples returned by the next frame
4889 frame_samples = right_start - left_start;
4890 if (f->current_loc + frame_samples > sample_number) {
4891 return 1; // the next frame will contain the sample
4892 } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) {
4893 // there's a chance the frame after this could contain the sample
4894 vorbis_pump_first_frame(f);
4895 } else {
4896 // this frame is too early to be relevant
4897 f->current_loc += frame_samples;
4898 f->previous_length = 0;
4899 maybe_start_packet(f);
4900 flush_packet(f);
4901 }
4902 }
4903 // the next frame should start with the sample
4904 if (f->current_loc != sample_number) return error(f, VORBIS_seek_failed);
4905 return 1;
4906 }
4907
stb_vorbis_seek(stb_vorbis * f,unsigned int sample_number)4908 int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
4909 {
4910 if (!stb_vorbis_seek_frame(f, sample_number))
4911 return 0;
4912
4913 if (sample_number != f->current_loc) {
4914 int n;
4915 uint32 frame_start = f->current_loc;
4916 stb_vorbis_get_frame_float(f, &n, NULL);
4917 assert(sample_number > frame_start);
4918 assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end);
4919 f->channel_buffer_start += (sample_number - frame_start);
4920 }
4921
4922 return 1;
4923 }
4924
stb_vorbis_seek_start(stb_vorbis * f)4925 int stb_vorbis_seek_start(stb_vorbis *f)
4926 {
4927 if (IS_PUSH_MODE(f)) { return error(f, VORBIS_invalid_api_mixing); }
4928 set_file_offset(f, f->first_audio_page_offset);
4929 f->previous_length = 0;
4930 f->first_decode = TRUE;
4931 f->next_seg = -1;
4932 return vorbis_pump_first_frame(f);
4933 }
4934
stb_vorbis_stream_length_in_samples(stb_vorbis * f)4935 unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
4936 {
4937 unsigned int restore_offset, previous_safe;
4938 unsigned int end, last_page_loc;
4939
4940 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4941 if (!f->total_samples) {
4942 unsigned int last;
4943 uint32 lo,hi;
4944 char header[6];
4945
4946 // first, store the current decode position so we can restore it
4947 restore_offset = stb_vorbis_get_file_offset(f);
4948
4949 // now we want to seek back 64K from the end (the last page must
4950 // be at most a little less than 64K, but let's allow a little slop)
4951 if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
4952 previous_safe = f->stream_len - 65536;
4953 else
4954 previous_safe = f->first_audio_page_offset;
4955
4956 set_file_offset(f, previous_safe);
4957 // previous_safe is now our candidate 'earliest known place that seeking
4958 // to will lead to the final page'
4959
4960 if (!vorbis_find_page(f, &end, &last)) {
4961 // if we can't find a page, we're hosed!
4962 f->error = VORBIS_cant_find_last_page;
4963 f->total_samples = 0xffffffff;
4964 goto done;
4965 }
4966
4967 // check if there are more pages
4968 last_page_loc = stb_vorbis_get_file_offset(f);
4969
4970 // stop when the last_page flag is set, not when we reach eof;
4971 // this allows us to stop short of a 'file_section' end without
4972 // explicitly checking the length of the section
4973 while (!last) {
4974 set_file_offset(f, end);
4975 if (!vorbis_find_page(f, &end, &last)) {
4976 // the last page we found didn't have the 'last page' flag
4977 // set. whoops!
4978 break;
4979 }
4980 previous_safe = last_page_loc+1;
4981 last_page_loc = stb_vorbis_get_file_offset(f);
4982 }
4983
4984 set_file_offset(f, last_page_loc);
4985
4986 // parse the header
4987 getn(f, (unsigned char *)header, 6);
4988 // extract the absolute granule position
4989 lo = get32(f);
4990 hi = get32(f);
4991 if (lo == 0xffffffff && hi == 0xffffffff) {
4992 f->error = VORBIS_cant_find_last_page;
4993 f->total_samples = SAMPLE_unknown;
4994 goto done;
4995 }
4996 if (hi)
4997 lo = 0xfffffffe; // saturate
4998 f->total_samples = lo;
4999
5000 f->p_last.page_start = last_page_loc;
5001 f->p_last.page_end = end;
5002 f->p_last.last_decoded_sample = lo;
5003
5004 done:
5005 set_file_offset(f, restore_offset);
5006 }
5007 return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
5008 }
5009
stb_vorbis_stream_length_in_seconds(stb_vorbis * f)5010 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
5011 {
5012 return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
5013 }
5014
5015
5016
stb_vorbis_get_frame_float(stb_vorbis * f,int * channels,float *** output)5017 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
5018 {
5019 int len, right,left,i;
5020 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
5021
5022 if (!vorbis_decode_packet(f, &len, &left, &right)) {
5023 f->channel_buffer_start = f->channel_buffer_end = 0;
5024 return 0;
5025 }
5026
5027 len = vorbis_finish_frame(f, len, left, right);
5028 for (i=0; i < f->channels; ++i)
5029 f->outputs[i] = f->channel_buffers[i] + left;
5030
5031 f->channel_buffer_start = left;
5032 f->channel_buffer_end = left+len;
5033
5034 if (channels) *channels = f->channels;
5035 if (output) *output = f->outputs;
5036 return len;
5037 }
5038
5039 #ifndef STB_VORBIS_NO_STDIO
5040
stb_vorbis_open_file_section(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc,unsigned int length)5041 stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length)
5042 {
5043 stb_vorbis *f, p;
5044 vorbis_init(&p, alloc);
5045 p.f = file;
5046 p.f_start = (uint32) ftell(file);
5047 p.stream_len = length;
5048 p.close_on_free = close_on_free;
5049 if (start_decoder(&p)) {
5050 f = vorbis_alloc(&p);
5051 if (f) {
5052 *f = p;
5053 vorbis_pump_first_frame(f);
5054 return f;
5055 }
5056 }
5057 if (error) *error = p.error;
5058 vorbis_deinit(&p);
5059 return NULL;
5060 }
5061
stb_vorbis_open_file(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc)5062 stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc)
5063 {
5064 unsigned int len, start;
5065 start = (unsigned int) ftell(file);
5066 fseek(file, 0, SEEK_END);
5067 len = (unsigned int) (ftell(file) - start);
5068 fseek(file, start, SEEK_SET);
5069 return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
5070 }
5071
stb_vorbis_open_filename(const char * filename,int * error,const stb_vorbis_alloc * alloc)5072 stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
5073 {
5074 FILE *f;
5075 #if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__)
5076 if (0 != fopen_s(&f, filename, "rb"))
5077 f = NULL;
5078 #else
5079 f = fopen(filename, "rb");
5080 #endif
5081 if (f)
5082 return stb_vorbis_open_file(f, TRUE, error, alloc);
5083 if (error) *error = VORBIS_file_open_failure;
5084 return NULL;
5085 }
5086 #endif // STB_VORBIS_NO_STDIO
5087
stb_vorbis_open_memory(const unsigned char * data,int len,int * error,const stb_vorbis_alloc * alloc)5088 stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc)
5089 {
5090 stb_vorbis *f, p;
5091 if (data == NULL) return NULL;
5092 vorbis_init(&p, alloc);
5093 p.stream = (uint8 *) data;
5094 p.stream_end = (uint8 *) data + len;
5095 p.stream_start = (uint8 *) p.stream;
5096 p.stream_len = len;
5097 p.push_mode = FALSE;
5098 if (start_decoder(&p)) {
5099 f = vorbis_alloc(&p);
5100 if (f) {
5101 *f = p;
5102 vorbis_pump_first_frame(f);
5103 if (error) *error = VORBIS__no_error;
5104 return f;
5105 }
5106 }
5107 if (error) *error = p.error;
5108 vorbis_deinit(&p);
5109 return NULL;
5110 }
5111
5112 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
5113 #define PLAYBACK_MONO 1
5114 #define PLAYBACK_LEFT 2
5115 #define PLAYBACK_RIGHT 4
5116
5117 #define L (PLAYBACK_LEFT | PLAYBACK_MONO)
5118 #define C (PLAYBACK_LEFT | PLAYBACK_RIGHT | PLAYBACK_MONO)
5119 #define R (PLAYBACK_RIGHT | PLAYBACK_MONO)
5120
5121 static int8 channel_position[7][6] =
5122 {
5123 { 0 },
5124 { C },
5125 { L, R },
5126 { L, C, R },
5127 { L, R, L, R },
5128 { L, C, R, L, R },
5129 { L, C, R, L, R, C },
5130 };
5131
5132
5133 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
5134 typedef union {
5135 float f;
5136 int i;
5137 } float_conv;
5138 typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4];
5139 #define FASTDEF(x) float_conv x
5140 // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
5141 #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT))
5142 #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22))
5143 #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s))
5144 #define check_endianness()
5145 #else
5146 #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s))))
5147 #define check_endianness()
5148 #define FASTDEF(x)
5149 #endif
5150
copy_samples(short * dest,float * src,int len)5151 static void copy_samples(short *dest, float *src, int len)
5152 {
5153 int i;
5154 check_endianness();
5155 for (i=0; i < len; ++i) {
5156 FASTDEF(temp);
5157 int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i],15);
5158 if ((unsigned int) (v + 32768) > 65535)
5159 v = v < 0 ? -32768 : 32767;
5160 dest[i] = v;
5161 }
5162 }
5163
compute_samples(int mask,short * output,int num_c,float ** data,int d_offset,int len)5164 static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
5165 {
5166 #define BUFFER_SIZE 32
5167 float buffer[BUFFER_SIZE];
5168 int i,j,o,n = BUFFER_SIZE;
5169 check_endianness();
5170 for (o = 0; o < len; o += BUFFER_SIZE) {
5171 memset(buffer, 0, sizeof(buffer));
5172 if (o + n > len) n = len - o;
5173 for (j=0; j < num_c; ++j) {
5174 if (channel_position[num_c][j] & mask) {
5175 for (i=0; i < n; ++i)
5176 buffer[i] += data[j][d_offset+o+i];
5177 }
5178 }
5179 for (i=0; i < n; ++i) {
5180 FASTDEF(temp);
5181 int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5182 if ((unsigned int) (v + 32768) > 65535)
5183 v = v < 0 ? -32768 : 32767;
5184 output[o+i] = v;
5185 }
5186 }
5187 }
5188
compute_stereo_samples(short * output,int num_c,float ** data,int d_offset,int len)5189 static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
5190 {
5191 #define BUFFER_SIZE 32
5192 float buffer[BUFFER_SIZE];
5193 int i,j,o,n = BUFFER_SIZE >> 1;
5194 // o is the offset in the source data
5195 check_endianness();
5196 for (o = 0; o < len; o += BUFFER_SIZE >> 1) {
5197 // o2 is the offset in the output data
5198 int o2 = o << 1;
5199 memset(buffer, 0, sizeof(buffer));
5200 if (o + n > len) n = len - o;
5201 for (j=0; j < num_c; ++j) {
5202 int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);
5203 if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) {
5204 for (i=0; i < n; ++i) {
5205 buffer[i*2+0] += data[j][d_offset+o+i];
5206 buffer[i*2+1] += data[j][d_offset+o+i];
5207 }
5208 } else if (m == PLAYBACK_LEFT) {
5209 for (i=0; i < n; ++i) {
5210 buffer[i*2+0] += data[j][d_offset+o+i];
5211 }
5212 } else if (m == PLAYBACK_RIGHT) {
5213 for (i=0; i < n; ++i) {
5214 buffer[i*2+1] += data[j][d_offset+o+i];
5215 }
5216 }
5217 }
5218 for (i=0; i < (n<<1); ++i) {
5219 FASTDEF(temp);
5220 int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5221 if ((unsigned int) (v + 32768) > 65535)
5222 v = v < 0 ? -32768 : 32767;
5223 output[o2+i] = v;
5224 }
5225 }
5226 }
5227
convert_samples_short(int buf_c,short ** buffer,int b_offset,int data_c,float ** data,int d_offset,int samples)5228 static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
5229 {
5230 int i;
5231 if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5232 static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
5233 for (i=0; i < buf_c; ++i)
5234 compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples);
5235 } else {
5236 int limit = buf_c < data_c ? buf_c : data_c;
5237 for (i=0; i < limit; ++i)
5238 copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
5239 for ( ; i < buf_c; ++i)
5240 memset(buffer[i]+b_offset, 0, sizeof(short) * samples);
5241 }
5242 }
5243
stb_vorbis_get_frame_short(stb_vorbis * f,int num_c,short ** buffer,int num_samples)5244 int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
5245 {
5246 float **output = NULL;
5247 int len = stb_vorbis_get_frame_float(f, NULL, &output);
5248 if (len > num_samples) len = num_samples;
5249 if (len)
5250 convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len);
5251 return len;
5252 }
5253
convert_channels_short_interleaved(int buf_c,short * buffer,int data_c,float ** data,int d_offset,int len)5254 static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
5255 {
5256 int i;
5257 check_endianness();
5258 if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5259 assert(buf_c == 2);
5260 for (i=0; i < buf_c; ++i)
5261 compute_stereo_samples(buffer, data_c, data, d_offset, len);
5262 } else {
5263 int limit = buf_c < data_c ? buf_c : data_c;
5264 int j;
5265 for (j=0; j < len; ++j) {
5266 for (i=0; i < limit; ++i) {
5267 FASTDEF(temp);
5268 float f = data[i][d_offset+j];
5269 int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15);
5270 if ((unsigned int) (v + 32768) > 65535)
5271 v = v < 0 ? -32768 : 32767;
5272 *buffer++ = v;
5273 }
5274 for ( ; i < buf_c; ++i)
5275 *buffer++ = 0;
5276 }
5277 }
5278 }
5279
stb_vorbis_get_frame_short_interleaved(stb_vorbis * f,int num_c,short * buffer,int num_shorts)5280 int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
5281 {
5282 float **output;
5283 int len;
5284 if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
5285 len = stb_vorbis_get_frame_float(f, NULL, &output);
5286 if (len) {
5287 if (len*num_c > num_shorts) len = num_shorts / num_c;
5288 convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len);
5289 }
5290 return len;
5291 }
5292
stb_vorbis_get_samples_short_interleaved(stb_vorbis * f,int channels,short * buffer,int num_shorts)5293 int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
5294 {
5295 float **outputs;
5296 int len = num_shorts / channels;
5297 int n=0;
5298 int z = f->channels;
5299 if (z > channels) z = channels;
5300 while (n < len) {
5301 int k = f->channel_buffer_end - f->channel_buffer_start;
5302 if (n+k >= len) k = len - n;
5303 if (k)
5304 convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5305 buffer += k*channels;
5306 n += k;
5307 f->channel_buffer_start += k;
5308 if (n == len) break;
5309 if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5310 }
5311 return n;
5312 }
5313
stb_vorbis_get_samples_short(stb_vorbis * f,int channels,short ** buffer,int len)5314 int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
5315 {
5316 float **outputs;
5317 int n=0;
5318 int z = f->channels;
5319 if (z > channels) z = channels;
5320 while (n < len) {
5321 int k = f->channel_buffer_end - f->channel_buffer_start;
5322 if (n+k >= len) k = len - n;
5323 if (k)
5324 convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5325 n += k;
5326 f->channel_buffer_start += k;
5327 if (n == len) break;
5328 if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5329 }
5330 return n;
5331 }
5332
5333 #ifndef STB_VORBIS_NO_STDIO
stb_vorbis_decode_filename(const char * filename,int * channels,int * sample_rate,short ** output)5334 int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output)
5335 {
5336 int data_len, offset, total, limit, error;
5337 short *data;
5338 stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
5339 if (v == NULL) return -1;
5340 limit = v->channels * 4096;
5341 *channels = v->channels;
5342 if (sample_rate)
5343 *sample_rate = v->sample_rate;
5344 offset = data_len = 0;
5345 total = limit;
5346 data = (short *) malloc(total * sizeof(*data));
5347 if (data == NULL) {
5348 stb_vorbis_close(v);
5349 return -2;
5350 }
5351 for (;;) {
5352 int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5353 if (n == 0) break;
5354 data_len += n;
5355 offset += n * v->channels;
5356 if (offset + limit > total) {
5357 short *data2;
5358 total *= 2;
5359 data2 = (short *) realloc(data, total * sizeof(*data));
5360 if (data2 == NULL) {
5361 free(data);
5362 stb_vorbis_close(v);
5363 return -2;
5364 }
5365 data = data2;
5366 }
5367 }
5368 *output = data;
5369 stb_vorbis_close(v);
5370 return data_len;
5371 }
5372 #endif // NO_STDIO
5373
stb_vorbis_decode_memory(const uint8 * mem,int len,int * channels,int * sample_rate,short ** output)5374 int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output)
5375 {
5376 int data_len, offset, total, limit, error;
5377 short *data;
5378 stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
5379 if (v == NULL) return -1;
5380 limit = v->channels * 4096;
5381 *channels = v->channels;
5382 if (sample_rate)
5383 *sample_rate = v->sample_rate;
5384 offset = data_len = 0;
5385 total = limit;
5386 data = (short *) malloc(total * sizeof(*data));
5387 if (data == NULL) {
5388 stb_vorbis_close(v);
5389 return -2;
5390 }
5391 for (;;) {
5392 int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5393 if (n == 0) break;
5394 data_len += n;
5395 offset += n * v->channels;
5396 if (offset + limit > total) {
5397 short *data2;
5398 total *= 2;
5399 data2 = (short *) realloc(data, total * sizeof(*data));
5400 if (data2 == NULL) {
5401 free(data);
5402 stb_vorbis_close(v);
5403 return -2;
5404 }
5405 data = data2;
5406 }
5407 }
5408 *output = data;
5409 stb_vorbis_close(v);
5410 return data_len;
5411 }
5412 #endif // STB_VORBIS_NO_INTEGER_CONVERSION
5413
stb_vorbis_get_samples_float_interleaved(stb_vorbis * f,int channels,float * buffer,int num_floats)5414 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
5415 {
5416 float **outputs;
5417 int len = num_floats / channels;
5418 int n=0;
5419 int z = f->channels;
5420 if (z > channels) z = channels;
5421 while (n < len) {
5422 int i,j;
5423 int k = f->channel_buffer_end - f->channel_buffer_start;
5424 if (n+k >= len) k = len - n;
5425 for (j=0; j < k; ++j) {
5426 for (i=0; i < z; ++i)
5427 *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
5428 for ( ; i < channels; ++i)
5429 *buffer++ = 0;
5430 }
5431 n += k;
5432 f->channel_buffer_start += k;
5433 if (n == len)
5434 break;
5435 if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5436 break;
5437 }
5438 return n;
5439 }
5440
stb_vorbis_get_samples_float(stb_vorbis * f,int channels,float ** buffer,int num_samples)5441 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
5442 {
5443 float **outputs;
5444 int n=0;
5445 int z = f->channels;
5446 if (z > channels) z = channels;
5447 while (n < num_samples) {
5448 int i;
5449 int k = f->channel_buffer_end - f->channel_buffer_start;
5450 if (n+k >= num_samples) k = num_samples - n;
5451 if (k) {
5452 for (i=0; i < z; ++i)
5453 memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k);
5454 for ( ; i < channels; ++i)
5455 memset(buffer[i]+n, 0, sizeof(float) * k);
5456 }
5457 n += k;
5458 f->channel_buffer_start += k;
5459 if (n == num_samples)
5460 break;
5461 if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5462 break;
5463 }
5464 return n;
5465 }
5466 #endif // STB_VORBIS_NO_PULLDATA_API
5467
5468 /* Version history
5469 1.17 - 2019-07-08 - fix CVE-2019-13217, -13218, -13219, -13220, -13221, -13222, -13223
5470 found with Mayhem by ForAllSecure
5471 1.16 - 2019-03-04 - fix warnings
5472 1.15 - 2019-02-07 - explicit failure if Ogg Skeleton data is found
5473 1.14 - 2018-02-11 - delete bogus dealloca usage
5474 1.13 - 2018-01-29 - fix truncation of last frame (hopefully)
5475 1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
5476 1.11 - 2017-07-23 - fix MinGW compilation
5477 1.10 - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
5478 1.09 - 2016-04-04 - back out 'avoid discarding last frame' fix from previous version
5479 1.08 - 2016-04-02 - fixed multiple warnings; fix setup memory leaks;
5480 avoid discarding last frame of audio data
5481 1.07 - 2015-01-16 - fixed some warnings, fix mingw, const-correct API
5482 some more crash fixes when out of memory or with corrupt files
5483 1.06 - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
5484 some crash fixes when out of memory or with corrupt files
5485 1.05 - 2015-04-19 - don't define __forceinline if it's redundant
5486 1.04 - 2014-08-27 - fix missing const-correct case in API
5487 1.03 - 2014-08-07 - Warning fixes
5488 1.02 - 2014-07-09 - Declare qsort compare function _cdecl on windows
5489 1.01 - 2014-06-18 - fix stb_vorbis_get_samples_float
5490 1.0 - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in multichannel
5491 (API change) report sample rate for decode-full-file funcs
5492 0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5493 0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5494 0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5495 0.99993 - remove assert that fired on legal files with empty tables
5496 0.99992 - rewind-to-start
5497 0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5498 0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5499 0.9998 - add a full-decode function with a memory source
5500 0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5501 0.9996 - query length of vorbis stream in samples/seconds
5502 0.9995 - bugfix to another optimization that only happened in certain files
5503 0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5504 0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5505 0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5506 0.9991 - performance improvement of IMDCT
5507 0.999 - (should have been 0.9990) performance improvement of IMDCT
5508 0.998 - no-CRT support from Casey Muratori
5509 0.997 - bugfixes for bugs found by Terje Mathisen
5510 0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5511 0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5512 0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5513 0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5514 0.992 - fixes for MinGW warning
5515 0.991 - turn fast-float-conversion on by default
5516 0.990 - fix push-mode seek recovery if you seek into the headers
5517 0.98b - fix to bad release of 0.98
5518 0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5519 0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5520 0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5521 0.95 - clamping code for 16-bit functions
5522 0.94 - not publically released
5523 0.93 - fixed all-zero-floor case (was decoding garbage)
5524 0.92 - fixed a memory leak
5525 0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5526 0.90 - first public release
5527 */
5528
5529 #endif // STB_VORBIS_HEADER_ONLY
5530
5531
5532 /*
5533 ------------------------------------------------------------------------------
5534 This software is available under 2 licenses -- choose whichever you prefer.
5535 ------------------------------------------------------------------------------
5536 ALTERNATIVE A - MIT License
5537 Copyright (c) 2017 Sean Barrett
5538 Permission is hereby granted, free of charge, to any person obtaining a copy of
5539 this software and associated documentation files (the "Software"), to deal in
5540 the Software without restriction, including without limitation the rights to
5541 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
5542 of the Software, and to permit persons to whom the Software is furnished to do
5543 so, subject to the following conditions:
5544 The above copyright notice and this permission notice shall be included in all
5545 copies or substantial portions of the Software.
5546 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5547 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5548 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5549 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
5550 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
5551 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
5552 SOFTWARE.
5553 ------------------------------------------------------------------------------
5554 ALTERNATIVE B - Public Domain (www.unlicense.org)
5555 This is free and unencumbered software released into the public domain.
5556 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
5557 software, either in source code form or as a compiled binary, for any purpose,
5558 commercial or non-commercial, and by any means.
5559 In jurisdictions that recognize copyright laws, the author or authors of this
5560 software dedicate any and all copyright interest in the software to the public
5561 domain. We make this dedication for the benefit of the public at large and to
5562 the detriment of our heirs and successors. We intend this dedication to be an
5563 overt act of relinquishment in perpetuity of all present and future rights to
5564 this software under copyright law.
5565 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5566 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5567 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5568 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
5569 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
5570 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
5571 ------------------------------------------------------------------------------
5572 */
5573