1 // Ogg Vorbis audio decoder - v1.19 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking implementation
7 // sponsored by Phillip Bennefall, Marc Andersen, Aaron Baker,
8 // Elias Software, Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 // See end of file for license information.
13 //
14 // Limitations:
15 //
16 // - floor 0 not supported (used in old ogg vorbis files pre-2004)
17 // - lossless sample-truncation at beginning ignored
18 // - cannot concatenate multiple vorbis streams
19 // - sample positions are 32-bit, limiting seekable 192Khz
20 // files to around 6 hours (Ogg supports 64-bit)
21 //
22 // Feature contributors:
23 // Dougall Johnson (sample-exact seeking)
24 //
25 // Bugfix/warning contributors:
26 // Terje Mathisen Niklas Frykholm Andy Hill
27 // Casey Muratori John Bolton Gargaj
28 // Laurent Gomila Marc LeBlanc Ronny Chevalier
29 // Bernhard Wodo Evan Balster github:alxprd
30 // Tom Beaumont Ingo Leitgeb Nicolas Guillemot
31 // Phillip Bennefall Rohit Thiago Goulart
32 // github:manxorist saga musix github:infatum
33 // Timur Gagiev Maxwell Koo Peter Waller
34 // github:audinowho Dougall Johnson
35 //
36 // Partial history:
37 // 1.19 - 2020-02-05 - warnings
38 // 1.18 - 2020-02-02 - fix seek bugs; parse header comments; misc warnings etc.
39 // 1.17 - 2019-07-08 - fix CVE-2019-13217..CVE-2019-13223 (by ForAllSecure)
40 // 1.16 - 2019-03-04 - fix warnings
41 // 1.15 - 2019-02-07 - explicit failure if Ogg Skeleton data is found
42 // 1.14 - 2018-02-11 - delete bogus dealloca usage
43 // 1.13 - 2018-01-29 - fix truncation of last frame (hopefully)
44 // 1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
45 // 1.11 - 2017-07-23 - fix MinGW compilation
46 // 1.10 - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
47 // 1.09 - 2016-04-04 - back out 'truncation of last frame' fix from previous version
48 // 1.08 - 2016-04-02 - warnings; setup memory leaks; truncation of last frame
49 // 1.07 - 2015-01-16 - fixes for crashes on invalid files; warning fixes; const
50 // 1.06 - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
51 // some crash fixes when out of memory or with corrupt files
52 // fix some inappropriately signed shifts
53 // 1.05 - 2015-04-19 - don't define __forceinline if it's redundant
54 // 1.04 - 2014-08-27 - fix missing const-correct case in API
55 // 1.03 - 2014-08-07 - warning fixes
56 // 1.02 - 2014-07-09 - declare qsort comparison as explicitly _cdecl in Windows
57 // 1.01 - 2014-06-18 - fix stb_vorbis_get_samples_float (interleaved was correct)
58 // 1.0 - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
59 // (API change) report sample rate for decode-full-file funcs
60 //
61 // See end of file for full version history.
62
63
64 //////////////////////////////////////////////////////////////////////////////
65 //
66 // HEADER BEGINS HERE
67 //
68
69 #ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
70 #define STB_VORBIS_INCLUDE_STB_VORBIS_H
71
72 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
73 #define STB_VORBIS_NO_STDIO 1
74 #endif
75
76 #ifndef STB_VORBIS_NO_STDIO
77 #include <stdio.h>
78 #endif
79
80 #ifdef __cplusplus
81 extern "C" {
82 #endif
83
84 /////////// THREAD SAFETY
85
86 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
87 // them from multiple threads at the same time. However, you can have multiple
88 // stb_vorbis* handles and decode from them independently in multiple thrads.
89
90
91 /////////// MEMORY ALLOCATION
92
93 // normally stb_vorbis uses malloc() to allocate memory at startup,
94 // and alloca() to allocate temporary memory during a frame on the
95 // stack. (Memory consumption will depend on the amount of setup
96 // data in the file and how you set the compile flags for speed
97 // vs. size. In my test files the maximal-size usage is ~150KB.)
98 //
99 // You can modify the wrapper functions in the source (setup_malloc,
100 // setup_temp_malloc, temp_malloc) to change this behavior, or you
101 // can use a simpler allocation model: you pass in a buffer from
102 // which stb_vorbis will allocate _all_ its memory (including the
103 // temp memory). "open" may fail with a VORBIS_outofmem if you
104 // do not pass in enough data; there is no way to determine how
105 // much you do need except to succeed (at which point you can
106 // query get_info to find the exact amount required. yes I know
107 // this is lame).
108 //
109 // If you pass in a non-NULL buffer of the type below, allocation
110 // will occur from it as described above. Otherwise just pass NULL
111 // to use malloc()/alloca()
112
113 typedef struct
114 {
115 char *alloc_buffer;
116 int alloc_buffer_length_in_bytes;
117 } stb_vorbis_alloc;
118
119
120 /////////// FUNCTIONS USEABLE WITH ALL INPUT MODES
121
122 typedef struct stb_vorbis stb_vorbis;
123
124 typedef struct
125 {
126 unsigned int sample_rate;
127 int channels;
128
129 unsigned int setup_memory_required;
130 unsigned int setup_temp_memory_required;
131 unsigned int temp_memory_required;
132
133 int max_frame_size;
134 } stb_vorbis_info;
135
136 typedef struct
137 {
138 char *vendor;
139
140 int comment_list_length;
141 char **comment_list;
142 } stb_vorbis_comment;
143
144 // get general information about the file
145 extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
146
147 // get ogg comments
148 extern stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f);
149
150 // get the last error detected (clears it, too)
151 extern int stb_vorbis_get_error(stb_vorbis *f);
152
153 // close an ogg vorbis file and free all memory in use
154 extern void stb_vorbis_close(stb_vorbis *f);
155
156 // this function returns the offset (in samples) from the beginning of the
157 // file that will be returned by the next decode, if it is known, or -1
158 // otherwise. after a flush_pushdata() call, this may take a while before
159 // it becomes valid again.
160 // NOT WORKING YET after a seek with PULLDATA API
161 extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
162
163 // returns the current seek point within the file, or offset from the beginning
164 // of the memory buffer. In pushdata mode it returns 0.
165 extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
166
167 /////////// PUSHDATA API
168
169 #ifndef STB_VORBIS_NO_PUSHDATA_API
170
171 // this API allows you to get blocks of data from any source and hand
172 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
173 // you how much it used, and you have to give it the rest next time;
174 // and stb_vorbis may not have enough data to work with and you will
175 // need to give it the same data again PLUS more. Note that the Vorbis
176 // specification does not bound the size of an individual frame.
177
178 extern stb_vorbis *stb_vorbis_open_pushdata(
179 const unsigned char * datablock, int datablock_length_in_bytes,
180 int *datablock_memory_consumed_in_bytes,
181 int *error,
182 const stb_vorbis_alloc *alloc_buffer);
183 // create a vorbis decoder by passing in the initial data block containing
184 // the ogg&vorbis headers (you don't need to do parse them, just provide
185 // the first N bytes of the file--you're told if it's not enough, see below)
186 // on success, returns an stb_vorbis *, does not set error, returns the amount of
187 // data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
188 // on failure, returns NULL on error and sets *error, does not change *datablock_memory_consumed
189 // if returns NULL and *error is VORBIS_need_more_data, then the input block was
190 // incomplete and you need to pass in a larger block from the start of the file
191
192 extern int stb_vorbis_decode_frame_pushdata(
193 stb_vorbis *f,
194 const unsigned char *datablock, int datablock_length_in_bytes,
195 int *channels, // place to write number of float * buffers
196 float ***output, // place to write float ** array of float * buffers
197 int *samples // place to write number of output samples
198 );
199 // decode a frame of audio sample data if possible from the passed-in data block
200 //
201 // return value: number of bytes we used from datablock
202 //
203 // possible cases:
204 // 0 bytes used, 0 samples output (need more data)
205 // N bytes used, 0 samples output (resynching the stream, keep going)
206 // N bytes used, M samples output (one frame of data)
207 // note that after opening a file, you will ALWAYS get one N-bytes,0-sample
208 // frame, because Vorbis always "discards" the first frame.
209 //
210 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
211 // instead only datablock_length_in_bytes-3 or less. This is because it wants
212 // to avoid missing parts of a page header if they cross a datablock boundary,
213 // without writing state-machiney code to record a partial detection.
214 //
215 // The number of channels returned are stored in *channels (which can be
216 // NULL--it is always the same as the number of channels reported by
217 // get_info). *output will contain an array of float* buffers, one per
218 // channel. In other words, (*output)[0][0] contains the first sample from
219 // the first channel, and (*output)[1][0] contains the first sample from
220 // the second channel.
221
222 extern void stb_vorbis_flush_pushdata(stb_vorbis *f);
223 // inform stb_vorbis that your next datablock will not be contiguous with
224 // previous ones (e.g. you've seeked in the data); future attempts to decode
225 // frames will cause stb_vorbis to resynchronize (as noted above), and
226 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
227 // will begin decoding the _next_ frame.
228 //
229 // if you want to seek using pushdata, you need to seek in your file, then
230 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
231 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
232 // if you don't like the result, seek your file again and repeat.
233 #endif
234
235
236 ////////// PULLING INPUT API
237
238 #ifndef STB_VORBIS_NO_PULLDATA_API
239 // This API assumes stb_vorbis is allowed to pull data from a source--
240 // either a block of memory containing the _entire_ vorbis stream, or a
241 // FILE * that you or it create, or possibly some other reading mechanism
242 // if you go modify the source to replace the FILE * case with some kind
243 // of callback to your code. (But if you don't support seeking, you may
244 // just want to go ahead and use pushdata.)
245
246 #if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
247 extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
248 #endif
249 #if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
250 extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
251 #endif
252 // decode an entire file and output the data interleaved into a malloc()ed
253 // buffer stored in *output. The return value is the number of samples
254 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
255 // When you're done with it, just free() the pointer returned in *output.
256
257 extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
258 int *error, const stb_vorbis_alloc *alloc_buffer);
259 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
260 // this must be the entire stream!). on failure, returns NULL and sets *error
261
262 #ifndef STB_VORBIS_NO_STDIO
263 extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
264 int *error, const stb_vorbis_alloc *alloc_buffer);
265 // create an ogg vorbis decoder from a filename via fopen(). on failure,
266 // returns NULL and sets *error (possibly to VORBIS_file_open_failure).
267
268 extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
269 int *error, const stb_vorbis_alloc *alloc_buffer);
270 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
271 // the _current_ seek point (ftell). on failure, returns NULL and sets *error.
272 // note that stb_vorbis must "own" this stream; if you seek it in between
273 // calls to stb_vorbis, it will become confused. Moreover, if you attempt to
274 // perform stb_vorbis_seek_*() operations on this file, it will assume it
275 // owns the _entire_ rest of the file after the start point. Use the next
276 // function, stb_vorbis_open_file_section(), to limit it.
277
278 extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
279 int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
280 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
281 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
282 // on failure, returns NULL and sets *error. note that stb_vorbis must "own"
283 // this stream; if you seek it in between calls to stb_vorbis, it will become
284 // confused.
285 #endif
286
287 extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
288 extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
289 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
290 // after calling seek_frame(), the next call to get_frame_*() will include
291 // the specified sample. after calling stb_vorbis_seek(), the next call to
292 // stb_vorbis_get_samples_* will start with the specified sample. If you
293 // do not need to seek to EXACTLY the target sample when using get_samples_*,
294 // you can also use seek_frame().
295
296 extern int stb_vorbis_seek_start(stb_vorbis *f);
297 // this function is equivalent to stb_vorbis_seek(f,0)
298
299 extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
300 extern float stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
301 // these functions return the total length of the vorbis stream
302
303 extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
304 // decode the next frame and return the number of samples. the number of
305 // channels returned are stored in *channels (which can be NULL--it is always
306 // the same as the number of channels reported by get_info). *output will
307 // contain an array of float* buffers, one per channel. These outputs will
308 // be overwritten on the next call to stb_vorbis_get_frame_*.
309 //
310 // You generally should not intermix calls to stb_vorbis_get_frame_*()
311 // and stb_vorbis_get_samples_*(), since the latter calls the former.
312
313 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
314 extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
315 extern int stb_vorbis_get_frame_short (stb_vorbis *f, int num_c, short **buffer, int num_samples);
316 #endif
317 // decode the next frame and return the number of *samples* per channel.
318 // Note that for interleaved data, you pass in the number of shorts (the
319 // size of your array), but the return value is the number of samples per
320 // channel, not the total number of samples.
321 //
322 // The data is coerced to the number of channels you request according to the
323 // channel coercion rules (see below). You must pass in the size of your
324 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
325 // The maximum buffer size needed can be gotten from get_info(); however,
326 // the Vorbis I specification implies an absolute maximum of 4096 samples
327 // per channel.
328
329 // Channel coercion rules:
330 // Let M be the number of channels requested, and N the number of channels present,
331 // and Cn be the nth channel; let stereo L be the sum of all L and center channels,
332 // and stereo R be the sum of all R and center channels (channel assignment from the
333 // vorbis spec).
334 // M N output
335 // 1 k sum(Ck) for all k
336 // 2 * stereo L, stereo R
337 // k l k > l, the first l channels, then 0s
338 // k l k <= l, the first k channels
339 // Note that this is not _good_ surround etc. mixing at all! It's just so
340 // you get something useful.
341
342 extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
343 extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
344 // gets num_samples samples, not necessarily on a frame boundary--this requires
345 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
346 // Returns the number of samples stored per channel; it may be less than requested
347 // at the end of the file. If there are no more samples in the file, returns 0.
348
349 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
350 extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
351 extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
352 #endif
353 // gets num_samples samples, not necessarily on a frame boundary--this requires
354 // buffering so you have to supply the buffers. Applies the coercion rules above
355 // to produce 'channels' channels. Returns the number of samples stored per channel;
356 // it may be less than requested at the end of the file. If there are no more
357 // samples in the file, returns 0.
358
359 #endif
360
361 //////// ERROR CODES
362
363 enum STBVorbisError
364 {
365 VORBIS__no_error,
366
367 VORBIS_need_more_data=1, // not a real error
368
369 VORBIS_invalid_api_mixing, // can't mix API modes
370 VORBIS_outofmem, // not enough memory
371 VORBIS_feature_not_supported, // uses floor 0
372 VORBIS_too_many_channels, // STB_VORBIS_MAX_CHANNELS is too small
373 VORBIS_file_open_failure, // fopen() failed
374 VORBIS_seek_without_length, // can't seek in unknown-length file
375
376 VORBIS_unexpected_eof=10, // file is truncated?
377 VORBIS_seek_invalid, // seek past EOF
378
379 // decoding errors (corrupt/invalid stream) -- you probably
380 // don't care about the exact details of these
381
382 // vorbis errors:
383 VORBIS_invalid_setup=20,
384 VORBIS_invalid_stream,
385
386 // ogg errors:
387 VORBIS_missing_capture_pattern=30,
388 VORBIS_invalid_stream_structure_version,
389 VORBIS_continued_packet_flag_invalid,
390 VORBIS_incorrect_stream_serial_number,
391 VORBIS_invalid_first_page,
392 VORBIS_bad_packet_type,
393 VORBIS_cant_find_last_page,
394 VORBIS_seek_failed,
395 VORBIS_ogg_skeleton_not_supported
396 };
397
398
399 #ifdef __cplusplus
400 }
401 #endif
402
403 #endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
404 //
405 // HEADER ENDS HERE
406 //
407 //////////////////////////////////////////////////////////////////////////////
408
409 #ifndef STB_VORBIS_HEADER_ONLY
410
411 // global configuration settings (e.g. set these in the project/makefile),
412 // or just set them in this file at the top (although ideally the first few
413 // should be visible when the header file is compiled too, although it's not
414 // crucial)
415
416 // STB_VORBIS_NO_PUSHDATA_API
417 // does not compile the code for the various stb_vorbis_*_pushdata()
418 // functions
419 // #define STB_VORBIS_NO_PUSHDATA_API
420
421 // STB_VORBIS_NO_PULLDATA_API
422 // does not compile the code for the non-pushdata APIs
423 // #define STB_VORBIS_NO_PULLDATA_API
424
425 // STB_VORBIS_NO_STDIO
426 // does not compile the code for the APIs that use FILE *s internally
427 // or externally (implied by STB_VORBIS_NO_PULLDATA_API)
428 // #define STB_VORBIS_NO_STDIO
429
430 // STB_VORBIS_NO_INTEGER_CONVERSION
431 // does not compile the code for converting audio sample data from
432 // float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
433 // #define STB_VORBIS_NO_INTEGER_CONVERSION
434
435 // STB_VORBIS_NO_FAST_SCALED_FLOAT
436 // does not use a fast float-to-int trick to accelerate float-to-int on
437 // most platforms which requires endianness be defined correctly.
438 //#define STB_VORBIS_NO_FAST_SCALED_FLOAT
439
440
441 // STB_VORBIS_MAX_CHANNELS [number]
442 // globally define this to the maximum number of channels you need.
443 // The spec does not put a restriction on channels except that
444 // the count is stored in a byte, so 255 is the hard limit.
445 // Reducing this saves about 16 bytes per value, so using 16 saves
446 // (255-16)*16 or around 4KB. Plus anything other memory usage
447 // I forgot to account for. Can probably go as low as 8 (7.1 audio),
448 // 6 (5.1 audio), or 2 (stereo only).
449 #ifndef STB_VORBIS_MAX_CHANNELS
450 #define STB_VORBIS_MAX_CHANNELS 16 // enough for anyone?
451 #endif
452
453 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
454 // after a flush_pushdata(), stb_vorbis begins scanning for the
455 // next valid page, without backtracking. when it finds something
456 // that looks like a page, it streams through it and verifies its
457 // CRC32. Should that validation fail, it keeps scanning. But it's
458 // possible that _while_ streaming through to check the CRC32 of
459 // one candidate page, it sees another candidate page. This #define
460 // determines how many "overlapping" candidate pages it can search
461 // at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
462 // garbage pages could be as big as 64KB, but probably average ~16KB.
463 // So don't hose ourselves by scanning an apparent 64KB page and
464 // missing a ton of real ones in the interim; so minimum of 2
465 #ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
466 #define STB_VORBIS_PUSHDATA_CRC_COUNT 4
467 #endif
468
469 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
470 // sets the log size of the huffman-acceleration table. Maximum
471 // supported value is 24. with larger numbers, more decodings are O(1),
472 // but the table size is larger so worse cache missing, so you'll have
473 // to probe (and try multiple ogg vorbis files) to find the sweet spot.
474 #ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
475 #define STB_VORBIS_FAST_HUFFMAN_LENGTH 10
476 #endif
477
478 // STB_VORBIS_FAST_BINARY_LENGTH [number]
479 // sets the log size of the binary-search acceleration table. this
480 // is used in similar fashion to the fast-huffman size to set initial
481 // parameters for the binary search
482
483 // STB_VORBIS_FAST_HUFFMAN_INT
484 // The fast huffman tables are much more efficient if they can be
485 // stored as 16-bit results instead of 32-bit results. This restricts
486 // the codebooks to having only 65535 possible outcomes, though.
487 // (At least, accelerated by the huffman table.)
488 #ifndef STB_VORBIS_FAST_HUFFMAN_INT
489 #define STB_VORBIS_FAST_HUFFMAN_SHORT
490 #endif
491
492 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
493 // If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
494 // back on binary searching for the correct one. This requires storing
495 // extra tables with the huffman codes in sorted order. Defining this
496 // symbol trades off space for speed by forcing a linear search in the
497 // non-fast case, except for "sparse" codebooks.
498 // #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
499
500 // STB_VORBIS_DIVIDES_IN_RESIDUE
501 // stb_vorbis precomputes the result of the scalar residue decoding
502 // that would otherwise require a divide per chunk. you can trade off
503 // space for time by defining this symbol.
504 // #define STB_VORBIS_DIVIDES_IN_RESIDUE
505
506 // STB_VORBIS_DIVIDES_IN_CODEBOOK
507 // vorbis VQ codebooks can be encoded two ways: with every case explicitly
508 // stored, or with all elements being chosen from a small range of values,
509 // and all values possible in all elements. By default, stb_vorbis expands
510 // this latter kind out to look like the former kind for ease of decoding,
511 // because otherwise an integer divide-per-vector-element is required to
512 // unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
513 // trade off storage for speed.
514 //#define STB_VORBIS_DIVIDES_IN_CODEBOOK
515
516 #ifdef STB_VORBIS_CODEBOOK_SHORTS
517 #error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
518 #endif
519
520 // STB_VORBIS_DIVIDE_TABLE
521 // this replaces small integer divides in the floor decode loop with
522 // table lookups. made less than 1% difference, so disabled by default.
523
524 // STB_VORBIS_NO_INLINE_DECODE
525 // disables the inlining of the scalar codebook fast-huffman decode.
526 // might save a little codespace; useful for debugging
527 // #define STB_VORBIS_NO_INLINE_DECODE
528
529 // STB_VORBIS_NO_DEFER_FLOOR
530 // Normally we only decode the floor without synthesizing the actual
531 // full curve. We can instead synthesize the curve immediately. This
532 // requires more memory and is very likely slower, so I don't think
533 // you'd ever want to do it except for debugging.
534 // #define STB_VORBIS_NO_DEFER_FLOOR
535
536
537
538
539 //////////////////////////////////////////////////////////////////////////////
540
541 #ifdef STB_VORBIS_NO_PULLDATA_API
542 #define STB_VORBIS_NO_INTEGER_CONVERSION
543 #define STB_VORBIS_NO_STDIO
544 #endif
545
546 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
547 #define STB_VORBIS_NO_STDIO 1
548 #endif
549
550 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
551 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
552
553 // only need endianness for fast-float-to-int, which we don't
554 // use for pushdata
555
556 #ifndef STB_VORBIS_BIG_ENDIAN
557 #define STB_VORBIS_ENDIAN 0
558 #else
559 #define STB_VORBIS_ENDIAN 1
560 #endif
561
562 #endif
563 #endif
564
565
566 #ifndef STB_VORBIS_NO_STDIO
567 #include <stdio.h>
568 #endif
569
570 #ifndef STB_VORBIS_NO_CRT
571 #include <stdlib.h>
572 #include <string.h>
573 #include <assert.h>
574 #include <math.h>
575
576 // find definition of alloca if it's not in stdlib.h:
577 #if defined(_MSC_VER) || defined(__MINGW32__)
578 #include <malloc.h>
579 #endif
580 #if defined(__linux__) || defined(__linux) || defined(__EMSCRIPTEN__)
581 #include <alloca.h>
582 #endif
583 #else // STB_VORBIS_NO_CRT
584 #define NULL 0
585 #define malloc(s) 0
586 #define free(s) ((void) 0)
587 #define realloc(s) 0
588 #endif // STB_VORBIS_NO_CRT
589
590 #include <limits.h>
591
592 #ifdef __MINGW32__
593 // eff you mingw:
594 // "fixed":
595 // http://sourceforge.net/p/mingw-w64/mailman/message/32882927/
596 // "no that broke the build, reverted, who cares about C":
597 // http://sourceforge.net/p/mingw-w64/mailman/message/32890381/
598 #ifdef __forceinline
599 #undef __forceinline
600 #endif
601 #define __forceinline
602 #define alloca __builtin_alloca
603 #elif !defined(_MSC_VER)
604 #if __GNUC__
605 #define __forceinline inline
606 #else
607 #define __forceinline
608 #endif
609 #endif
610
611 #if STB_VORBIS_MAX_CHANNELS > 256
612 #error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
613 #endif
614
615 #if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
616 #error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
617 #endif
618
619
620 #if 0
621 #include <crtdbg.h>
622 #define CHECK(f) _CrtIsValidHeapPointer(f->channel_buffers[1])
623 #else
624 #define CHECK(f) ((void) 0)
625 #endif
626
627 #define MAX_BLOCKSIZE_LOG 13 // from specification
628 #define MAX_BLOCKSIZE (1 << MAX_BLOCKSIZE_LOG)
629
630
631 typedef unsigned char uint8;
632 typedef signed char int8;
633 typedef unsigned short uint16;
634 typedef signed short int16;
635 typedef unsigned int uint32;
636 typedef signed int int32;
637
638 #ifndef TRUE
639 #define TRUE 1
640 #define FALSE 0
641 #endif
642
643 typedef float codetype;
644
645 // @NOTE
646 //
647 // Some arrays below are tagged "//varies", which means it's actually
648 // a variable-sized piece of data, but rather than malloc I assume it's
649 // small enough it's better to just allocate it all together with the
650 // main thing
651 //
652 // Most of the variables are specified with the smallest size I could pack
653 // them into. It might give better performance to make them all full-sized
654 // integers. It should be safe to freely rearrange the structures or change
655 // the sizes larger--nothing relies on silently truncating etc., nor the
656 // order of variables.
657
658 #define FAST_HUFFMAN_TABLE_SIZE (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
659 #define FAST_HUFFMAN_TABLE_MASK (FAST_HUFFMAN_TABLE_SIZE - 1)
660
661 typedef struct
662 {
663 int dimensions, entries;
664 uint8 *codeword_lengths;
665 float minimum_value;
666 float delta_value;
667 uint8 value_bits;
668 uint8 lookup_type;
669 uint8 sequence_p;
670 uint8 sparse;
671 uint32 lookup_values;
672 codetype *multiplicands;
673 uint32 *codewords;
674 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
675 int16 fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
676 #else
677 int32 fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
678 #endif
679 uint32 *sorted_codewords;
680 int *sorted_values;
681 int sorted_entries;
682 } Codebook;
683
684 typedef struct
685 {
686 uint8 order;
687 uint16 rate;
688 uint16 bark_map_size;
689 uint8 amplitude_bits;
690 uint8 amplitude_offset;
691 uint8 number_of_books;
692 uint8 book_list[16]; // varies
693 } Floor0;
694
695 typedef struct
696 {
697 uint8 partitions;
698 uint8 partition_class_list[32]; // varies
699 uint8 class_dimensions[16]; // varies
700 uint8 class_subclasses[16]; // varies
701 uint8 class_masterbooks[16]; // varies
702 int16 subclass_books[16][8]; // varies
703 uint16 Xlist[31*8+2]; // varies
704 uint8 sorted_order[31*8+2];
705 uint8 neighbors[31*8+2][2];
706 uint8 floor1_multiplier;
707 uint8 rangebits;
708 int values;
709 } Floor1;
710
711 typedef union
712 {
713 Floor0 floor0;
714 Floor1 floor1;
715 } Floor;
716
717 typedef struct
718 {
719 uint32 begin, end;
720 uint32 part_size;
721 uint8 classifications;
722 uint8 classbook;
723 uint8 **classdata;
724 int16 (*residue_books)[8];
725 } Residue;
726
727 typedef struct
728 {
729 uint8 magnitude;
730 uint8 angle;
731 uint8 mux;
732 } MappingChannel;
733
734 typedef struct
735 {
736 uint16 coupling_steps;
737 MappingChannel *chan;
738 uint8 submaps;
739 uint8 submap_floor[15]; // varies
740 uint8 submap_residue[15]; // varies
741 } Mapping;
742
743 typedef struct
744 {
745 uint8 blockflag;
746 uint8 mapping;
747 uint16 windowtype;
748 uint16 transformtype;
749 } Mode;
750
751 typedef struct
752 {
753 uint32 goal_crc; // expected crc if match
754 int bytes_left; // bytes left in packet
755 uint32 crc_so_far; // running crc
756 int bytes_done; // bytes processed in _current_ chunk
757 uint32 sample_loc; // granule pos encoded in page
758 } CRCscan;
759
760 typedef struct
761 {
762 uint32 page_start, page_end;
763 uint32 last_decoded_sample;
764 } ProbedPage;
765
766 struct stb_vorbis
767 {
768 // user-accessible info
769 unsigned int sample_rate;
770 int channels;
771
772 unsigned int setup_memory_required;
773 unsigned int temp_memory_required;
774 unsigned int setup_temp_memory_required;
775
776 char *vendor;
777 int comment_list_length;
778 char **comment_list;
779
780 // input config
781 #ifndef STB_VORBIS_NO_STDIO
782 FILE *f;
783 uint32 f_start;
784 int close_on_free;
785 #endif
786
787 uint8 *stream;
788 uint8 *stream_start;
789 uint8 *stream_end;
790
791 uint32 stream_len;
792
793 uint8 push_mode;
794
795 // the page to seek to when seeking to start, may be zero
796 uint32 first_audio_page_offset;
797
798 // p_first is the page on which the first audio packet ends
799 // (but not necessarily the page on which it starts)
800 ProbedPage p_first, p_last;
801
802 // memory management
803 stb_vorbis_alloc alloc;
804 int setup_offset;
805 int temp_offset;
806
807 // run-time results
808 int eof;
809 enum STBVorbisError error;
810
811 // user-useful data
812
813 // header info
814 int blocksize[2];
815 int blocksize_0, blocksize_1;
816 int codebook_count;
817 Codebook *codebooks;
818 int floor_count;
819 uint16 floor_types[64]; // varies
820 Floor *floor_config;
821 int residue_count;
822 uint16 residue_types[64]; // varies
823 Residue *residue_config;
824 int mapping_count;
825 Mapping *mapping;
826 int mode_count;
827 Mode mode_config[64]; // varies
828
829 uint32 total_samples;
830
831 // decode buffer
832 float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
833 float *outputs [STB_VORBIS_MAX_CHANNELS];
834
835 float *previous_window[STB_VORBIS_MAX_CHANNELS];
836 int previous_length;
837
838 #ifndef STB_VORBIS_NO_DEFER_FLOOR
839 int16 *finalY[STB_VORBIS_MAX_CHANNELS];
840 #else
841 float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
842 #endif
843
844 uint32 current_loc; // sample location of next frame to decode
845 int current_loc_valid;
846
847 // per-blocksize precomputed data
848
849 // twiddle factors
850 float *A[2],*B[2],*C[2];
851 float *window[2];
852 uint16 *bit_reverse[2];
853
854 // current page/packet/segment streaming info
855 uint32 serial; // stream serial number for verification
856 int last_page;
857 int segment_count;
858 uint8 segments[255];
859 uint8 page_flag;
860 uint8 bytes_in_seg;
861 uint8 first_decode;
862 int next_seg;
863 int last_seg; // flag that we're on the last segment
864 int last_seg_which; // what was the segment number of the last seg?
865 uint32 acc;
866 int valid_bits;
867 int packet_bytes;
868 int end_seg_with_known_loc;
869 uint32 known_loc_for_packet;
870 int discard_samples_deferred;
871 uint32 samples_output;
872
873 // push mode scanning
874 int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
875 #ifndef STB_VORBIS_NO_PUSHDATA_API
876 CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
877 #endif
878
879 // sample-access
880 int channel_buffer_start;
881 int channel_buffer_end;
882 };
883
884 #if defined(STB_VORBIS_NO_PUSHDATA_API)
885 #define IS_PUSH_MODE(f) FALSE
886 #elif defined(STB_VORBIS_NO_PULLDATA_API)
887 #define IS_PUSH_MODE(f) TRUE
888 #else
889 #define IS_PUSH_MODE(f) ((f)->push_mode)
890 #endif
891
892 typedef struct stb_vorbis vorb;
893
error(vorb * f,enum STBVorbisError e)894 static int error(vorb *f, enum STBVorbisError e)
895 {
896 f->error = e;
897 if (!f->eof && e != VORBIS_need_more_data) {
898 f->error=e; // breakpoint for debugging
899 }
900 return 0;
901 }
902
903
904 // these functions are used for allocating temporary memory
905 // while decoding. if you can afford the stack space, use
906 // alloca(); otherwise, provide a temp buffer and it will
907 // allocate out of those.
908
909 #define array_size_required(count,size) (count*(sizeof(void *)+(size)))
910
911 #define temp_alloc(f,size) (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
912 #define temp_free(f,p) (void)0
913 #define temp_alloc_save(f) ((f)->temp_offset)
914 #define temp_alloc_restore(f,p) ((f)->temp_offset = (p))
915
916 #define temp_block_array(f,count,size) make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
917
918 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
make_block_array(void * mem,int count,int size)919 static void *make_block_array(void *mem, int count, int size)
920 {
921 int i;
922 void ** p = (void **) mem;
923 char *q = (char *) (p + count);
924 for (i=0; i < count; ++i) {
925 p[i] = q;
926 q += size;
927 }
928 return p;
929 }
930
setup_malloc(vorb * f,int sz)931 static void *setup_malloc(vorb *f, int sz)
932 {
933 sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
934 f->setup_memory_required += sz;
935 if (f->alloc.alloc_buffer) {
936 void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
937 if (f->setup_offset + sz > f->temp_offset) return NULL;
938 f->setup_offset += sz;
939 return p;
940 }
941 return sz ? malloc(sz) : NULL;
942 }
943
setup_free(vorb * f,void * p)944 static void setup_free(vorb *f, void *p)
945 {
946 if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack
947 free(p);
948 }
949
setup_temp_malloc(vorb * f,int sz)950 static void *setup_temp_malloc(vorb *f, int sz)
951 {
952 sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
953 if (f->alloc.alloc_buffer) {
954 if (f->temp_offset - sz < f->setup_offset) return NULL;
955 f->temp_offset -= sz;
956 return (char *) f->alloc.alloc_buffer + f->temp_offset;
957 }
958 return malloc(sz);
959 }
960
setup_temp_free(vorb * f,void * p,int sz)961 static void setup_temp_free(vorb *f, void *p, int sz)
962 {
963 if (f->alloc.alloc_buffer) {
964 f->temp_offset += (sz+3)&~3;
965 return;
966 }
967 free(p);
968 }
969
970 #define CRC32_POLY 0x04c11db7 // from spec
971
972 static uint32 crc_table[256];
crc32_init(void)973 static void crc32_init(void)
974 {
975 int i,j;
976 uint32 s;
977 for(i=0; i < 256; i++) {
978 for (s=(uint32) i << 24, j=0; j < 8; ++j)
979 s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
980 crc_table[i] = s;
981 }
982 }
983
crc32_update(uint32 crc,uint8 byte)984 static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
985 {
986 return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
987 }
988
989
990 // used in setup, and for huffman that doesn't go fast path
bit_reverse(unsigned int n)991 static unsigned int bit_reverse(unsigned int n)
992 {
993 n = ((n & 0xAAAAAAAA) >> 1) | ((n & 0x55555555) << 1);
994 n = ((n & 0xCCCCCCCC) >> 2) | ((n & 0x33333333) << 2);
995 n = ((n & 0xF0F0F0F0) >> 4) | ((n & 0x0F0F0F0F) << 4);
996 n = ((n & 0xFF00FF00) >> 8) | ((n & 0x00FF00FF) << 8);
997 return (n >> 16) | (n << 16);
998 }
999
square(float x)1000 static float square(float x)
1001 {
1002 return x*x;
1003 }
1004
1005 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
1006 // as required by the specification. fast(?) implementation from stb.h
1007 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
ilog(int32 n)1008 static int ilog(int32 n)
1009 {
1010 static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
1011
1012 if (n < 0) return 0; // signed n returns 0
1013
1014 // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
1015 if (n < (1 << 14))
1016 if (n < (1 << 4)) return 0 + log2_4[n ];
1017 else if (n < (1 << 9)) return 5 + log2_4[n >> 5];
1018 else return 10 + log2_4[n >> 10];
1019 else if (n < (1 << 24))
1020 if (n < (1 << 19)) return 15 + log2_4[n >> 15];
1021 else return 20 + log2_4[n >> 20];
1022 else if (n < (1 << 29)) return 25 + log2_4[n >> 25];
1023 else return 30 + log2_4[n >> 30];
1024 }
1025
1026 #ifndef M_PI
1027 #define M_PI 3.14159265358979323846264f // from CRC
1028 #endif
1029
1030 // code length assigned to a value with no huffman encoding
1031 #define NO_CODE 255
1032
1033 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
1034 //
1035 // these functions are only called at setup, and only a few times
1036 // per file
1037
float32_unpack(uint32 x)1038 static float float32_unpack(uint32 x)
1039 {
1040 // from the specification
1041 uint32 mantissa = x & 0x1fffff;
1042 uint32 sign = x & 0x80000000;
1043 uint32 exp = (x & 0x7fe00000) >> 21;
1044 double res = sign ? -(double)mantissa : (double)mantissa;
1045 return (float) ldexp((float)res, exp-788);
1046 }
1047
1048
1049 // zlib & jpeg huffman tables assume that the output symbols
1050 // can either be arbitrarily arranged, or have monotonically
1051 // increasing frequencies--they rely on the lengths being sorted;
1052 // this makes for a very simple generation algorithm.
1053 // vorbis allows a huffman table with non-sorted lengths. This
1054 // requires a more sophisticated construction, since symbols in
1055 // order do not map to huffman codes "in order".
add_entry(Codebook * c,uint32 huff_code,int symbol,int count,int len,uint32 * values)1056 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
1057 {
1058 if (!c->sparse) {
1059 c->codewords [symbol] = huff_code;
1060 } else {
1061 c->codewords [count] = huff_code;
1062 c->codeword_lengths[count] = len;
1063 values [count] = symbol;
1064 }
1065 }
1066
compute_codewords(Codebook * c,uint8 * len,int n,uint32 * values)1067 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
1068 {
1069 int i,k,m=0;
1070 uint32 available[32];
1071
1072 memset(available, 0, sizeof(available));
1073 // find the first entry
1074 for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
1075 if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
1076 // add to the list
1077 add_entry(c, 0, k, m++, len[k], values);
1078 // add all available leaves
1079 for (i=1; i <= len[k]; ++i)
1080 available[i] = 1U << (32-i);
1081 // note that the above code treats the first case specially,
1082 // but it's really the same as the following code, so they
1083 // could probably be combined (except the initial code is 0,
1084 // and I use 0 in available[] to mean 'empty')
1085 for (i=k+1; i < n; ++i) {
1086 uint32 res;
1087 int z = len[i], y;
1088 if (z == NO_CODE) continue;
1089 // find lowest available leaf (should always be earliest,
1090 // which is what the specification calls for)
1091 // note that this property, and the fact we can never have
1092 // more than one free leaf at a given level, isn't totally
1093 // trivial to prove, but it seems true and the assert never
1094 // fires, so!
1095 while (z > 0 && !available[z]) --z;
1096 if (z == 0) { return FALSE; }
1097 res = available[z];
1098 assert(z >= 0 && z < 32);
1099 available[z] = 0;
1100 add_entry(c, bit_reverse(res), i, m++, len[i], values);
1101 // propagate availability up the tree
1102 if (z != len[i]) {
1103 assert(len[i] >= 0 && len[i] < 32);
1104 for (y=len[i]; y > z; --y) {
1105 assert(available[y] == 0);
1106 available[y] = res + (1 << (32-y));
1107 }
1108 }
1109 }
1110 return TRUE;
1111 }
1112
1113 // accelerated huffman table allows fast O(1) match of all symbols
1114 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
compute_accelerated_huffman(Codebook * c)1115 static void compute_accelerated_huffman(Codebook *c)
1116 {
1117 int i, len;
1118 for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
1119 c->fast_huffman[i] = -1;
1120
1121 len = c->sparse ? c->sorted_entries : c->entries;
1122 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
1123 if (len > 32767) len = 32767; // largest possible value we can encode!
1124 #endif
1125 for (i=0; i < len; ++i) {
1126 if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
1127 uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
1128 // set table entries for all bit combinations in the higher bits
1129 while (z < FAST_HUFFMAN_TABLE_SIZE) {
1130 c->fast_huffman[z] = i;
1131 z += 1 << c->codeword_lengths[i];
1132 }
1133 }
1134 }
1135 }
1136
1137 #ifdef _MSC_VER
1138 #define STBV_CDECL __cdecl
1139 #else
1140 #define STBV_CDECL
1141 #endif
1142
uint32_compare(const void * p,const void * q)1143 static int STBV_CDECL uint32_compare(const void *p, const void *q)
1144 {
1145 uint32 x = * (uint32 *) p;
1146 uint32 y = * (uint32 *) q;
1147 return x < y ? -1 : x > y;
1148 }
1149
include_in_sort(Codebook * c,uint8 len)1150 static int include_in_sort(Codebook *c, uint8 len)
1151 {
1152 if (c->sparse) { assert(len != NO_CODE); return TRUE; }
1153 if (len == NO_CODE) return FALSE;
1154 if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
1155 return FALSE;
1156 }
1157
1158 // if the fast table above doesn't work, we want to binary
1159 // search them... need to reverse the bits
compute_sorted_huffman(Codebook * c,uint8 * lengths,uint32 * values)1160 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
1161 {
1162 int i, len;
1163 // build a list of all the entries
1164 // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
1165 // this is kind of a frivolous optimization--I don't see any performance improvement,
1166 // but it's like 4 extra lines of code, so.
1167 if (!c->sparse) {
1168 int k = 0;
1169 for (i=0; i < c->entries; ++i)
1170 if (include_in_sort(c, lengths[i]))
1171 c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
1172 assert(k == c->sorted_entries);
1173 } else {
1174 for (i=0; i < c->sorted_entries; ++i)
1175 c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
1176 }
1177
1178 qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
1179 c->sorted_codewords[c->sorted_entries] = 0xffffffff;
1180
1181 len = c->sparse ? c->sorted_entries : c->entries;
1182 // now we need to indicate how they correspond; we could either
1183 // #1: sort a different data structure that says who they correspond to
1184 // #2: for each sorted entry, search the original list to find who corresponds
1185 // #3: for each original entry, find the sorted entry
1186 // #1 requires extra storage, #2 is slow, #3 can use binary search!
1187 for (i=0; i < len; ++i) {
1188 int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
1189 if (include_in_sort(c,huff_len)) {
1190 uint32 code = bit_reverse(c->codewords[i]);
1191 int x=0, n=c->sorted_entries;
1192 while (n > 1) {
1193 // invariant: sc[x] <= code < sc[x+n]
1194 int m = x + (n >> 1);
1195 if (c->sorted_codewords[m] <= code) {
1196 x = m;
1197 n -= (n>>1);
1198 } else {
1199 n >>= 1;
1200 }
1201 }
1202 assert(c->sorted_codewords[x] == code);
1203 if (c->sparse) {
1204 c->sorted_values[x] = values[i];
1205 c->codeword_lengths[x] = huff_len;
1206 } else {
1207 c->sorted_values[x] = i;
1208 }
1209 }
1210 }
1211 }
1212
1213 // only run while parsing the header (3 times)
vorbis_validate(uint8 * data)1214 static int vorbis_validate(uint8 *data)
1215 {
1216 static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
1217 return memcmp(data, vorbis, 6) == 0;
1218 }
1219
1220 // called from setup only, once per code book
1221 // (formula implied by specification)
lookup1_values(int entries,int dim)1222 static int lookup1_values(int entries, int dim)
1223 {
1224 int r = (int) floor(exp((float) log((float) entries) / dim));
1225 if ((int) floor(pow((float) r+1, dim)) <= entries) // (int) cast for MinGW warning;
1226 ++r; // floor() to avoid _ftol() when non-CRT
1227 if (pow((float) r+1, dim) <= entries)
1228 return -1;
1229 if ((int) floor(pow((float) r, dim)) > entries)
1230 return -1;
1231 return r;
1232 }
1233
1234 // called twice per file
compute_twiddle_factors(int n,float * A,float * B,float * C)1235 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
1236 {
1237 int n4 = n >> 2, n8 = n >> 3;
1238 int k,k2;
1239
1240 for (k=k2=0; k < n4; ++k,k2+=2) {
1241 A[k2 ] = (float) cos(4*k*M_PI/n);
1242 A[k2+1] = (float) -sin(4*k*M_PI/n);
1243 B[k2 ] = (float) cos((k2+1)*M_PI/n/2) * 0.5f;
1244 B[k2+1] = (float) sin((k2+1)*M_PI/n/2) * 0.5f;
1245 }
1246 for (k=k2=0; k < n8; ++k,k2+=2) {
1247 C[k2 ] = (float) cos(2*(k2+1)*M_PI/n);
1248 C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
1249 }
1250 }
1251
compute_window(int n,float * window)1252 static void compute_window(int n, float *window)
1253 {
1254 int n2 = n >> 1, i;
1255 for (i=0; i < n2; ++i)
1256 window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
1257 }
1258
compute_bitreverse(int n,uint16 * rev)1259 static void compute_bitreverse(int n, uint16 *rev)
1260 {
1261 int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
1262 int i, n8 = n >> 3;
1263 for (i=0; i < n8; ++i)
1264 rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
1265 }
1266
init_blocksize(vorb * f,int b,int n)1267 static int init_blocksize(vorb *f, int b, int n)
1268 {
1269 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
1270 f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1271 f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1272 f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
1273 if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
1274 compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
1275 f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1276 if (!f->window[b]) return error(f, VORBIS_outofmem);
1277 compute_window(n, f->window[b]);
1278 f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
1279 if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
1280 compute_bitreverse(n, f->bit_reverse[b]);
1281 return TRUE;
1282 }
1283
neighbors(uint16 * x,int n,int * plow,int * phigh)1284 static void neighbors(uint16 *x, int n, int *plow, int *phigh)
1285 {
1286 int low = -1;
1287 int high = 65536;
1288 int i;
1289 for (i=0; i < n; ++i) {
1290 if (x[i] > low && x[i] < x[n]) { *plow = i; low = x[i]; }
1291 if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
1292 }
1293 }
1294
1295 // this has been repurposed so y is now the original index instead of y
1296 typedef struct
1297 {
1298 uint16 x,id;
1299 } stbv__floor_ordering;
1300
point_compare(const void * p,const void * q)1301 static int STBV_CDECL point_compare(const void *p, const void *q)
1302 {
1303 stbv__floor_ordering *a = (stbv__floor_ordering *) p;
1304 stbv__floor_ordering *b = (stbv__floor_ordering *) q;
1305 return a->x < b->x ? -1 : a->x > b->x;
1306 }
1307
1308 //
1309 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
1310
1311
1312 #if defined(STB_VORBIS_NO_STDIO)
1313 #define USE_MEMORY(z) TRUE
1314 #else
1315 #define USE_MEMORY(z) ((z)->stream)
1316 #endif
1317
get8(vorb * z)1318 static uint8 get8(vorb *z)
1319 {
1320 if (USE_MEMORY(z)) {
1321 if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
1322 return *z->stream++;
1323 }
1324
1325 #ifndef STB_VORBIS_NO_STDIO
1326 {
1327 int c = fgetc(z->f);
1328 if (c == EOF) { z->eof = TRUE; return 0; }
1329 return c;
1330 }
1331 #endif
1332 }
1333
get32(vorb * f)1334 static uint32 get32(vorb *f)
1335 {
1336 uint32 x;
1337 x = get8(f);
1338 x += get8(f) << 8;
1339 x += get8(f) << 16;
1340 x += (uint32) get8(f) << 24;
1341 return x;
1342 }
1343
getn(vorb * z,uint8 * data,int n)1344 static int getn(vorb *z, uint8 *data, int n)
1345 {
1346 if (USE_MEMORY(z)) {
1347 if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
1348 memcpy(data, z->stream, n);
1349 z->stream += n;
1350 return 1;
1351 }
1352
1353 #ifndef STB_VORBIS_NO_STDIO
1354 if (fread(data, n, 1, z->f) == 1)
1355 return 1;
1356 else {
1357 z->eof = 1;
1358 return 0;
1359 }
1360 #endif
1361 }
1362
skip(vorb * z,int n)1363 static void skip(vorb *z, int n)
1364 {
1365 if (USE_MEMORY(z)) {
1366 z->stream += n;
1367 if (z->stream >= z->stream_end) z->eof = 1;
1368 return;
1369 }
1370 #ifndef STB_VORBIS_NO_STDIO
1371 {
1372 long x = ftell(z->f);
1373 fseek(z->f, x+n, SEEK_SET);
1374 }
1375 #endif
1376 }
1377
set_file_offset(stb_vorbis * f,unsigned int loc)1378 static int set_file_offset(stb_vorbis *f, unsigned int loc)
1379 {
1380 #ifndef STB_VORBIS_NO_PUSHDATA_API
1381 if (f->push_mode) return 0;
1382 #endif
1383 f->eof = 0;
1384 if (USE_MEMORY(f)) {
1385 if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
1386 f->stream = f->stream_end;
1387 f->eof = 1;
1388 return 0;
1389 } else {
1390 f->stream = f->stream_start + loc;
1391 return 1;
1392 }
1393 }
1394 #ifndef STB_VORBIS_NO_STDIO
1395 if (loc + f->f_start < loc || loc >= 0x80000000) {
1396 loc = 0x7fffffff;
1397 f->eof = 1;
1398 } else {
1399 loc += f->f_start;
1400 }
1401 if (!fseek(f->f, loc, SEEK_SET))
1402 return 1;
1403 f->eof = 1;
1404 fseek(f->f, f->f_start, SEEK_END);
1405 return 0;
1406 #endif
1407 }
1408
1409
1410 static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
1411
capture_pattern(vorb * f)1412 static int capture_pattern(vorb *f)
1413 {
1414 if (0x4f != get8(f)) return FALSE;
1415 if (0x67 != get8(f)) return FALSE;
1416 if (0x67 != get8(f)) return FALSE;
1417 if (0x53 != get8(f)) return FALSE;
1418 return TRUE;
1419 }
1420
1421 #define PAGEFLAG_continued_packet 1
1422 #define PAGEFLAG_first_page 2
1423 #define PAGEFLAG_last_page 4
1424
start_page_no_capturepattern(vorb * f)1425 static int start_page_no_capturepattern(vorb *f)
1426 {
1427 uint32 loc0,loc1,n;
1428 if (f->first_decode && !IS_PUSH_MODE(f)) {
1429 f->p_first.page_start = stb_vorbis_get_file_offset(f) - 4;
1430 }
1431 // stream structure version
1432 if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
1433 // header flag
1434 f->page_flag = get8(f);
1435 // absolute granule position
1436 loc0 = get32(f);
1437 loc1 = get32(f);
1438 // @TODO: validate loc0,loc1 as valid positions?
1439 // stream serial number -- vorbis doesn't interleave, so discard
1440 get32(f);
1441 //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
1442 // page sequence number
1443 n = get32(f);
1444 f->last_page = n;
1445 // CRC32
1446 get32(f);
1447 // page_segments
1448 f->segment_count = get8(f);
1449 if (!getn(f, f->segments, f->segment_count))
1450 return error(f, VORBIS_unexpected_eof);
1451 // assume we _don't_ know any the sample position of any segments
1452 f->end_seg_with_known_loc = -2;
1453 if (loc0 != ~0U || loc1 != ~0U) {
1454 int i;
1455 // determine which packet is the last one that will complete
1456 for (i=f->segment_count-1; i >= 0; --i)
1457 if (f->segments[i] < 255)
1458 break;
1459 // 'i' is now the index of the _last_ segment of a packet that ends
1460 if (i >= 0) {
1461 f->end_seg_with_known_loc = i;
1462 f->known_loc_for_packet = loc0;
1463 }
1464 }
1465 if (f->first_decode) {
1466 int i,len;
1467 len = 0;
1468 for (i=0; i < f->segment_count; ++i)
1469 len += f->segments[i];
1470 len += 27 + f->segment_count;
1471 f->p_first.page_end = f->p_first.page_start + len;
1472 f->p_first.last_decoded_sample = loc0;
1473 }
1474 f->next_seg = 0;
1475 return TRUE;
1476 }
1477
start_page(vorb * f)1478 static int start_page(vorb *f)
1479 {
1480 if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
1481 return start_page_no_capturepattern(f);
1482 }
1483
start_packet(vorb * f)1484 static int start_packet(vorb *f)
1485 {
1486 while (f->next_seg == -1) {
1487 if (!start_page(f)) return FALSE;
1488 if (f->page_flag & PAGEFLAG_continued_packet)
1489 return error(f, VORBIS_continued_packet_flag_invalid);
1490 }
1491 f->last_seg = FALSE;
1492 f->valid_bits = 0;
1493 f->packet_bytes = 0;
1494 f->bytes_in_seg = 0;
1495 // f->next_seg is now valid
1496 return TRUE;
1497 }
1498
maybe_start_packet(vorb * f)1499 static int maybe_start_packet(vorb *f)
1500 {
1501 if (f->next_seg == -1) {
1502 int x = get8(f);
1503 if (f->eof) return FALSE; // EOF at page boundary is not an error!
1504 if (0x4f != x ) return error(f, VORBIS_missing_capture_pattern);
1505 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1506 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1507 if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1508 if (!start_page_no_capturepattern(f)) return FALSE;
1509 if (f->page_flag & PAGEFLAG_continued_packet) {
1510 // set up enough state that we can read this packet if we want,
1511 // e.g. during recovery
1512 f->last_seg = FALSE;
1513 f->bytes_in_seg = 0;
1514 return error(f, VORBIS_continued_packet_flag_invalid);
1515 }
1516 }
1517 return start_packet(f);
1518 }
1519
next_segment(vorb * f)1520 static int next_segment(vorb *f)
1521 {
1522 int len;
1523 if (f->last_seg) return 0;
1524 if (f->next_seg == -1) {
1525 f->last_seg_which = f->segment_count-1; // in case start_page fails
1526 if (!start_page(f)) { f->last_seg = 1; return 0; }
1527 if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1528 }
1529 len = f->segments[f->next_seg++];
1530 if (len < 255) {
1531 f->last_seg = TRUE;
1532 f->last_seg_which = f->next_seg-1;
1533 }
1534 if (f->next_seg >= f->segment_count)
1535 f->next_seg = -1;
1536 assert(f->bytes_in_seg == 0);
1537 f->bytes_in_seg = len;
1538 return len;
1539 }
1540
1541 #define EOP (-1)
1542 #define INVALID_BITS (-1)
1543
get8_packet_raw(vorb * f)1544 static int get8_packet_raw(vorb *f)
1545 {
1546 if (!f->bytes_in_seg) { // CLANG!
1547 if (f->last_seg) return EOP;
1548 else if (!next_segment(f)) return EOP;
1549 }
1550 assert(f->bytes_in_seg > 0);
1551 --f->bytes_in_seg;
1552 ++f->packet_bytes;
1553 return get8(f);
1554 }
1555
get8_packet(vorb * f)1556 static int get8_packet(vorb *f)
1557 {
1558 int x = get8_packet_raw(f);
1559 f->valid_bits = 0;
1560 return x;
1561 }
1562
get32_packet(vorb * f)1563 static int get32_packet(vorb *f)
1564 {
1565 uint32 x;
1566 x = get8_packet(f);
1567 x += get8_packet(f) << 8;
1568 x += get8_packet(f) << 16;
1569 x += (uint32) get8_packet(f) << 24;
1570 return x;
1571 }
1572
flush_packet(vorb * f)1573 static void flush_packet(vorb *f)
1574 {
1575 while (get8_packet_raw(f) != EOP);
1576 }
1577
1578 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1579 // as the huffman decoder?
get_bits(vorb * f,int n)1580 static uint32 get_bits(vorb *f, int n)
1581 {
1582 uint32 z;
1583
1584 if (f->valid_bits < 0) return 0;
1585 if (f->valid_bits < n) {
1586 if (n > 24) {
1587 // the accumulator technique below would not work correctly in this case
1588 z = get_bits(f, 24);
1589 z += get_bits(f, n-24) << 24;
1590 return z;
1591 }
1592 if (f->valid_bits == 0) f->acc = 0;
1593 while (f->valid_bits < n) {
1594 int z = get8_packet_raw(f);
1595 if (z == EOP) {
1596 f->valid_bits = INVALID_BITS;
1597 return 0;
1598 }
1599 f->acc += z << f->valid_bits;
1600 f->valid_bits += 8;
1601 }
1602 }
1603 if (f->valid_bits < 0) return 0;
1604 z = f->acc & ((1 << n)-1);
1605 f->acc >>= n;
1606 f->valid_bits -= n;
1607 return z;
1608 }
1609
1610 // @OPTIMIZE: primary accumulator for huffman
1611 // expand the buffer to as many bits as possible without reading off end of packet
1612 // it might be nice to allow f->valid_bits and f->acc to be stored in registers,
1613 // e.g. cache them locally and decode locally
prep_huffman(vorb * f)1614 static __forceinline void prep_huffman(vorb *f)
1615 {
1616 if (f->valid_bits <= 24) {
1617 if (f->valid_bits == 0) f->acc = 0;
1618 do {
1619 int z;
1620 if (f->last_seg && !f->bytes_in_seg) return;
1621 z = get8_packet_raw(f);
1622 if (z == EOP) return;
1623 f->acc += (unsigned) z << f->valid_bits;
1624 f->valid_bits += 8;
1625 } while (f->valid_bits <= 24);
1626 }
1627 }
1628
1629 enum
1630 {
1631 VORBIS_packet_id = 1,
1632 VORBIS_packet_comment = 3,
1633 VORBIS_packet_setup = 5
1634 };
1635
codebook_decode_scalar_raw(vorb * f,Codebook * c)1636 static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
1637 {
1638 int i;
1639 prep_huffman(f);
1640
1641 if (c->codewords == NULL && c->sorted_codewords == NULL)
1642 return -1;
1643
1644 // cases to use binary search: sorted_codewords && !c->codewords
1645 // sorted_codewords && c->entries > 8
1646 if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
1647 // binary search
1648 uint32 code = bit_reverse(f->acc);
1649 int x=0, n=c->sorted_entries, len;
1650
1651 while (n > 1) {
1652 // invariant: sc[x] <= code < sc[x+n]
1653 int m = x + (n >> 1);
1654 if (c->sorted_codewords[m] <= code) {
1655 x = m;
1656 n -= (n>>1);
1657 } else {
1658 n >>= 1;
1659 }
1660 }
1661 // x is now the sorted index
1662 if (!c->sparse) x = c->sorted_values[x];
1663 // x is now sorted index if sparse, or symbol otherwise
1664 len = c->codeword_lengths[x];
1665 if (f->valid_bits >= len) {
1666 f->acc >>= len;
1667 f->valid_bits -= len;
1668 return x;
1669 }
1670
1671 f->valid_bits = 0;
1672 return -1;
1673 }
1674
1675 // if small, linear search
1676 assert(!c->sparse);
1677 for (i=0; i < c->entries; ++i) {
1678 if (c->codeword_lengths[i] == NO_CODE) continue;
1679 if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
1680 if (f->valid_bits >= c->codeword_lengths[i]) {
1681 f->acc >>= c->codeword_lengths[i];
1682 f->valid_bits -= c->codeword_lengths[i];
1683 return i;
1684 }
1685 f->valid_bits = 0;
1686 return -1;
1687 }
1688 }
1689
1690 error(f, VORBIS_invalid_stream);
1691 f->valid_bits = 0;
1692 return -1;
1693 }
1694
1695 #ifndef STB_VORBIS_NO_INLINE_DECODE
1696
1697 #define DECODE_RAW(var, f,c) \
1698 if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) \
1699 prep_huffman(f); \
1700 var = f->acc & FAST_HUFFMAN_TABLE_MASK; \
1701 var = c->fast_huffman[var]; \
1702 if (var >= 0) { \
1703 int n = c->codeword_lengths[var]; \
1704 f->acc >>= n; \
1705 f->valid_bits -= n; \
1706 if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
1707 } else { \
1708 var = codebook_decode_scalar_raw(f,c); \
1709 }
1710
1711 #else
1712
codebook_decode_scalar(vorb * f,Codebook * c)1713 static int codebook_decode_scalar(vorb *f, Codebook *c)
1714 {
1715 int i;
1716 if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
1717 prep_huffman(f);
1718 // fast huffman table lookup
1719 i = f->acc & FAST_HUFFMAN_TABLE_MASK;
1720 i = c->fast_huffman[i];
1721 if (i >= 0) {
1722 f->acc >>= c->codeword_lengths[i];
1723 f->valid_bits -= c->codeword_lengths[i];
1724 if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
1725 return i;
1726 }
1727 return codebook_decode_scalar_raw(f,c);
1728 }
1729
1730 #define DECODE_RAW(var,f,c) var = codebook_decode_scalar(f,c);
1731
1732 #endif
1733
1734 #define DECODE(var,f,c) \
1735 DECODE_RAW(var,f,c) \
1736 if (c->sparse) var = c->sorted_values[var];
1737
1738 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1739 #define DECODE_VQ(var,f,c) DECODE_RAW(var,f,c)
1740 #else
1741 #define DECODE_VQ(var,f,c) DECODE(var,f,c)
1742 #endif
1743
1744
1745
1746
1747
1748
1749 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
1750 // where we avoid one addition
1751 #define CODEBOOK_ELEMENT(c,off) (c->multiplicands[off])
1752 #define CODEBOOK_ELEMENT_FAST(c,off) (c->multiplicands[off])
1753 #define CODEBOOK_ELEMENT_BASE(c) (0)
1754
codebook_decode_start(vorb * f,Codebook * c)1755 static int codebook_decode_start(vorb *f, Codebook *c)
1756 {
1757 int z = -1;
1758
1759 // type 0 is only legal in a scalar context
1760 if (c->lookup_type == 0)
1761 error(f, VORBIS_invalid_stream);
1762 else {
1763 DECODE_VQ(z,f,c);
1764 if (c->sparse) assert(z < c->sorted_entries);
1765 if (z < 0) { // check for EOP
1766 if (!f->bytes_in_seg)
1767 if (f->last_seg)
1768 return z;
1769 error(f, VORBIS_invalid_stream);
1770 }
1771 }
1772 return z;
1773 }
1774
codebook_decode(vorb * f,Codebook * c,float * output,int len)1775 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1776 {
1777 int i,z = codebook_decode_start(f,c);
1778 if (z < 0) return FALSE;
1779 if (len > c->dimensions) len = c->dimensions;
1780
1781 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1782 if (c->lookup_type == 1) {
1783 float last = CODEBOOK_ELEMENT_BASE(c);
1784 int div = 1;
1785 for (i=0; i < len; ++i) {
1786 int off = (z / div) % c->lookup_values;
1787 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1788 output[i] += val;
1789 if (c->sequence_p) last = val + c->minimum_value;
1790 div *= c->lookup_values;
1791 }
1792 return TRUE;
1793 }
1794 #endif
1795
1796 z *= c->dimensions;
1797 if (c->sequence_p) {
1798 float last = CODEBOOK_ELEMENT_BASE(c);
1799 for (i=0; i < len; ++i) {
1800 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1801 output[i] += val;
1802 last = val + c->minimum_value;
1803 }
1804 } else {
1805 float last = CODEBOOK_ELEMENT_BASE(c);
1806 for (i=0; i < len; ++i) {
1807 output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1808 }
1809 }
1810
1811 return TRUE;
1812 }
1813
codebook_decode_step(vorb * f,Codebook * c,float * output,int len,int step)1814 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1815 {
1816 int i,z = codebook_decode_start(f,c);
1817 float last = CODEBOOK_ELEMENT_BASE(c);
1818 if (z < 0) return FALSE;
1819 if (len > c->dimensions) len = c->dimensions;
1820
1821 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1822 if (c->lookup_type == 1) {
1823 int div = 1;
1824 for (i=0; i < len; ++i) {
1825 int off = (z / div) % c->lookup_values;
1826 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1827 output[i*step] += val;
1828 if (c->sequence_p) last = val;
1829 div *= c->lookup_values;
1830 }
1831 return TRUE;
1832 }
1833 #endif
1834
1835 z *= c->dimensions;
1836 for (i=0; i < len; ++i) {
1837 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1838 output[i*step] += val;
1839 if (c->sequence_p) last = val;
1840 }
1841
1842 return TRUE;
1843 }
1844
codebook_decode_deinterleave_repeat(vorb * f,Codebook * c,float ** outputs,int ch,int * c_inter_p,int * p_inter_p,int len,int total_decode)1845 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1846 {
1847 int c_inter = *c_inter_p;
1848 int p_inter = *p_inter_p;
1849 int i,z, effective = c->dimensions;
1850
1851 // type 0 is only legal in a scalar context
1852 if (c->lookup_type == 0) return error(f, VORBIS_invalid_stream);
1853
1854 while (total_decode > 0) {
1855 float last = CODEBOOK_ELEMENT_BASE(c);
1856 DECODE_VQ(z,f,c);
1857 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1858 assert(!c->sparse || z < c->sorted_entries);
1859 #endif
1860 if (z < 0) {
1861 if (!f->bytes_in_seg)
1862 if (f->last_seg) return FALSE;
1863 return error(f, VORBIS_invalid_stream);
1864 }
1865
1866 // if this will take us off the end of the buffers, stop short!
1867 // we check by computing the length of the virtual interleaved
1868 // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1869 // and the length we'll be using (effective)
1870 if (c_inter + p_inter*ch + effective > len * ch) {
1871 effective = len*ch - (p_inter*ch - c_inter);
1872 }
1873
1874 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1875 if (c->lookup_type == 1) {
1876 int div = 1;
1877 for (i=0; i < effective; ++i) {
1878 int off = (z / div) % c->lookup_values;
1879 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1880 if (outputs[c_inter])
1881 outputs[c_inter][p_inter] += val;
1882 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1883 if (c->sequence_p) last = val;
1884 div *= c->lookup_values;
1885 }
1886 } else
1887 #endif
1888 {
1889 z *= c->dimensions;
1890 if (c->sequence_p) {
1891 for (i=0; i < effective; ++i) {
1892 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1893 if (outputs[c_inter])
1894 outputs[c_inter][p_inter] += val;
1895 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1896 last = val;
1897 }
1898 } else {
1899 for (i=0; i < effective; ++i) {
1900 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1901 if (outputs[c_inter])
1902 outputs[c_inter][p_inter] += val;
1903 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1904 }
1905 }
1906 }
1907
1908 total_decode -= effective;
1909 }
1910 *c_inter_p = c_inter;
1911 *p_inter_p = p_inter;
1912 return TRUE;
1913 }
1914
predict_point(int x,int x0,int x1,int y0,int y1)1915 static int predict_point(int x, int x0, int x1, int y0, int y1)
1916 {
1917 int dy = y1 - y0;
1918 int adx = x1 - x0;
1919 // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
1920 int err = abs(dy) * (x - x0);
1921 int off = err / adx;
1922 return dy < 0 ? y0 - off : y0 + off;
1923 }
1924
1925 // the following table is block-copied from the specification
1926 static float inverse_db_table[256] =
1927 {
1928 1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
1929 1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
1930 1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
1931 2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
1932 2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
1933 3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
1934 4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
1935 6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
1936 7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
1937 1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
1938 1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
1939 1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
1940 2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
1941 2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
1942 3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
1943 4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
1944 5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
1945 7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
1946 9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
1947 1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
1948 1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
1949 2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
1950 2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
1951 3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
1952 4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
1953 5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
1954 7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
1955 9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
1956 0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
1957 0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
1958 0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
1959 0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
1960 0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
1961 0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
1962 0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
1963 0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
1964 0.00092223983f, 0.00098217216f, 0.0010459992f, 0.0011139742f,
1965 0.0011863665f, 0.0012634633f, 0.0013455702f, 0.0014330129f,
1966 0.0015261382f, 0.0016253153f, 0.0017309374f, 0.0018434235f,
1967 0.0019632195f, 0.0020908006f, 0.0022266726f, 0.0023713743f,
1968 0.0025254795f, 0.0026895994f, 0.0028643847f, 0.0030505286f,
1969 0.0032487691f, 0.0034598925f, 0.0036847358f, 0.0039241906f,
1970 0.0041792066f, 0.0044507950f, 0.0047400328f, 0.0050480668f,
1971 0.0053761186f, 0.0057254891f, 0.0060975636f, 0.0064938176f,
1972 0.0069158225f, 0.0073652516f, 0.0078438871f, 0.0083536271f,
1973 0.0088964928f, 0.009474637f, 0.010090352f, 0.010746080f,
1974 0.011444421f, 0.012188144f, 0.012980198f, 0.013823725f,
1975 0.014722068f, 0.015678791f, 0.016697687f, 0.017782797f,
1976 0.018938423f, 0.020169149f, 0.021479854f, 0.022875735f,
1977 0.024362330f, 0.025945531f, 0.027631618f, 0.029427276f,
1978 0.031339626f, 0.033376252f, 0.035545228f, 0.037855157f,
1979 0.040315199f, 0.042935108f, 0.045725273f, 0.048696758f,
1980 0.051861348f, 0.055231591f, 0.058820850f, 0.062643361f,
1981 0.066714279f, 0.071049749f, 0.075666962f, 0.080584227f,
1982 0.085821044f, 0.091398179f, 0.097337747f, 0.10366330f,
1983 0.11039993f, 0.11757434f, 0.12521498f, 0.13335215f,
1984 0.14201813f, 0.15124727f, 0.16107617f, 0.17154380f,
1985 0.18269168f, 0.19456402f, 0.20720788f, 0.22067342f,
1986 0.23501402f, 0.25028656f, 0.26655159f, 0.28387361f,
1987 0.30232132f, 0.32196786f, 0.34289114f, 0.36517414f,
1988 0.38890521f, 0.41417847f, 0.44109412f, 0.46975890f,
1989 0.50028648f, 0.53279791f, 0.56742212f, 0.60429640f,
1990 0.64356699f, 0.68538959f, 0.72993007f, 0.77736504f,
1991 0.82788260f, 0.88168307f, 0.9389798f, 1.0f
1992 };
1993
1994
1995 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1996 // note that you must produce bit-identical output to decode correctly;
1997 // this specific sequence of operations is specified in the spec (it's
1998 // drawing integer-quantized frequency-space lines that the encoder
1999 // expects to be exactly the same)
2000 // ... also, isn't the whole point of Bresenham's algorithm to NOT
2001 // have to divide in the setup? sigh.
2002 #ifndef STB_VORBIS_NO_DEFER_FLOOR
2003 #define LINE_OP(a,b) a *= b
2004 #else
2005 #define LINE_OP(a,b) a = b
2006 #endif
2007
2008 #ifdef STB_VORBIS_DIVIDE_TABLE
2009 #define DIVTAB_NUMER 32
2010 #define DIVTAB_DENOM 64
2011 int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
2012 #endif
2013
draw_line(float * output,int x0,int y0,int x1,int y1,int n)2014 static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
2015 {
2016 int dy = y1 - y0;
2017 int adx = x1 - x0;
2018 int ady = abs(dy);
2019 int base;
2020 int x=x0,y=y0;
2021 int err = 0;
2022 int sy;
2023
2024 #ifdef STB_VORBIS_DIVIDE_TABLE
2025 if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
2026 if (dy < 0) {
2027 base = -integer_divide_table[ady][adx];
2028 sy = base-1;
2029 } else {
2030 base = integer_divide_table[ady][adx];
2031 sy = base+1;
2032 }
2033 } else {
2034 base = dy / adx;
2035 if (dy < 0)
2036 sy = base - 1;
2037 else
2038 sy = base+1;
2039 }
2040 #else
2041 base = dy / adx;
2042 if (dy < 0)
2043 sy = base - 1;
2044 else
2045 sy = base+1;
2046 #endif
2047 ady -= abs(base) * adx;
2048 if (x1 > n) x1 = n;
2049 if (x < x1) {
2050 LINE_OP(output[x], inverse_db_table[y&255]);
2051 for (++x; x < x1; ++x) {
2052 err += ady;
2053 if (err >= adx) {
2054 err -= adx;
2055 y += sy;
2056 } else
2057 y += base;
2058 LINE_OP(output[x], inverse_db_table[y&255]);
2059 }
2060 }
2061 }
2062
residue_decode(vorb * f,Codebook * book,float * target,int offset,int n,int rtype)2063 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
2064 {
2065 int k;
2066 if (rtype == 0) {
2067 int step = n / book->dimensions;
2068 for (k=0; k < step; ++k)
2069 if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
2070 return FALSE;
2071 } else {
2072 for (k=0; k < n; ) {
2073 if (!codebook_decode(f, book, target+offset, n-k))
2074 return FALSE;
2075 k += book->dimensions;
2076 offset += book->dimensions;
2077 }
2078 }
2079 return TRUE;
2080 }
2081
2082 // n is 1/2 of the blocksize --
2083 // specification: "Correct per-vector decode length is [n]/2"
decode_residue(vorb * f,float * residue_buffers[],int ch,int n,int rn,uint8 * do_not_decode)2084 static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
2085 {
2086 int i,j,pass;
2087 Residue *r = f->residue_config + rn;
2088 int rtype = f->residue_types[rn];
2089 int c = r->classbook;
2090 int classwords = f->codebooks[c].dimensions;
2091 unsigned int actual_size = rtype == 2 ? n*2 : n;
2092 unsigned int limit_r_begin = (r->begin < actual_size ? r->begin : actual_size);
2093 unsigned int limit_r_end = (r->end < actual_size ? r->end : actual_size);
2094 int n_read = limit_r_end - limit_r_begin;
2095 int part_read = n_read / r->part_size;
2096 int temp_alloc_point = temp_alloc_save(f);
2097 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2098 uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
2099 #else
2100 int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
2101 #endif
2102
2103 CHECK(f);
2104
2105 for (i=0; i < ch; ++i)
2106 if (!do_not_decode[i])
2107 memset(residue_buffers[i], 0, sizeof(float) * n);
2108
2109 if (rtype == 2 && ch != 1) {
2110 for (j=0; j < ch; ++j)
2111 if (!do_not_decode[j])
2112 break;
2113 if (j == ch)
2114 goto done;
2115
2116 for (pass=0; pass < 8; ++pass) {
2117 int pcount = 0, class_set = 0;
2118 if (ch == 2) {
2119 while (pcount < part_read) {
2120 int z = r->begin + pcount*r->part_size;
2121 int c_inter = (z & 1), p_inter = z>>1;
2122 if (pass == 0) {
2123 Codebook *c = f->codebooks+r->classbook;
2124 int q;
2125 DECODE(q,f,c);
2126 if (q == EOP) goto done;
2127 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2128 part_classdata[0][class_set] = r->classdata[q];
2129 #else
2130 for (i=classwords-1; i >= 0; --i) {
2131 classifications[0][i+pcount] = q % r->classifications;
2132 q /= r->classifications;
2133 }
2134 #endif
2135 }
2136 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2137 int z = r->begin + pcount*r->part_size;
2138 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2139 int c = part_classdata[0][class_set][i];
2140 #else
2141 int c = classifications[0][pcount];
2142 #endif
2143 int b = r->residue_books[c][pass];
2144 if (b >= 0) {
2145 Codebook *book = f->codebooks + b;
2146 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
2147 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2148 goto done;
2149 #else
2150 // saves 1%
2151 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2152 goto done;
2153 #endif
2154 } else {
2155 z += r->part_size;
2156 c_inter = z & 1;
2157 p_inter = z >> 1;
2158 }
2159 }
2160 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2161 ++class_set;
2162 #endif
2163 }
2164 } else if (ch > 2) {
2165 while (pcount < part_read) {
2166 int z = r->begin + pcount*r->part_size;
2167 int c_inter = z % ch, p_inter = z/ch;
2168 if (pass == 0) {
2169 Codebook *c = f->codebooks+r->classbook;
2170 int q;
2171 DECODE(q,f,c);
2172 if (q == EOP) goto done;
2173 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2174 part_classdata[0][class_set] = r->classdata[q];
2175 #else
2176 for (i=classwords-1; i >= 0; --i) {
2177 classifications[0][i+pcount] = q % r->classifications;
2178 q /= r->classifications;
2179 }
2180 #endif
2181 }
2182 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2183 int z = r->begin + pcount*r->part_size;
2184 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2185 int c = part_classdata[0][class_set][i];
2186 #else
2187 int c = classifications[0][pcount];
2188 #endif
2189 int b = r->residue_books[c][pass];
2190 if (b >= 0) {
2191 Codebook *book = f->codebooks + b;
2192 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2193 goto done;
2194 } else {
2195 z += r->part_size;
2196 c_inter = z % ch;
2197 p_inter = z / ch;
2198 }
2199 }
2200 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2201 ++class_set;
2202 #endif
2203 }
2204 }
2205 }
2206 goto done;
2207 }
2208 CHECK(f);
2209
2210 for (pass=0; pass < 8; ++pass) {
2211 int pcount = 0, class_set=0;
2212 while (pcount < part_read) {
2213 if (pass == 0) {
2214 for (j=0; j < ch; ++j) {
2215 if (!do_not_decode[j]) {
2216 Codebook *c = f->codebooks+r->classbook;
2217 int temp;
2218 DECODE(temp,f,c);
2219 if (temp == EOP) goto done;
2220 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2221 part_classdata[j][class_set] = r->classdata[temp];
2222 #else
2223 for (i=classwords-1; i >= 0; --i) {
2224 classifications[j][i+pcount] = temp % r->classifications;
2225 temp /= r->classifications;
2226 }
2227 #endif
2228 }
2229 }
2230 }
2231 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2232 for (j=0; j < ch; ++j) {
2233 if (!do_not_decode[j]) {
2234 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2235 int c = part_classdata[j][class_set][i];
2236 #else
2237 int c = classifications[j][pcount];
2238 #endif
2239 int b = r->residue_books[c][pass];
2240 if (b >= 0) {
2241 float *target = residue_buffers[j];
2242 int offset = r->begin + pcount * r->part_size;
2243 int n = r->part_size;
2244 Codebook *book = f->codebooks + b;
2245 if (!residue_decode(f, book, target, offset, n, rtype))
2246 goto done;
2247 }
2248 }
2249 }
2250 }
2251 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2252 ++class_set;
2253 #endif
2254 }
2255 }
2256 done:
2257 CHECK(f);
2258 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2259 temp_free(f,part_classdata);
2260 #else
2261 temp_free(f,classifications);
2262 #endif
2263 temp_alloc_restore(f,temp_alloc_point);
2264 }
2265
2266
2267 #if 0
2268 // slow way for debugging
2269 void inverse_mdct_slow(float *buffer, int n)
2270 {
2271 int i,j;
2272 int n2 = n >> 1;
2273 float *x = (float *) malloc(sizeof(*x) * n2);
2274 memcpy(x, buffer, sizeof(*x) * n2);
2275 for (i=0; i < n; ++i) {
2276 float acc = 0;
2277 for (j=0; j < n2; ++j)
2278 // formula from paper:
2279 //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2280 // formula from wikipedia
2281 //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2282 // these are equivalent, except the formula from the paper inverts the multiplier!
2283 // however, what actually works is NO MULTIPLIER!?!
2284 //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2285 acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2286 buffer[i] = acc;
2287 }
2288 free(x);
2289 }
2290 #elif 0
2291 // same as above, but just barely able to run in real time on modern machines
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2292 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2293 {
2294 float mcos[16384];
2295 int i,j;
2296 int n2 = n >> 1, nmask = (n << 2) -1;
2297 float *x = (float *) malloc(sizeof(*x) * n2);
2298 memcpy(x, buffer, sizeof(*x) * n2);
2299 for (i=0; i < 4*n; ++i)
2300 mcos[i] = (float) cos(M_PI / 2 * i / n);
2301
2302 for (i=0; i < n; ++i) {
2303 float acc = 0;
2304 for (j=0; j < n2; ++j)
2305 acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
2306 buffer[i] = acc;
2307 }
2308 free(x);
2309 }
2310 #elif 0
2311 // transform to use a slow dct-iv; this is STILL basically trivial,
2312 // but only requires half as many ops
dct_iv_slow(float * buffer,int n)2313 void dct_iv_slow(float *buffer, int n)
2314 {
2315 float mcos[16384];
2316 float x[2048];
2317 int i,j;
2318 int n2 = n >> 1, nmask = (n << 3) - 1;
2319 memcpy(x, buffer, sizeof(*x) * n);
2320 for (i=0; i < 8*n; ++i)
2321 mcos[i] = (float) cos(M_PI / 4 * i / n);
2322 for (i=0; i < n; ++i) {
2323 float acc = 0;
2324 for (j=0; j < n; ++j)
2325 acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
2326 buffer[i] = acc;
2327 }
2328 }
2329
inverse_mdct_slow(float * buffer,int n,vorb * f,int blocktype)2330 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2331 {
2332 int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
2333 float temp[4096];
2334
2335 memcpy(temp, buffer, n2 * sizeof(float));
2336 dct_iv_slow(temp, n2); // returns -c'-d, a-b'
2337
2338 for (i=0; i < n4 ; ++i) buffer[i] = temp[i+n4]; // a-b'
2339 for ( ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1]; // b-a', c+d'
2340 for ( ; i < n ; ++i) buffer[i] = -temp[i - n3_4]; // c'+d
2341 }
2342 #endif
2343
2344 #ifndef LIBVORBIS_MDCT
2345 #define LIBVORBIS_MDCT 0
2346 #endif
2347
2348 #if LIBVORBIS_MDCT
2349 // directly call the vorbis MDCT using an interface documented
2350 // by Jeff Roberts... useful for performance comparison
2351 typedef struct
2352 {
2353 int n;
2354 int log2n;
2355
2356 float *trig;
2357 int *bitrev;
2358
2359 float scale;
2360 } mdct_lookup;
2361
2362 extern void mdct_init(mdct_lookup *lookup, int n);
2363 extern void mdct_clear(mdct_lookup *l);
2364 extern void mdct_backward(mdct_lookup *init, float *in, float *out);
2365
2366 mdct_lookup M1,M2;
2367
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2368 void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2369 {
2370 mdct_lookup *M;
2371 if (M1.n == n) M = &M1;
2372 else if (M2.n == n) M = &M2;
2373 else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
2374 else {
2375 if (M2.n) __asm int 3;
2376 mdct_init(&M2, n);
2377 M = &M2;
2378 }
2379
2380 mdct_backward(M, buffer, buffer);
2381 }
2382 #endif
2383
2384
2385 // the following were split out into separate functions while optimizing;
2386 // they could be pushed back up but eh. __forceinline showed no change;
2387 // they're probably already being inlined.
imdct_step3_iter0_loop(int n,float * e,int i_off,int k_off,float * A)2388 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
2389 {
2390 float *ee0 = e + i_off;
2391 float *ee2 = ee0 + k_off;
2392 int i;
2393
2394 assert((n & 3) == 0);
2395 for (i=(n>>2); i > 0; --i) {
2396 float k00_20, k01_21;
2397 k00_20 = ee0[ 0] - ee2[ 0];
2398 k01_21 = ee0[-1] - ee2[-1];
2399 ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
2400 ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
2401 ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
2402 ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
2403 A += 8;
2404
2405 k00_20 = ee0[-2] - ee2[-2];
2406 k01_21 = ee0[-3] - ee2[-3];
2407 ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
2408 ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
2409 ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
2410 ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
2411 A += 8;
2412
2413 k00_20 = ee0[-4] - ee2[-4];
2414 k01_21 = ee0[-5] - ee2[-5];
2415 ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
2416 ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
2417 ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
2418 ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
2419 A += 8;
2420
2421 k00_20 = ee0[-6] - ee2[-6];
2422 k01_21 = ee0[-7] - ee2[-7];
2423 ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
2424 ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
2425 ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
2426 ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
2427 A += 8;
2428 ee0 -= 8;
2429 ee2 -= 8;
2430 }
2431 }
2432
imdct_step3_inner_r_loop(int lim,float * e,int d0,int k_off,float * A,int k1)2433 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
2434 {
2435 int i;
2436 float k00_20, k01_21;
2437
2438 float *e0 = e + d0;
2439 float *e2 = e0 + k_off;
2440
2441 for (i=lim >> 2; i > 0; --i) {
2442 k00_20 = e0[-0] - e2[-0];
2443 k01_21 = e0[-1] - e2[-1];
2444 e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
2445 e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
2446 e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
2447 e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
2448
2449 A += k1;
2450
2451 k00_20 = e0[-2] - e2[-2];
2452 k01_21 = e0[-3] - e2[-3];
2453 e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
2454 e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
2455 e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
2456 e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
2457
2458 A += k1;
2459
2460 k00_20 = e0[-4] - e2[-4];
2461 k01_21 = e0[-5] - e2[-5];
2462 e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
2463 e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
2464 e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
2465 e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
2466
2467 A += k1;
2468
2469 k00_20 = e0[-6] - e2[-6];
2470 k01_21 = e0[-7] - e2[-7];
2471 e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
2472 e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
2473 e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
2474 e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
2475
2476 e0 -= 8;
2477 e2 -= 8;
2478
2479 A += k1;
2480 }
2481 }
2482
imdct_step3_inner_s_loop(int n,float * e,int i_off,int k_off,float * A,int a_off,int k0)2483 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
2484 {
2485 int i;
2486 float A0 = A[0];
2487 float A1 = A[0+1];
2488 float A2 = A[0+a_off];
2489 float A3 = A[0+a_off+1];
2490 float A4 = A[0+a_off*2+0];
2491 float A5 = A[0+a_off*2+1];
2492 float A6 = A[0+a_off*3+0];
2493 float A7 = A[0+a_off*3+1];
2494
2495 float k00,k11;
2496
2497 float *ee0 = e +i_off;
2498 float *ee2 = ee0+k_off;
2499
2500 for (i=n; i > 0; --i) {
2501 k00 = ee0[ 0] - ee2[ 0];
2502 k11 = ee0[-1] - ee2[-1];
2503 ee0[ 0] = ee0[ 0] + ee2[ 0];
2504 ee0[-1] = ee0[-1] + ee2[-1];
2505 ee2[ 0] = (k00) * A0 - (k11) * A1;
2506 ee2[-1] = (k11) * A0 + (k00) * A1;
2507
2508 k00 = ee0[-2] - ee2[-2];
2509 k11 = ee0[-3] - ee2[-3];
2510 ee0[-2] = ee0[-2] + ee2[-2];
2511 ee0[-3] = ee0[-3] + ee2[-3];
2512 ee2[-2] = (k00) * A2 - (k11) * A3;
2513 ee2[-3] = (k11) * A2 + (k00) * A3;
2514
2515 k00 = ee0[-4] - ee2[-4];
2516 k11 = ee0[-5] - ee2[-5];
2517 ee0[-4] = ee0[-4] + ee2[-4];
2518 ee0[-5] = ee0[-5] + ee2[-5];
2519 ee2[-4] = (k00) * A4 - (k11) * A5;
2520 ee2[-5] = (k11) * A4 + (k00) * A5;
2521
2522 k00 = ee0[-6] - ee2[-6];
2523 k11 = ee0[-7] - ee2[-7];
2524 ee0[-6] = ee0[-6] + ee2[-6];
2525 ee0[-7] = ee0[-7] + ee2[-7];
2526 ee2[-6] = (k00) * A6 - (k11) * A7;
2527 ee2[-7] = (k11) * A6 + (k00) * A7;
2528
2529 ee0 -= k0;
2530 ee2 -= k0;
2531 }
2532 }
2533
iter_54(float * z)2534 static __forceinline void iter_54(float *z)
2535 {
2536 float k00,k11,k22,k33;
2537 float y0,y1,y2,y3;
2538
2539 k00 = z[ 0] - z[-4];
2540 y0 = z[ 0] + z[-4];
2541 y2 = z[-2] + z[-6];
2542 k22 = z[-2] - z[-6];
2543
2544 z[-0] = y0 + y2; // z0 + z4 + z2 + z6
2545 z[-2] = y0 - y2; // z0 + z4 - z2 - z6
2546
2547 // done with y0,y2
2548
2549 k33 = z[-3] - z[-7];
2550
2551 z[-4] = k00 + k33; // z0 - z4 + z3 - z7
2552 z[-6] = k00 - k33; // z0 - z4 - z3 + z7
2553
2554 // done with k33
2555
2556 k11 = z[-1] - z[-5];
2557 y1 = z[-1] + z[-5];
2558 y3 = z[-3] + z[-7];
2559
2560 z[-1] = y1 + y3; // z1 + z5 + z3 + z7
2561 z[-3] = y1 - y3; // z1 + z5 - z3 - z7
2562 z[-5] = k11 - k22; // z1 - z5 + z2 - z6
2563 z[-7] = k11 + k22; // z1 - z5 - z2 + z6
2564 }
2565
imdct_step3_inner_s_loop_ld654(int n,float * e,int i_off,float * A,int base_n)2566 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
2567 {
2568 int a_off = base_n >> 3;
2569 float A2 = A[0+a_off];
2570 float *z = e + i_off;
2571 float *base = z - 16 * n;
2572
2573 while (z > base) {
2574 float k00,k11;
2575
2576 k00 = z[-0] - z[-8];
2577 k11 = z[-1] - z[-9];
2578 z[-0] = z[-0] + z[-8];
2579 z[-1] = z[-1] + z[-9];
2580 z[-8] = k00;
2581 z[-9] = k11 ;
2582
2583 k00 = z[ -2] - z[-10];
2584 k11 = z[ -3] - z[-11];
2585 z[ -2] = z[ -2] + z[-10];
2586 z[ -3] = z[ -3] + z[-11];
2587 z[-10] = (k00+k11) * A2;
2588 z[-11] = (k11-k00) * A2;
2589
2590 k00 = z[-12] - z[ -4]; // reverse to avoid a unary negation
2591 k11 = z[ -5] - z[-13];
2592 z[ -4] = z[ -4] + z[-12];
2593 z[ -5] = z[ -5] + z[-13];
2594 z[-12] = k11;
2595 z[-13] = k00;
2596
2597 k00 = z[-14] - z[ -6]; // reverse to avoid a unary negation
2598 k11 = z[ -7] - z[-15];
2599 z[ -6] = z[ -6] + z[-14];
2600 z[ -7] = z[ -7] + z[-15];
2601 z[-14] = (k00+k11) * A2;
2602 z[-15] = (k00-k11) * A2;
2603
2604 iter_54(z);
2605 iter_54(z-8);
2606 z -= 16;
2607 }
2608 }
2609
inverse_mdct(float * buffer,int n,vorb * f,int blocktype)2610 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2611 {
2612 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2613 int ld;
2614 // @OPTIMIZE: reduce register pressure by using fewer variables?
2615 int save_point = temp_alloc_save(f);
2616 float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
2617 float *u=NULL,*v=NULL;
2618 // twiddle factors
2619 float *A = f->A[blocktype];
2620
2621 // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2622 // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
2623
2624 // kernel from paper
2625
2626
2627 // merged:
2628 // copy and reflect spectral data
2629 // step 0
2630
2631 // note that it turns out that the items added together during
2632 // this step are, in fact, being added to themselves (as reflected
2633 // by step 0). inexplicable inefficiency! this became obvious
2634 // once I combined the passes.
2635
2636 // so there's a missing 'times 2' here (for adding X to itself).
2637 // this propagates through linearly to the end, where the numbers
2638 // are 1/2 too small, and need to be compensated for.
2639
2640 {
2641 float *d,*e, *AA, *e_stop;
2642 d = &buf2[n2-2];
2643 AA = A;
2644 e = &buffer[0];
2645 e_stop = &buffer[n2];
2646 while (e != e_stop) {
2647 d[1] = (e[0] * AA[0] - e[2]*AA[1]);
2648 d[0] = (e[0] * AA[1] + e[2]*AA[0]);
2649 d -= 2;
2650 AA += 2;
2651 e += 4;
2652 }
2653
2654 e = &buffer[n2-3];
2655 while (d >= buf2) {
2656 d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
2657 d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
2658 d -= 2;
2659 AA += 2;
2660 e -= 4;
2661 }
2662 }
2663
2664 // now we use symbolic names for these, so that we can
2665 // possibly swap their meaning as we change which operations
2666 // are in place
2667
2668 u = buffer;
2669 v = buf2;
2670
2671 // step 2 (paper output is w, now u)
2672 // this could be in place, but the data ends up in the wrong
2673 // place... _somebody_'s got to swap it, so this is nominated
2674 {
2675 float *AA = &A[n2-8];
2676 float *d0,*d1, *e0, *e1;
2677
2678 e0 = &v[n4];
2679 e1 = &v[0];
2680
2681 d0 = &u[n4];
2682 d1 = &u[0];
2683
2684 while (AA >= A) {
2685 float v40_20, v41_21;
2686
2687 v41_21 = e0[1] - e1[1];
2688 v40_20 = e0[0] - e1[0];
2689 d0[1] = e0[1] + e1[1];
2690 d0[0] = e0[0] + e1[0];
2691 d1[1] = v41_21*AA[4] - v40_20*AA[5];
2692 d1[0] = v40_20*AA[4] + v41_21*AA[5];
2693
2694 v41_21 = e0[3] - e1[3];
2695 v40_20 = e0[2] - e1[2];
2696 d0[3] = e0[3] + e1[3];
2697 d0[2] = e0[2] + e1[2];
2698 d1[3] = v41_21*AA[0] - v40_20*AA[1];
2699 d1[2] = v40_20*AA[0] + v41_21*AA[1];
2700
2701 AA -= 8;
2702
2703 d0 += 4;
2704 d1 += 4;
2705 e0 += 4;
2706 e1 += 4;
2707 }
2708 }
2709
2710 // step 3
2711 ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2712
2713 // optimized step 3:
2714
2715 // the original step3 loop can be nested r inside s or s inside r;
2716 // it's written originally as s inside r, but this is dumb when r
2717 // iterates many times, and s few. So I have two copies of it and
2718 // switch between them halfway.
2719
2720 // this is iteration 0 of step 3
2721 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2722 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2723
2724 // this is iteration 1 of step 3
2725 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2726 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2727 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2728 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2729
2730 l=2;
2731 for (; l < (ld-3)>>1; ++l) {
2732 int k0 = n >> (l+2), k0_2 = k0>>1;
2733 int lim = 1 << (l+1);
2734 int i;
2735 for (i=0; i < lim; ++i)
2736 imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2737 }
2738
2739 for (; l < ld-6; ++l) {
2740 int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2741 int rlim = n >> (l+6), r;
2742 int lim = 1 << (l+1);
2743 int i_off;
2744 float *A0 = A;
2745 i_off = n2-1;
2746 for (r=rlim; r > 0; --r) {
2747 imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2748 A0 += k1*4;
2749 i_off -= 8;
2750 }
2751 }
2752
2753 // iterations with count:
2754 // ld-6,-5,-4 all interleaved together
2755 // the big win comes from getting rid of needless flops
2756 // due to the constants on pass 5 & 4 being all 1 and 0;
2757 // combining them to be simultaneous to improve cache made little difference
2758 imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2759
2760 // output is u
2761
2762 // step 4, 5, and 6
2763 // cannot be in-place because of step 5
2764 {
2765 uint16 *bitrev = f->bit_reverse[blocktype];
2766 // weirdly, I'd have thought reading sequentially and writing
2767 // erratically would have been better than vice-versa, but in
2768 // fact that's not what my testing showed. (That is, with
2769 // j = bitreverse(i), do you read i and write j, or read j and write i.)
2770
2771 float *d0 = &v[n4-4];
2772 float *d1 = &v[n2-4];
2773 while (d0 >= v) {
2774 int k4;
2775
2776 k4 = bitrev[0];
2777 d1[3] = u[k4+0];
2778 d1[2] = u[k4+1];
2779 d0[3] = u[k4+2];
2780 d0[2] = u[k4+3];
2781
2782 k4 = bitrev[1];
2783 d1[1] = u[k4+0];
2784 d1[0] = u[k4+1];
2785 d0[1] = u[k4+2];
2786 d0[0] = u[k4+3];
2787
2788 d0 -= 4;
2789 d1 -= 4;
2790 bitrev += 2;
2791 }
2792 }
2793 // (paper output is u, now v)
2794
2795
2796 // data must be in buf2
2797 assert(v == buf2);
2798
2799 // step 7 (paper output is v, now v)
2800 // this is now in place
2801 {
2802 float *C = f->C[blocktype];
2803 float *d, *e;
2804
2805 d = v;
2806 e = v + n2 - 4;
2807
2808 while (d < e) {
2809 float a02,a11,b0,b1,b2,b3;
2810
2811 a02 = d[0] - e[2];
2812 a11 = d[1] + e[3];
2813
2814 b0 = C[1]*a02 + C[0]*a11;
2815 b1 = C[1]*a11 - C[0]*a02;
2816
2817 b2 = d[0] + e[ 2];
2818 b3 = d[1] - e[ 3];
2819
2820 d[0] = b2 + b0;
2821 d[1] = b3 + b1;
2822 e[2] = b2 - b0;
2823 e[3] = b1 - b3;
2824
2825 a02 = d[2] - e[0];
2826 a11 = d[3] + e[1];
2827
2828 b0 = C[3]*a02 + C[2]*a11;
2829 b1 = C[3]*a11 - C[2]*a02;
2830
2831 b2 = d[2] + e[ 0];
2832 b3 = d[3] - e[ 1];
2833
2834 d[2] = b2 + b0;
2835 d[3] = b3 + b1;
2836 e[0] = b2 - b0;
2837 e[1] = b1 - b3;
2838
2839 C += 4;
2840 d += 4;
2841 e -= 4;
2842 }
2843 }
2844
2845 // data must be in buf2
2846
2847
2848 // step 8+decode (paper output is X, now buffer)
2849 // this generates pairs of data a la 8 and pushes them directly through
2850 // the decode kernel (pushing rather than pulling) to avoid having
2851 // to make another pass later
2852
2853 // this cannot POSSIBLY be in place, so we refer to the buffers directly
2854
2855 {
2856 float *d0,*d1,*d2,*d3;
2857
2858 float *B = f->B[blocktype] + n2 - 8;
2859 float *e = buf2 + n2 - 8;
2860 d0 = &buffer[0];
2861 d1 = &buffer[n2-4];
2862 d2 = &buffer[n2];
2863 d3 = &buffer[n-4];
2864 while (e >= v) {
2865 float p0,p1,p2,p3;
2866
2867 p3 = e[6]*B[7] - e[7]*B[6];
2868 p2 = -e[6]*B[6] - e[7]*B[7];
2869
2870 d0[0] = p3;
2871 d1[3] = - p3;
2872 d2[0] = p2;
2873 d3[3] = p2;
2874
2875 p1 = e[4]*B[5] - e[5]*B[4];
2876 p0 = -e[4]*B[4] - e[5]*B[5];
2877
2878 d0[1] = p1;
2879 d1[2] = - p1;
2880 d2[1] = p0;
2881 d3[2] = p0;
2882
2883 p3 = e[2]*B[3] - e[3]*B[2];
2884 p2 = -e[2]*B[2] - e[3]*B[3];
2885
2886 d0[2] = p3;
2887 d1[1] = - p3;
2888 d2[2] = p2;
2889 d3[1] = p2;
2890
2891 p1 = e[0]*B[1] - e[1]*B[0];
2892 p0 = -e[0]*B[0] - e[1]*B[1];
2893
2894 d0[3] = p1;
2895 d1[0] = - p1;
2896 d2[3] = p0;
2897 d3[0] = p0;
2898
2899 B -= 8;
2900 e -= 8;
2901 d0 += 4;
2902 d2 += 4;
2903 d1 -= 4;
2904 d3 -= 4;
2905 }
2906 }
2907
2908 temp_free(f,buf2);
2909 temp_alloc_restore(f,save_point);
2910 }
2911
2912 #if 0
2913 // this is the original version of the above code, if you want to optimize it from scratch
2914 void inverse_mdct_naive(float *buffer, int n)
2915 {
2916 float s;
2917 float A[1 << 12], B[1 << 12], C[1 << 11];
2918 int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2919 int n3_4 = n - n4, ld;
2920 // how can they claim this only uses N words?!
2921 // oh, because they're only used sparsely, whoops
2922 float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];
2923 // set up twiddle factors
2924
2925 for (k=k2=0; k < n4; ++k,k2+=2) {
2926 A[k2 ] = (float) cos(4*k*M_PI/n);
2927 A[k2+1] = (float) -sin(4*k*M_PI/n);
2928 B[k2 ] = (float) cos((k2+1)*M_PI/n/2);
2929 B[k2+1] = (float) sin((k2+1)*M_PI/n/2);
2930 }
2931 for (k=k2=0; k < n8; ++k,k2+=2) {
2932 C[k2 ] = (float) cos(2*(k2+1)*M_PI/n);
2933 C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
2934 }
2935
2936 // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2937 // Note there are bugs in that pseudocode, presumably due to them attempting
2938 // to rename the arrays nicely rather than representing the way their actual
2939 // implementation bounces buffers back and forth. As a result, even in the
2940 // "some formulars corrected" version, a direct implementation fails. These
2941 // are noted below as "paper bug".
2942
2943 // copy and reflect spectral data
2944 for (k=0; k < n2; ++k) u[k] = buffer[k];
2945 for ( ; k < n ; ++k) u[k] = -buffer[n - k - 1];
2946 // kernel from paper
2947 // step 1
2948 for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
2949 v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2] - (u[k4+2] - u[n-k4-3])*A[k2+1];
2950 v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
2951 }
2952 // step 2
2953 for (k=k4=0; k < n8; k+=1, k4+=4) {
2954 w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
2955 w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
2956 w[k4+3] = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
2957 w[k4+1] = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
2958 }
2959 // step 3
2960 ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2961 for (l=0; l < ld-3; ++l) {
2962 int k0 = n >> (l+2), k1 = 1 << (l+3);
2963 int rlim = n >> (l+4), r4, r;
2964 int s2lim = 1 << (l+2), s2;
2965 for (r=r4=0; r < rlim; r4+=4,++r) {
2966 for (s2=0; s2 < s2lim; s2+=2) {
2967 u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
2968 u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
2969 u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
2970 - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
2971 u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
2972 + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
2973 }
2974 }
2975 if (l+1 < ld-3) {
2976 // paper bug: ping-ponging of u&w here is omitted
2977 memcpy(w, u, sizeof(u));
2978 }
2979 }
2980
2981 // step 4
2982 for (i=0; i < n8; ++i) {
2983 int j = bit_reverse(i) >> (32-ld+3);
2984 assert(j < n8);
2985 if (i == j) {
2986 // paper bug: original code probably swapped in place; if copying,
2987 // need to directly copy in this case
2988 int i8 = i << 3;
2989 v[i8+1] = u[i8+1];
2990 v[i8+3] = u[i8+3];
2991 v[i8+5] = u[i8+5];
2992 v[i8+7] = u[i8+7];
2993 } else if (i < j) {
2994 int i8 = i << 3, j8 = j << 3;
2995 v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
2996 v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
2997 v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
2998 v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
2999 }
3000 }
3001 // step 5
3002 for (k=0; k < n2; ++k) {
3003 w[k] = v[k*2+1];
3004 }
3005 // step 6
3006 for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
3007 u[n-1-k2] = w[k4];
3008 u[n-2-k2] = w[k4+1];
3009 u[n3_4 - 1 - k2] = w[k4+2];
3010 u[n3_4 - 2 - k2] = w[k4+3];
3011 }
3012 // step 7
3013 for (k=k2=0; k < n8; ++k, k2 += 2) {
3014 v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3015 v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3016 v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3017 v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3018 }
3019 // step 8
3020 for (k=k2=0; k < n4; ++k,k2 += 2) {
3021 X[k] = v[k2+n2]*B[k2 ] + v[k2+1+n2]*B[k2+1];
3022 X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2 ];
3023 }
3024
3025 // decode kernel to output
3026 // determined the following value experimentally
3027 // (by first figuring out what made inverse_mdct_slow work); then matching that here
3028 // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
3029 s = 0.5; // theoretically would be n4
3030
3031 // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
3032 // so it needs to use the "old" B values to behave correctly, or else
3033 // set s to 1.0 ]]]
3034 for (i=0; i < n4 ; ++i) buffer[i] = s * X[i+n4];
3035 for ( ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
3036 for ( ; i < n ; ++i) buffer[i] = -s * X[i - n3_4];
3037 }
3038 #endif
3039
get_window(vorb * f,int len)3040 static float *get_window(vorb *f, int len)
3041 {
3042 len <<= 1;
3043 if (len == f->blocksize_0) return f->window[0];
3044 if (len == f->blocksize_1) return f->window[1];
3045 return NULL;
3046 }
3047
3048 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3049 typedef int16 YTYPE;
3050 #else
3051 typedef int YTYPE;
3052 #endif
do_floor(vorb * f,Mapping * map,int i,int n,float * target,YTYPE * finalY,uint8 * step2_flag)3053 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
3054 {
3055 int n2 = n >> 1;
3056 int s = map->chan[i].mux, floor;
3057 floor = map->submap_floor[s];
3058 if (f->floor_types[floor] == 0) {
3059 return error(f, VORBIS_invalid_stream);
3060 } else {
3061 Floor1 *g = &f->floor_config[floor].floor1;
3062 int j,q;
3063 int lx = 0, ly = finalY[0] * g->floor1_multiplier;
3064 for (q=1; q < g->values; ++q) {
3065 j = g->sorted_order[q];
3066 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3067 if (finalY[j] >= 0)
3068 #else
3069 if (step2_flag[j])
3070 #endif
3071 {
3072 int hy = finalY[j] * g->floor1_multiplier;
3073 int hx = g->Xlist[j];
3074 if (lx != hx)
3075 draw_line(target, lx,ly, hx,hy, n2);
3076 CHECK(f);
3077 lx = hx, ly = hy;
3078 }
3079 }
3080 if (lx < n2) {
3081 // optimization of: draw_line(target, lx,ly, n,ly, n2);
3082 for (j=lx; j < n2; ++j)
3083 LINE_OP(target[j], inverse_db_table[ly]);
3084 CHECK(f);
3085 }
3086 }
3087 return TRUE;
3088 }
3089
3090 // The meaning of "left" and "right"
3091 //
3092 // For a given frame:
3093 // we compute samples from 0..n
3094 // window_center is n/2
3095 // we'll window and mix the samples from left_start to left_end with data from the previous frame
3096 // all of the samples from left_end to right_start can be output without mixing; however,
3097 // this interval is 0-length except when transitioning between short and long frames
3098 // all of the samples from right_start to right_end need to be mixed with the next frame,
3099 // which we don't have, so those get saved in a buffer
3100 // frame N's right_end-right_start, the number of samples to mix with the next frame,
3101 // has to be the same as frame N+1's left_end-left_start (which they are by
3102 // construction)
3103
vorbis_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)3104 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
3105 {
3106 Mode *m;
3107 int i, n, prev, next, window_center;
3108 f->channel_buffer_start = f->channel_buffer_end = 0;
3109
3110 retry:
3111 if (f->eof) return FALSE;
3112 if (!maybe_start_packet(f))
3113 return FALSE;
3114 // check packet type
3115 if (get_bits(f,1) != 0) {
3116 if (IS_PUSH_MODE(f))
3117 return error(f,VORBIS_bad_packet_type);
3118 while (EOP != get8_packet(f));
3119 goto retry;
3120 }
3121
3122 if (f->alloc.alloc_buffer)
3123 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3124
3125 i = get_bits(f, ilog(f->mode_count-1));
3126 if (i == EOP) return FALSE;
3127 if (i >= f->mode_count) return FALSE;
3128 *mode = i;
3129 m = f->mode_config + i;
3130 if (m->blockflag) {
3131 n = f->blocksize_1;
3132 prev = get_bits(f,1);
3133 next = get_bits(f,1);
3134 } else {
3135 prev = next = 0;
3136 n = f->blocksize_0;
3137 }
3138
3139 // WINDOWING
3140
3141 window_center = n >> 1;
3142 if (m->blockflag && !prev) {
3143 *p_left_start = (n - f->blocksize_0) >> 2;
3144 *p_left_end = (n + f->blocksize_0) >> 2;
3145 } else {
3146 *p_left_start = 0;
3147 *p_left_end = window_center;
3148 }
3149 if (m->blockflag && !next) {
3150 *p_right_start = (n*3 - f->blocksize_0) >> 2;
3151 *p_right_end = (n*3 + f->blocksize_0) >> 2;
3152 } else {
3153 *p_right_start = window_center;
3154 *p_right_end = n;
3155 }
3156
3157 return TRUE;
3158 }
3159
vorbis_decode_packet_rest(vorb * f,int * len,Mode * m,int left_start,int left_end,int right_start,int right_end,int * p_left)3160 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
3161 {
3162 Mapping *map;
3163 int i,j,k,n,n2;
3164 int zero_channel[256];
3165 int really_zero_channel[256];
3166
3167 // WINDOWING
3168
3169 n = f->blocksize[m->blockflag];
3170 map = &f->mapping[m->mapping];
3171
3172 // FLOORS
3173 n2 = n >> 1;
3174
3175 CHECK(f);
3176
3177 for (i=0; i < f->channels; ++i) {
3178 int s = map->chan[i].mux, floor;
3179 zero_channel[i] = FALSE;
3180 floor = map->submap_floor[s];
3181 if (f->floor_types[floor] == 0) {
3182 return error(f, VORBIS_invalid_stream);
3183 } else {
3184 Floor1 *g = &f->floor_config[floor].floor1;
3185 if (get_bits(f, 1)) {
3186 short *finalY;
3187 uint8 step2_flag[256];
3188 static int range_list[4] = { 256, 128, 86, 64 };
3189 int range = range_list[g->floor1_multiplier-1];
3190 int offset = 2;
3191 finalY = f->finalY[i];
3192 finalY[0] = get_bits(f, ilog(range)-1);
3193 finalY[1] = get_bits(f, ilog(range)-1);
3194 for (j=0; j < g->partitions; ++j) {
3195 int pclass = g->partition_class_list[j];
3196 int cdim = g->class_dimensions[pclass];
3197 int cbits = g->class_subclasses[pclass];
3198 int csub = (1 << cbits)-1;
3199 int cval = 0;
3200 if (cbits) {
3201 Codebook *c = f->codebooks + g->class_masterbooks[pclass];
3202 DECODE(cval,f,c);
3203 }
3204 for (k=0; k < cdim; ++k) {
3205 int book = g->subclass_books[pclass][cval & csub];
3206 cval = cval >> cbits;
3207 if (book >= 0) {
3208 int temp;
3209 Codebook *c = f->codebooks + book;
3210 DECODE(temp,f,c);
3211 finalY[offset++] = temp;
3212 } else
3213 finalY[offset++] = 0;
3214 }
3215 }
3216 if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
3217 step2_flag[0] = step2_flag[1] = 1;
3218 for (j=2; j < g->values; ++j) {
3219 int low, high, pred, highroom, lowroom, room, val;
3220 low = g->neighbors[j][0];
3221 high = g->neighbors[j][1];
3222 //neighbors(g->Xlist, j, &low, &high);
3223 pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
3224 val = finalY[j];
3225 highroom = range - pred;
3226 lowroom = pred;
3227 if (highroom < lowroom)
3228 room = highroom * 2;
3229 else
3230 room = lowroom * 2;
3231 if (val) {
3232 step2_flag[low] = step2_flag[high] = 1;
3233 step2_flag[j] = 1;
3234 if (val >= room)
3235 if (highroom > lowroom)
3236 finalY[j] = val - lowroom + pred;
3237 else
3238 finalY[j] = pred - val + highroom - 1;
3239 else
3240 if (val & 1)
3241 finalY[j] = pred - ((val+1)>>1);
3242 else
3243 finalY[j] = pred + (val>>1);
3244 } else {
3245 step2_flag[j] = 0;
3246 finalY[j] = pred;
3247 }
3248 }
3249
3250 #ifdef STB_VORBIS_NO_DEFER_FLOOR
3251 do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
3252 #else
3253 // defer final floor computation until _after_ residue
3254 for (j=0; j < g->values; ++j) {
3255 if (!step2_flag[j])
3256 finalY[j] = -1;
3257 }
3258 #endif
3259 } else {
3260 error:
3261 zero_channel[i] = TRUE;
3262 }
3263 // So we just defer everything else to later
3264
3265 // at this point we've decoded the floor into buffer
3266 }
3267 }
3268 CHECK(f);
3269 // at this point we've decoded all floors
3270
3271 if (f->alloc.alloc_buffer)
3272 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3273
3274 // re-enable coupled channels if necessary
3275 memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
3276 for (i=0; i < map->coupling_steps; ++i)
3277 if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
3278 zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
3279 }
3280
3281 CHECK(f);
3282 // RESIDUE DECODE
3283 for (i=0; i < map->submaps; ++i) {
3284 float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
3285 int r;
3286 uint8 do_not_decode[256];
3287 int ch = 0;
3288 for (j=0; j < f->channels; ++j) {
3289 if (map->chan[j].mux == i) {
3290 if (zero_channel[j]) {
3291 do_not_decode[ch] = TRUE;
3292 residue_buffers[ch] = NULL;
3293 } else {
3294 do_not_decode[ch] = FALSE;
3295 residue_buffers[ch] = f->channel_buffers[j];
3296 }
3297 ++ch;
3298 }
3299 }
3300 r = map->submap_residue[i];
3301 decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
3302 }
3303
3304 if (f->alloc.alloc_buffer)
3305 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3306 CHECK(f);
3307
3308 // INVERSE COUPLING
3309 for (i = map->coupling_steps-1; i >= 0; --i) {
3310 int n2 = n >> 1;
3311 float *m = f->channel_buffers[map->chan[i].magnitude];
3312 float *a = f->channel_buffers[map->chan[i].angle ];
3313 for (j=0; j < n2; ++j) {
3314 float a2,m2;
3315 if (m[j] > 0)
3316 if (a[j] > 0)
3317 m2 = m[j], a2 = m[j] - a[j];
3318 else
3319 a2 = m[j], m2 = m[j] + a[j];
3320 else
3321 if (a[j] > 0)
3322 m2 = m[j], a2 = m[j] + a[j];
3323 else
3324 a2 = m[j], m2 = m[j] - a[j];
3325 m[j] = m2;
3326 a[j] = a2;
3327 }
3328 }
3329 CHECK(f);
3330
3331 // finish decoding the floors
3332 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3333 for (i=0; i < f->channels; ++i) {
3334 if (really_zero_channel[i]) {
3335 memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3336 } else {
3337 do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
3338 }
3339 }
3340 #else
3341 for (i=0; i < f->channels; ++i) {
3342 if (really_zero_channel[i]) {
3343 memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3344 } else {
3345 for (j=0; j < n2; ++j)
3346 f->channel_buffers[i][j] *= f->floor_buffers[i][j];
3347 }
3348 }
3349 #endif
3350
3351 // INVERSE MDCT
3352 CHECK(f);
3353 for (i=0; i < f->channels; ++i)
3354 inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
3355 CHECK(f);
3356
3357 // this shouldn't be necessary, unless we exited on an error
3358 // and want to flush to get to the next packet
3359 flush_packet(f);
3360
3361 if (f->first_decode) {
3362 // assume we start so first non-discarded sample is sample 0
3363 // this isn't to spec, but spec would require us to read ahead
3364 // and decode the size of all current frames--could be done,
3365 // but presumably it's not a commonly used feature
3366 f->current_loc = -n2; // start of first frame is positioned for discard
3367 // we might have to discard samples "from" the next frame too,
3368 // if we're lapping a large block then a small at the start?
3369 f->discard_samples_deferred = n - right_end;
3370 f->current_loc_valid = TRUE;
3371 f->first_decode = FALSE;
3372 } else if (f->discard_samples_deferred) {
3373 if (f->discard_samples_deferred >= right_start - left_start) {
3374 f->discard_samples_deferred -= (right_start - left_start);
3375 left_start = right_start;
3376 *p_left = left_start;
3377 } else {
3378 left_start += f->discard_samples_deferred;
3379 *p_left = left_start;
3380 f->discard_samples_deferred = 0;
3381 }
3382 } else if (f->previous_length == 0 && f->current_loc_valid) {
3383 // we're recovering from a seek... that means we're going to discard
3384 // the samples from this packet even though we know our position from
3385 // the last page header, so we need to update the position based on
3386 // the discarded samples here
3387 // but wait, the code below is going to add this in itself even
3388 // on a discard, so we don't need to do it here...
3389 }
3390
3391 // check if we have ogg information about the sample # for this packet
3392 if (f->last_seg_which == f->end_seg_with_known_loc) {
3393 // if we have a valid current loc, and this is final:
3394 if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
3395 uint32 current_end = f->known_loc_for_packet;
3396 // then let's infer the size of the (probably) short final frame
3397 if (current_end < f->current_loc + (right_end-left_start)) {
3398 if (current_end < f->current_loc) {
3399 // negative truncation, that's impossible!
3400 *len = 0;
3401 } else {
3402 *len = current_end - f->current_loc;
3403 }
3404 *len += left_start; // this doesn't seem right, but has no ill effect on my test files
3405 if (*len > right_end) *len = right_end; // this should never happen
3406 f->current_loc += *len;
3407 return TRUE;
3408 }
3409 }
3410 // otherwise, just set our sample loc
3411 // guess that the ogg granule pos refers to the _middle_ of the
3412 // last frame?
3413 // set f->current_loc to the position of left_start
3414 f->current_loc = f->known_loc_for_packet - (n2-left_start);
3415 f->current_loc_valid = TRUE;
3416 }
3417 if (f->current_loc_valid)
3418 f->current_loc += (right_start - left_start);
3419
3420 if (f->alloc.alloc_buffer)
3421 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3422 *len = right_end; // ignore samples after the window goes to 0
3423 CHECK(f);
3424
3425 return TRUE;
3426 }
3427
vorbis_decode_packet(vorb * f,int * len,int * p_left,int * p_right)3428 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
3429 {
3430 int mode, left_end, right_end;
3431 if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
3432 return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
3433 }
3434
vorbis_finish_frame(stb_vorbis * f,int len,int left,int right)3435 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
3436 {
3437 int prev,i,j;
3438 // we use right&left (the start of the right- and left-window sin()-regions)
3439 // to determine how much to return, rather than inferring from the rules
3440 // (same result, clearer code); 'left' indicates where our sin() window
3441 // starts, therefore where the previous window's right edge starts, and
3442 // therefore where to start mixing from the previous buffer. 'right'
3443 // indicates where our sin() ending-window starts, therefore that's where
3444 // we start saving, and where our returned-data ends.
3445
3446 // mixin from previous window
3447 if (f->previous_length) {
3448 int i,j, n = f->previous_length;
3449 float *w = get_window(f, n);
3450 if (w == NULL) return 0;
3451 for (i=0; i < f->channels; ++i) {
3452 for (j=0; j < n; ++j)
3453 f->channel_buffers[i][left+j] =
3454 f->channel_buffers[i][left+j]*w[ j] +
3455 f->previous_window[i][ j]*w[n-1-j];
3456 }
3457 }
3458
3459 prev = f->previous_length;
3460
3461 // last half of this data becomes previous window
3462 f->previous_length = len - right;
3463
3464 // @OPTIMIZE: could avoid this copy by double-buffering the
3465 // output (flipping previous_window with channel_buffers), but
3466 // then previous_window would have to be 2x as large, and
3467 // channel_buffers couldn't be temp mem (although they're NOT
3468 // currently temp mem, they could be (unless we want to level
3469 // performance by spreading out the computation))
3470 for (i=0; i < f->channels; ++i)
3471 for (j=0; right+j < len; ++j)
3472 f->previous_window[i][j] = f->channel_buffers[i][right+j];
3473
3474 if (!prev)
3475 // there was no previous packet, so this data isn't valid...
3476 // this isn't entirely true, only the would-have-overlapped data
3477 // isn't valid, but this seems to be what the spec requires
3478 return 0;
3479
3480 // truncate a short frame
3481 if (len < right) right = len;
3482
3483 f->samples_output += right-left;
3484
3485 return right - left;
3486 }
3487
vorbis_pump_first_frame(stb_vorbis * f)3488 static int vorbis_pump_first_frame(stb_vorbis *f)
3489 {
3490 int len, right, left, res;
3491 res = vorbis_decode_packet(f, &len, &left, &right);
3492 if (res)
3493 vorbis_finish_frame(f, len, left, right);
3494 return res;
3495 }
3496
3497 #ifndef STB_VORBIS_NO_PUSHDATA_API
is_whole_packet_present(stb_vorbis * f)3498 static int is_whole_packet_present(stb_vorbis *f)
3499 {
3500 // make sure that we have the packet available before continuing...
3501 // this requires a full ogg parse, but we know we can fetch from f->stream
3502
3503 // instead of coding this out explicitly, we could save the current read state,
3504 // read the next packet with get8() until end-of-packet, check f->eof, then
3505 // reset the state? but that would be slower, esp. since we'd have over 256 bytes
3506 // of state to restore (primarily the page segment table)
3507
3508 int s = f->next_seg, first = TRUE;
3509 uint8 *p = f->stream;
3510
3511 if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
3512 for (; s < f->segment_count; ++s) {
3513 p += f->segments[s];
3514 if (f->segments[s] < 255) // stop at first short segment
3515 break;
3516 }
3517 // either this continues, or it ends it...
3518 if (s == f->segment_count)
3519 s = -1; // set 'crosses page' flag
3520 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3521 first = FALSE;
3522 }
3523 for (; s == -1;) {
3524 uint8 *q;
3525 int n;
3526
3527 // check that we have the page header ready
3528 if (p + 26 >= f->stream_end) return error(f, VORBIS_need_more_data);
3529 // validate the page
3530 if (memcmp(p, ogg_page_header, 4)) return error(f, VORBIS_invalid_stream);
3531 if (p[4] != 0) return error(f, VORBIS_invalid_stream);
3532 if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
3533 if (f->previous_length)
3534 if ((p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3535 // if no previous length, we're resynching, so we can come in on a continued-packet,
3536 // which we'll just drop
3537 } else {
3538 if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3539 }
3540 n = p[26]; // segment counts
3541 q = p+27; // q points to segment table
3542 p = q + n; // advance past header
3543 // make sure we've read the segment table
3544 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3545 for (s=0; s < n; ++s) {
3546 p += q[s];
3547 if (q[s] < 255)
3548 break;
3549 }
3550 if (s == n)
3551 s = -1; // set 'crosses page' flag
3552 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3553 first = FALSE;
3554 }
3555 return TRUE;
3556 }
3557 #endif // !STB_VORBIS_NO_PUSHDATA_API
3558
start_decoder(vorb * f)3559 static int start_decoder(vorb *f)
3560 {
3561 uint8 header[6], x,y;
3562 int len,i,j,k, max_submaps = 0;
3563 int longest_floorlist=0;
3564
3565 // first page, first packet
3566 f->first_decode = TRUE;
3567
3568 if (!start_page(f)) return FALSE;
3569 // validate page flag
3570 if (!(f->page_flag & PAGEFLAG_first_page)) return error(f, VORBIS_invalid_first_page);
3571 if (f->page_flag & PAGEFLAG_last_page) return error(f, VORBIS_invalid_first_page);
3572 if (f->page_flag & PAGEFLAG_continued_packet) return error(f, VORBIS_invalid_first_page);
3573 // check for expected packet length
3574 if (f->segment_count != 1) return error(f, VORBIS_invalid_first_page);
3575 if (f->segments[0] != 30) {
3576 // check for the Ogg skeleton fishead identifying header to refine our error
3577 if (f->segments[0] == 64 &&
3578 getn(f, header, 6) &&
3579 header[0] == 'f' &&
3580 header[1] == 'i' &&
3581 header[2] == 's' &&
3582 header[3] == 'h' &&
3583 header[4] == 'e' &&
3584 header[5] == 'a' &&
3585 get8(f) == 'd' &&
3586 get8(f) == '\0') return error(f, VORBIS_ogg_skeleton_not_supported);
3587 else
3588 return error(f, VORBIS_invalid_first_page);
3589 }
3590
3591 // read packet
3592 // check packet header
3593 if (get8(f) != VORBIS_packet_id) return error(f, VORBIS_invalid_first_page);
3594 if (!getn(f, header, 6)) return error(f, VORBIS_unexpected_eof);
3595 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_first_page);
3596 // vorbis_version
3597 if (get32(f) != 0) return error(f, VORBIS_invalid_first_page);
3598 f->channels = get8(f); if (!f->channels) return error(f, VORBIS_invalid_first_page);
3599 if (f->channels > STB_VORBIS_MAX_CHANNELS) return error(f, VORBIS_too_many_channels);
3600 f->sample_rate = get32(f); if (!f->sample_rate) return error(f, VORBIS_invalid_first_page);
3601 get32(f); // bitrate_maximum
3602 get32(f); // bitrate_nominal
3603 get32(f); // bitrate_minimum
3604 x = get8(f);
3605 {
3606 int log0,log1;
3607 log0 = x & 15;
3608 log1 = x >> 4;
3609 f->blocksize_0 = 1 << log0;
3610 f->blocksize_1 = 1 << log1;
3611 if (log0 < 6 || log0 > 13) return error(f, VORBIS_invalid_setup);
3612 if (log1 < 6 || log1 > 13) return error(f, VORBIS_invalid_setup);
3613 if (log0 > log1) return error(f, VORBIS_invalid_setup);
3614 }
3615
3616 // framing_flag
3617 x = get8(f);
3618 if (!(x & 1)) return error(f, VORBIS_invalid_first_page);
3619
3620 // second packet!
3621 if (!start_page(f)) return FALSE;
3622
3623 if (!start_packet(f)) return FALSE;
3624
3625 if (!next_segment(f)) return FALSE;
3626
3627 if (get8_packet(f) != VORBIS_packet_comment) return error(f, VORBIS_invalid_setup);
3628 for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3629 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_setup);
3630 //file vendor
3631 len = get32_packet(f);
3632 f->vendor = (char*)setup_malloc(f, sizeof(char) * (len+1));
3633 for(i=0; i < len; ++i) {
3634 f->vendor[i] = get8_packet(f);
3635 }
3636 f->vendor[len] = (char)'\0';
3637 //user comments
3638 f->comment_list_length = get32_packet(f);
3639 f->comment_list = (char**)setup_malloc(f, sizeof(char*) * (f->comment_list_length));
3640
3641 for(i=0; i < f->comment_list_length; ++i) {
3642 len = get32_packet(f);
3643 f->comment_list[i] = (char*)setup_malloc(f, sizeof(char) * (len+1));
3644
3645 for(j=0; j < len; ++j) {
3646 f->comment_list[i][j] = get8_packet(f);
3647 }
3648 f->comment_list[i][len] = (char)'\0';
3649 }
3650
3651 // framing_flag
3652 x = get8_packet(f);
3653 if (!(x & 1)) return error(f, VORBIS_invalid_setup);
3654
3655
3656 skip(f, f->bytes_in_seg);
3657 f->bytes_in_seg = 0;
3658
3659 do {
3660 len = next_segment(f);
3661 skip(f, len);
3662 f->bytes_in_seg = 0;
3663 } while (len);
3664
3665 // third packet!
3666 if (!start_packet(f)) return FALSE;
3667
3668 #ifndef STB_VORBIS_NO_PUSHDATA_API
3669 if (IS_PUSH_MODE(f)) {
3670 if (!is_whole_packet_present(f)) {
3671 // convert error in ogg header to write type
3672 if (f->error == VORBIS_invalid_stream)
3673 f->error = VORBIS_invalid_setup;
3674 return FALSE;
3675 }
3676 }
3677 #endif
3678
3679 crc32_init(); // always init it, to avoid multithread race conditions
3680
3681 if (get8_packet(f) != VORBIS_packet_setup) return error(f, VORBIS_invalid_setup);
3682 for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3683 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_setup);
3684
3685 // codebooks
3686
3687 f->codebook_count = get_bits(f,8) + 1;
3688 f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
3689 if (f->codebooks == NULL) return error(f, VORBIS_outofmem);
3690 memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
3691 for (i=0; i < f->codebook_count; ++i) {
3692 uint32 *values;
3693 int ordered, sorted_count;
3694 int total=0;
3695 uint8 *lengths;
3696 Codebook *c = f->codebooks+i;
3697 CHECK(f);
3698 x = get_bits(f, 8); if (x != 0x42) return error(f, VORBIS_invalid_setup);
3699 x = get_bits(f, 8); if (x != 0x43) return error(f, VORBIS_invalid_setup);
3700 x = get_bits(f, 8); if (x != 0x56) return error(f, VORBIS_invalid_setup);
3701 x = get_bits(f, 8);
3702 c->dimensions = (get_bits(f, 8)<<8) + x;
3703 x = get_bits(f, 8);
3704 y = get_bits(f, 8);
3705 c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
3706 ordered = get_bits(f,1);
3707 c->sparse = ordered ? 0 : get_bits(f,1);
3708
3709 if (c->dimensions == 0 && c->entries != 0) return error(f, VORBIS_invalid_setup);
3710
3711 if (c->sparse)
3712 lengths = (uint8 *) setup_temp_malloc(f, c->entries);
3713 else
3714 lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3715
3716 if (!lengths) return error(f, VORBIS_outofmem);
3717
3718 if (ordered) {
3719 int current_entry = 0;
3720 int current_length = get_bits(f,5) + 1;
3721 while (current_entry < c->entries) {
3722 int limit = c->entries - current_entry;
3723 int n = get_bits(f, ilog(limit));
3724 if (current_length >= 32) return error(f, VORBIS_invalid_setup);
3725 if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
3726 memset(lengths + current_entry, current_length, n);
3727 current_entry += n;
3728 ++current_length;
3729 }
3730 } else {
3731 for (j=0; j < c->entries; ++j) {
3732 int present = c->sparse ? get_bits(f,1) : 1;
3733 if (present) {
3734 lengths[j] = get_bits(f, 5) + 1;
3735 ++total;
3736 if (lengths[j] == 32)
3737 return error(f, VORBIS_invalid_setup);
3738 } else {
3739 lengths[j] = NO_CODE;
3740 }
3741 }
3742 }
3743
3744 if (c->sparse && total >= c->entries >> 2) {
3745 // convert sparse items to non-sparse!
3746 if (c->entries > (int) f->setup_temp_memory_required)
3747 f->setup_temp_memory_required = c->entries;
3748
3749 c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3750 if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem);
3751 memcpy(c->codeword_lengths, lengths, c->entries);
3752 setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
3753 lengths = c->codeword_lengths;
3754 c->sparse = 0;
3755 }
3756
3757 // compute the size of the sorted tables
3758 if (c->sparse) {
3759 sorted_count = total;
3760 } else {
3761 sorted_count = 0;
3762 #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
3763 for (j=0; j < c->entries; ++j)
3764 if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
3765 ++sorted_count;
3766 #endif
3767 }
3768
3769 c->sorted_entries = sorted_count;
3770 values = NULL;
3771
3772 CHECK(f);
3773 if (!c->sparse) {
3774 c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
3775 if (!c->codewords) return error(f, VORBIS_outofmem);
3776 } else {
3777 unsigned int size;
3778 if (c->sorted_entries) {
3779 c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries);
3780 if (!c->codeword_lengths) return error(f, VORBIS_outofmem);
3781 c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
3782 if (!c->codewords) return error(f, VORBIS_outofmem);
3783 values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
3784 if (!values) return error(f, VORBIS_outofmem);
3785 }
3786 size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
3787 if (size > f->setup_temp_memory_required)
3788 f->setup_temp_memory_required = size;
3789 }
3790
3791 if (!compute_codewords(c, lengths, c->entries, values)) {
3792 if (c->sparse) setup_temp_free(f, values, 0);
3793 return error(f, VORBIS_invalid_setup);
3794 }
3795
3796 if (c->sorted_entries) {
3797 // allocate an extra slot for sentinels
3798 c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
3799 if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem);
3800 // allocate an extra slot at the front so that c->sorted_values[-1] is defined
3801 // so that we can catch that case without an extra if
3802 c->sorted_values = ( int *) setup_malloc(f, sizeof(*c->sorted_values ) * (c->sorted_entries+1));
3803 if (c->sorted_values == NULL) return error(f, VORBIS_outofmem);
3804 ++c->sorted_values;
3805 c->sorted_values[-1] = -1;
3806 compute_sorted_huffman(c, lengths, values);
3807 }
3808
3809 if (c->sparse) {
3810 setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
3811 setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
3812 setup_temp_free(f, lengths, c->entries);
3813 c->codewords = NULL;
3814 }
3815
3816 compute_accelerated_huffman(c);
3817
3818 CHECK(f);
3819 c->lookup_type = get_bits(f, 4);
3820 if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
3821 if (c->lookup_type > 0) {
3822 uint16 *mults;
3823 c->minimum_value = float32_unpack(get_bits(f, 32));
3824 c->delta_value = float32_unpack(get_bits(f, 32));
3825 c->value_bits = get_bits(f, 4)+1;
3826 c->sequence_p = get_bits(f,1);
3827 if (c->lookup_type == 1) {
3828 int values = lookup1_values(c->entries, c->dimensions);
3829 if (values < 0) return error(f, VORBIS_invalid_setup);
3830 c->lookup_values = (uint32) values;
3831 } else {
3832 c->lookup_values = c->entries * c->dimensions;
3833 }
3834 if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup);
3835 mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
3836 if (mults == NULL) return error(f, VORBIS_outofmem);
3837 for (j=0; j < (int) c->lookup_values; ++j) {
3838 int q = get_bits(f, c->value_bits);
3839 if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
3840 mults[j] = q;
3841 }
3842
3843 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3844 if (c->lookup_type == 1) {
3845 int len, sparse = c->sparse;
3846 float last=0;
3847 // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3848 if (sparse) {
3849 if (c->sorted_entries == 0) goto skip;
3850 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
3851 } else
3852 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries * c->dimensions);
3853 if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3854 len = sparse ? c->sorted_entries : c->entries;
3855 for (j=0; j < len; ++j) {
3856 unsigned int z = sparse ? c->sorted_values[j] : j;
3857 unsigned int div=1;
3858 for (k=0; k < c->dimensions; ++k) {
3859 int off = (z / div) % c->lookup_values;
3860 float val = mults[off];
3861 val = mults[off]*c->delta_value + c->minimum_value + last;
3862 c->multiplicands[j*c->dimensions + k] = val;
3863 if (c->sequence_p)
3864 last = val;
3865 if (k+1 < c->dimensions) {
3866 if (div > UINT_MAX / (unsigned int) c->lookup_values) {
3867 setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
3868 return error(f, VORBIS_invalid_setup);
3869 }
3870 div *= c->lookup_values;
3871 }
3872 }
3873 }
3874 c->lookup_type = 2;
3875 }
3876 else
3877 #endif
3878 {
3879 float last=0;
3880 CHECK(f);
3881 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
3882 if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3883 for (j=0; j < (int) c->lookup_values; ++j) {
3884 float val = mults[j] * c->delta_value + c->minimum_value + last;
3885 c->multiplicands[j] = val;
3886 if (c->sequence_p)
3887 last = val;
3888 }
3889 }
3890 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3891 skip:;
3892 #endif
3893 setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values);
3894
3895 CHECK(f);
3896 }
3897 CHECK(f);
3898 }
3899
3900 // time domain transfers (notused)
3901
3902 x = get_bits(f, 6) + 1;
3903 for (i=0; i < x; ++i) {
3904 uint32 z = get_bits(f, 16);
3905 if (z != 0) return error(f, VORBIS_invalid_setup);
3906 }
3907
3908 // Floors
3909 f->floor_count = get_bits(f, 6)+1;
3910 f->floor_config = (Floor *) setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
3911 if (f->floor_config == NULL) return error(f, VORBIS_outofmem);
3912 for (i=0; i < f->floor_count; ++i) {
3913 f->floor_types[i] = get_bits(f, 16);
3914 if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
3915 if (f->floor_types[i] == 0) {
3916 Floor0 *g = &f->floor_config[i].floor0;
3917 g->order = get_bits(f,8);
3918 g->rate = get_bits(f,16);
3919 g->bark_map_size = get_bits(f,16);
3920 g->amplitude_bits = get_bits(f,6);
3921 g->amplitude_offset = get_bits(f,8);
3922 g->number_of_books = get_bits(f,4) + 1;
3923 for (j=0; j < g->number_of_books; ++j)
3924 g->book_list[j] = get_bits(f,8);
3925 return error(f, VORBIS_feature_not_supported);
3926 } else {
3927 stbv__floor_ordering p[31*8+2];
3928 Floor1 *g = &f->floor_config[i].floor1;
3929 int max_class = -1;
3930 g->partitions = get_bits(f, 5);
3931 for (j=0; j < g->partitions; ++j) {
3932 g->partition_class_list[j] = get_bits(f, 4);
3933 if (g->partition_class_list[j] > max_class)
3934 max_class = g->partition_class_list[j];
3935 }
3936 for (j=0; j <= max_class; ++j) {
3937 g->class_dimensions[j] = get_bits(f, 3)+1;
3938 g->class_subclasses[j] = get_bits(f, 2);
3939 if (g->class_subclasses[j]) {
3940 g->class_masterbooks[j] = get_bits(f, 8);
3941 if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3942 }
3943 for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
3944 g->subclass_books[j][k] = get_bits(f,8)-1;
3945 if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3946 }
3947 }
3948 g->floor1_multiplier = get_bits(f,2)+1;
3949 g->rangebits = get_bits(f,4);
3950 g->Xlist[0] = 0;
3951 g->Xlist[1] = 1 << g->rangebits;
3952 g->values = 2;
3953 for (j=0; j < g->partitions; ++j) {
3954 int c = g->partition_class_list[j];
3955 for (k=0; k < g->class_dimensions[c]; ++k) {
3956 g->Xlist[g->values] = get_bits(f, g->rangebits);
3957 ++g->values;
3958 }
3959 }
3960 // precompute the sorting
3961 for (j=0; j < g->values; ++j) {
3962 p[j].x = g->Xlist[j];
3963 p[j].id = j;
3964 }
3965 qsort(p, g->values, sizeof(p[0]), point_compare);
3966 for (j=0; j < g->values-1; ++j)
3967 if (p[j].x == p[j+1].x)
3968 return error(f, VORBIS_invalid_setup);
3969 for (j=0; j < g->values; ++j)
3970 g->sorted_order[j] = (uint8) p[j].id;
3971 // precompute the neighbors
3972 for (j=2; j < g->values; ++j) {
3973 int low = 0,hi = 0;
3974 neighbors(g->Xlist, j, &low,&hi);
3975 g->neighbors[j][0] = low;
3976 g->neighbors[j][1] = hi;
3977 }
3978
3979 if (g->values > longest_floorlist)
3980 longest_floorlist = g->values;
3981 }
3982 }
3983
3984 // Residue
3985 f->residue_count = get_bits(f, 6)+1;
3986 f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0]));
3987 if (f->residue_config == NULL) return error(f, VORBIS_outofmem);
3988 memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0]));
3989 for (i=0; i < f->residue_count; ++i) {
3990 uint8 residue_cascade[64];
3991 Residue *r = f->residue_config+i;
3992 f->residue_types[i] = get_bits(f, 16);
3993 if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
3994 r->begin = get_bits(f, 24);
3995 r->end = get_bits(f, 24);
3996 if (r->end < r->begin) return error(f, VORBIS_invalid_setup);
3997 r->part_size = get_bits(f,24)+1;
3998 r->classifications = get_bits(f,6)+1;
3999 r->classbook = get_bits(f,8);
4000 if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup);
4001 for (j=0; j < r->classifications; ++j) {
4002 uint8 high_bits=0;
4003 uint8 low_bits=get_bits(f,3);
4004 if (get_bits(f,1))
4005 high_bits = get_bits(f,5);
4006 residue_cascade[j] = high_bits*8 + low_bits;
4007 }
4008 r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
4009 if (r->residue_books == NULL) return error(f, VORBIS_outofmem);
4010 for (j=0; j < r->classifications; ++j) {
4011 for (k=0; k < 8; ++k) {
4012 if (residue_cascade[j] & (1 << k)) {
4013 r->residue_books[j][k] = get_bits(f, 8);
4014 if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
4015 } else {
4016 r->residue_books[j][k] = -1;
4017 }
4018 }
4019 }
4020 // precompute the classifications[] array to avoid inner-loop mod/divide
4021 // call it 'classdata' since we already have r->classifications
4022 r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
4023 if (!r->classdata) return error(f, VORBIS_outofmem);
4024 memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
4025 for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
4026 int classwords = f->codebooks[r->classbook].dimensions;
4027 int temp = j;
4028 r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
4029 if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem);
4030 for (k=classwords-1; k >= 0; --k) {
4031 r->classdata[j][k] = temp % r->classifications;
4032 temp /= r->classifications;
4033 }
4034 }
4035 }
4036
4037 f->mapping_count = get_bits(f,6)+1;
4038 f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
4039 if (f->mapping == NULL) return error(f, VORBIS_outofmem);
4040 memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping));
4041 for (i=0; i < f->mapping_count; ++i) {
4042 Mapping *m = f->mapping + i;
4043 int mapping_type = get_bits(f,16);
4044 if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
4045 m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
4046 if (m->chan == NULL) return error(f, VORBIS_outofmem);
4047 if (get_bits(f,1))
4048 m->submaps = get_bits(f,4)+1;
4049 else
4050 m->submaps = 1;
4051 if (m->submaps > max_submaps)
4052 max_submaps = m->submaps;
4053 if (get_bits(f,1)) {
4054 m->coupling_steps = get_bits(f,8)+1;
4055 if (m->coupling_steps > f->channels) return error(f, VORBIS_invalid_setup);
4056 for (k=0; k < m->coupling_steps; ++k) {
4057 m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
4058 m->chan[k].angle = get_bits(f, ilog(f->channels-1));
4059 if (m->chan[k].magnitude >= f->channels) return error(f, VORBIS_invalid_setup);
4060 if (m->chan[k].angle >= f->channels) return error(f, VORBIS_invalid_setup);
4061 if (m->chan[k].magnitude == m->chan[k].angle) return error(f, VORBIS_invalid_setup);
4062 }
4063 } else
4064 m->coupling_steps = 0;
4065
4066 // reserved field
4067 if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
4068 if (m->submaps > 1) {
4069 for (j=0; j < f->channels; ++j) {
4070 m->chan[j].mux = get_bits(f, 4);
4071 if (m->chan[j].mux >= m->submaps) return error(f, VORBIS_invalid_setup);
4072 }
4073 } else
4074 // @SPECIFICATION: this case is missing from the spec
4075 for (j=0; j < f->channels; ++j)
4076 m->chan[j].mux = 0;
4077
4078 for (j=0; j < m->submaps; ++j) {
4079 get_bits(f,8); // discard
4080 m->submap_floor[j] = get_bits(f,8);
4081 m->submap_residue[j] = get_bits(f,8);
4082 if (m->submap_floor[j] >= f->floor_count) return error(f, VORBIS_invalid_setup);
4083 if (m->submap_residue[j] >= f->residue_count) return error(f, VORBIS_invalid_setup);
4084 }
4085 }
4086
4087 // Modes
4088 f->mode_count = get_bits(f, 6)+1;
4089 for (i=0; i < f->mode_count; ++i) {
4090 Mode *m = f->mode_config+i;
4091 m->blockflag = get_bits(f,1);
4092 m->windowtype = get_bits(f,16);
4093 m->transformtype = get_bits(f,16);
4094 m->mapping = get_bits(f,8);
4095 if (m->windowtype != 0) return error(f, VORBIS_invalid_setup);
4096 if (m->transformtype != 0) return error(f, VORBIS_invalid_setup);
4097 if (m->mapping >= f->mapping_count) return error(f, VORBIS_invalid_setup);
4098 }
4099
4100 flush_packet(f);
4101
4102 f->previous_length = 0;
4103
4104 for (i=0; i < f->channels; ++i) {
4105 f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
4106 f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4107 f->finalY[i] = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
4108 if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem);
4109 memset(f->channel_buffers[i], 0, sizeof(float) * f->blocksize_1);
4110 #ifdef STB_VORBIS_NO_DEFER_FLOOR
4111 f->floor_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4112 if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem);
4113 #endif
4114 }
4115
4116 if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
4117 if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
4118 f->blocksize[0] = f->blocksize_0;
4119 f->blocksize[1] = f->blocksize_1;
4120
4121 #ifdef STB_VORBIS_DIVIDE_TABLE
4122 if (integer_divide_table[1][1]==0)
4123 for (i=0; i < DIVTAB_NUMER; ++i)
4124 for (j=1; j < DIVTAB_DENOM; ++j)
4125 integer_divide_table[i][j] = i / j;
4126 #endif
4127
4128 // compute how much temporary memory is needed
4129
4130 // 1.
4131 {
4132 uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
4133 uint32 classify_mem;
4134 int i,max_part_read=0;
4135 for (i=0; i < f->residue_count; ++i) {
4136 Residue *r = f->residue_config + i;
4137 unsigned int actual_size = f->blocksize_1 / 2;
4138 unsigned int limit_r_begin = r->begin < actual_size ? r->begin : actual_size;
4139 unsigned int limit_r_end = r->end < actual_size ? r->end : actual_size;
4140 int n_read = limit_r_end - limit_r_begin;
4141 int part_read = n_read / r->part_size;
4142 if (part_read > max_part_read)
4143 max_part_read = part_read;
4144 }
4145 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
4146 classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
4147 #else
4148 classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
4149 #endif
4150
4151 // maximum reasonable partition size is f->blocksize_1
4152
4153 f->temp_memory_required = classify_mem;
4154 if (imdct_mem > f->temp_memory_required)
4155 f->temp_memory_required = imdct_mem;
4156 }
4157
4158
4159 if (f->alloc.alloc_buffer) {
4160 assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
4161 // check if there's enough temp memory so we don't error later
4162 if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
4163 return error(f, VORBIS_outofmem);
4164 }
4165
4166 // @TODO: stb_vorbis_seek_start expects first_audio_page_offset to point to a page
4167 // without PAGEFLAG_continued_packet, so this either points to the first page, or
4168 // the page after the end of the headers. It might be cleaner to point to a page
4169 // in the middle of the headers, when that's the page where the first audio packet
4170 // starts, but we'd have to also correctly skip the end of any continued packet in
4171 // stb_vorbis_seek_start.
4172 if (f->next_seg == -1) {
4173 f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
4174 } else {
4175 f->first_audio_page_offset = 0;
4176 }
4177
4178 return TRUE;
4179 }
4180
vorbis_deinit(stb_vorbis * p)4181 static void vorbis_deinit(stb_vorbis *p)
4182 {
4183 int i,j;
4184
4185 setup_free(p, p->vendor);
4186 for (i=0; i < p->comment_list_length; ++i) {
4187 setup_free(p, p->comment_list[i]);
4188 }
4189 setup_free(p, p->comment_list);
4190
4191 if (p->residue_config) {
4192 for (i=0; i < p->residue_count; ++i) {
4193 Residue *r = p->residue_config+i;
4194 if (r->classdata) {
4195 for (j=0; j < p->codebooks[r->classbook].entries; ++j)
4196 setup_free(p, r->classdata[j]);
4197 setup_free(p, r->classdata);
4198 }
4199 setup_free(p, r->residue_books);
4200 }
4201 }
4202
4203 if (p->codebooks) {
4204 CHECK(p);
4205 for (i=0; i < p->codebook_count; ++i) {
4206 Codebook *c = p->codebooks + i;
4207 setup_free(p, c->codeword_lengths);
4208 setup_free(p, c->multiplicands);
4209 setup_free(p, c->codewords);
4210 setup_free(p, c->sorted_codewords);
4211 // c->sorted_values[-1] is the first entry in the array
4212 setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
4213 }
4214 setup_free(p, p->codebooks);
4215 }
4216 setup_free(p, p->floor_config);
4217 setup_free(p, p->residue_config);
4218 if (p->mapping) {
4219 for (i=0; i < p->mapping_count; ++i)
4220 setup_free(p, p->mapping[i].chan);
4221 setup_free(p, p->mapping);
4222 }
4223 CHECK(p);
4224 for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
4225 setup_free(p, p->channel_buffers[i]);
4226 setup_free(p, p->previous_window[i]);
4227 #ifdef STB_VORBIS_NO_DEFER_FLOOR
4228 setup_free(p, p->floor_buffers[i]);
4229 #endif
4230 setup_free(p, p->finalY[i]);
4231 }
4232 for (i=0; i < 2; ++i) {
4233 setup_free(p, p->A[i]);
4234 setup_free(p, p->B[i]);
4235 setup_free(p, p->C[i]);
4236 setup_free(p, p->window[i]);
4237 setup_free(p, p->bit_reverse[i]);
4238 }
4239 #ifndef STB_VORBIS_NO_STDIO
4240 if (p->close_on_free) fclose(p->f);
4241 #endif
4242 }
4243
stb_vorbis_close(stb_vorbis * p)4244 void stb_vorbis_close(stb_vorbis *p)
4245 {
4246 if (p == NULL) return;
4247 vorbis_deinit(p);
4248 setup_free(p,p);
4249 }
4250
vorbis_init(stb_vorbis * p,const stb_vorbis_alloc * z)4251 static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
4252 {
4253 memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start
4254 if (z) {
4255 p->alloc = *z;
4256 p->alloc.alloc_buffer_length_in_bytes = (p->alloc.alloc_buffer_length_in_bytes+3) & ~3;
4257 p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
4258 }
4259 p->eof = 0;
4260 p->error = VORBIS__no_error;
4261 p->stream = NULL;
4262 p->codebooks = NULL;
4263 p->page_crc_tests = -1;
4264 #ifndef STB_VORBIS_NO_STDIO
4265 p->close_on_free = FALSE;
4266 p->f = NULL;
4267 #endif
4268 }
4269
stb_vorbis_get_sample_offset(stb_vorbis * f)4270 int stb_vorbis_get_sample_offset(stb_vorbis *f)
4271 {
4272 if (f->current_loc_valid)
4273 return f->current_loc;
4274 else
4275 return -1;
4276 }
4277
stb_vorbis_get_info(stb_vorbis * f)4278 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
4279 {
4280 stb_vorbis_info d;
4281 d.channels = f->channels;
4282 d.sample_rate = f->sample_rate;
4283 d.setup_memory_required = f->setup_memory_required;
4284 d.setup_temp_memory_required = f->setup_temp_memory_required;
4285 d.temp_memory_required = f->temp_memory_required;
4286 d.max_frame_size = f->blocksize_1 >> 1;
4287 return d;
4288 }
4289
stb_vorbis_get_comment(stb_vorbis * f)4290 stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f)
4291 {
4292 stb_vorbis_comment d;
4293 d.vendor = f->vendor;
4294 d.comment_list_length = f->comment_list_length;
4295 d.comment_list = f->comment_list;
4296 return d;
4297 }
4298
stb_vorbis_get_error(stb_vorbis * f)4299 int stb_vorbis_get_error(stb_vorbis *f)
4300 {
4301 int e = f->error;
4302 f->error = VORBIS__no_error;
4303 return e;
4304 }
4305
vorbis_alloc(stb_vorbis * f)4306 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
4307 {
4308 stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
4309 return p;
4310 }
4311
4312 #ifndef STB_VORBIS_NO_PUSHDATA_API
4313
stb_vorbis_flush_pushdata(stb_vorbis * f)4314 void stb_vorbis_flush_pushdata(stb_vorbis *f)
4315 {
4316 f->previous_length = 0;
4317 f->page_crc_tests = 0;
4318 f->discard_samples_deferred = 0;
4319 f->current_loc_valid = FALSE;
4320 f->first_decode = FALSE;
4321 f->samples_output = 0;
4322 f->channel_buffer_start = 0;
4323 f->channel_buffer_end = 0;
4324 }
4325
vorbis_search_for_page_pushdata(vorb * f,uint8 * data,int data_len)4326 static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
4327 {
4328 int i,n;
4329 for (i=0; i < f->page_crc_tests; ++i)
4330 f->scan[i].bytes_done = 0;
4331
4332 // if we have room for more scans, search for them first, because
4333 // they may cause us to stop early if their header is incomplete
4334 if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
4335 if (data_len < 4) return 0;
4336 data_len -= 3; // need to look for 4-byte sequence, so don't miss
4337 // one that straddles a boundary
4338 for (i=0; i < data_len; ++i) {
4339 if (data[i] == 0x4f) {
4340 if (0==memcmp(data+i, ogg_page_header, 4)) {
4341 int j,len;
4342 uint32 crc;
4343 // make sure we have the whole page header
4344 if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
4345 // only read up to this page start, so hopefully we'll
4346 // have the whole page header start next time
4347 data_len = i;
4348 break;
4349 }
4350 // ok, we have it all; compute the length of the page
4351 len = 27 + data[i+26];
4352 for (j=0; j < data[i+26]; ++j)
4353 len += data[i+27+j];
4354 // scan everything up to the embedded crc (which we must 0)
4355 crc = 0;
4356 for (j=0; j < 22; ++j)
4357 crc = crc32_update(crc, data[i+j]);
4358 // now process 4 0-bytes
4359 for ( ; j < 26; ++j)
4360 crc = crc32_update(crc, 0);
4361 // len is the total number of bytes we need to scan
4362 n = f->page_crc_tests++;
4363 f->scan[n].bytes_left = len-j;
4364 f->scan[n].crc_so_far = crc;
4365 f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24);
4366 // if the last frame on a page is continued to the next, then
4367 // we can't recover the sample_loc immediately
4368 if (data[i+27+data[i+26]-1] == 255)
4369 f->scan[n].sample_loc = ~0;
4370 else
4371 f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24);
4372 f->scan[n].bytes_done = i+j;
4373 if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT)
4374 break;
4375 // keep going if we still have room for more
4376 }
4377 }
4378 }
4379 }
4380
4381 for (i=0; i < f->page_crc_tests;) {
4382 uint32 crc;
4383 int j;
4384 int n = f->scan[i].bytes_done;
4385 int m = f->scan[i].bytes_left;
4386 if (m > data_len - n) m = data_len - n;
4387 // m is the bytes to scan in the current chunk
4388 crc = f->scan[i].crc_so_far;
4389 for (j=0; j < m; ++j)
4390 crc = crc32_update(crc, data[n+j]);
4391 f->scan[i].bytes_left -= m;
4392 f->scan[i].crc_so_far = crc;
4393 if (f->scan[i].bytes_left == 0) {
4394 // does it match?
4395 if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
4396 // Houston, we have page
4397 data_len = n+m; // consumption amount is wherever that scan ended
4398 f->page_crc_tests = -1; // drop out of page scan mode
4399 f->previous_length = 0; // decode-but-don't-output one frame
4400 f->next_seg = -1; // start a new page
4401 f->current_loc = f->scan[i].sample_loc; // set the current sample location
4402 // to the amount we'd have decoded had we decoded this page
4403 f->current_loc_valid = f->current_loc != ~0U;
4404 return data_len;
4405 }
4406 // delete entry
4407 f->scan[i] = f->scan[--f->page_crc_tests];
4408 } else {
4409 ++i;
4410 }
4411 }
4412
4413 return data_len;
4414 }
4415
4416 // return value: number of bytes we used
stb_vorbis_decode_frame_pushdata(stb_vorbis * f,const uint8 * data,int data_len,int * channels,float *** output,int * samples)4417 int stb_vorbis_decode_frame_pushdata(
4418 stb_vorbis *f, // the file we're decoding
4419 const uint8 *data, int data_len, // the memory available for decoding
4420 int *channels, // place to write number of float * buffers
4421 float ***output, // place to write float ** array of float * buffers
4422 int *samples // place to write number of output samples
4423 )
4424 {
4425 int i;
4426 int len,right,left;
4427
4428 if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4429
4430 if (f->page_crc_tests >= 0) {
4431 *samples = 0;
4432 return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len);
4433 }
4434
4435 f->stream = (uint8 *) data;
4436 f->stream_end = (uint8 *) data + data_len;
4437 f->error = VORBIS__no_error;
4438
4439 // check that we have the entire packet in memory
4440 if (!is_whole_packet_present(f)) {
4441 *samples = 0;
4442 return 0;
4443 }
4444
4445 if (!vorbis_decode_packet(f, &len, &left, &right)) {
4446 // save the actual error we encountered
4447 enum STBVorbisError error = f->error;
4448 if (error == VORBIS_bad_packet_type) {
4449 // flush and resynch
4450 f->error = VORBIS__no_error;
4451 while (get8_packet(f) != EOP)
4452 if (f->eof) break;
4453 *samples = 0;
4454 return (int) (f->stream - data);
4455 }
4456 if (error == VORBIS_continued_packet_flag_invalid) {
4457 if (f->previous_length == 0) {
4458 // we may be resynching, in which case it's ok to hit one
4459 // of these; just discard the packet
4460 f->error = VORBIS__no_error;
4461 while (get8_packet(f) != EOP)
4462 if (f->eof) break;
4463 *samples = 0;
4464 return (int) (f->stream - data);
4465 }
4466 }
4467 // if we get an error while parsing, what to do?
4468 // well, it DEFINITELY won't work to continue from where we are!
4469 stb_vorbis_flush_pushdata(f);
4470 // restore the error that actually made us bail
4471 f->error = error;
4472 *samples = 0;
4473 return 1;
4474 }
4475
4476 // success!
4477 len = vorbis_finish_frame(f, len, left, right);
4478 for (i=0; i < f->channels; ++i)
4479 f->outputs[i] = f->channel_buffers[i] + left;
4480
4481 if (channels) *channels = f->channels;
4482 *samples = len;
4483 *output = f->outputs;
4484 return (int) (f->stream - data);
4485 }
4486
stb_vorbis_open_pushdata(const unsigned char * data,int data_len,int * data_used,int * error,const stb_vorbis_alloc * alloc)4487 stb_vorbis *stb_vorbis_open_pushdata(
4488 const unsigned char *data, int data_len, // the memory available for decoding
4489 int *data_used, // only defined if result is not NULL
4490 int *error, const stb_vorbis_alloc *alloc)
4491 {
4492 stb_vorbis *f, p;
4493 vorbis_init(&p, alloc);
4494 p.stream = (uint8 *) data;
4495 p.stream_end = (uint8 *) data + data_len;
4496 p.push_mode = TRUE;
4497 if (!start_decoder(&p)) {
4498 if (p.eof)
4499 *error = VORBIS_need_more_data;
4500 else
4501 *error = p.error;
4502 return NULL;
4503 }
4504 f = vorbis_alloc(&p);
4505 if (f) {
4506 *f = p;
4507 *data_used = (int) (f->stream - data);
4508 *error = 0;
4509 return f;
4510 } else {
4511 vorbis_deinit(&p);
4512 return NULL;
4513 }
4514 }
4515 #endif // STB_VORBIS_NO_PUSHDATA_API
4516
stb_vorbis_get_file_offset(stb_vorbis * f)4517 unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
4518 {
4519 #ifndef STB_VORBIS_NO_PUSHDATA_API
4520 if (f->push_mode) return 0;
4521 #endif
4522 if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start);
4523 #ifndef STB_VORBIS_NO_STDIO
4524 return (unsigned int) (ftell(f->f) - f->f_start);
4525 #endif
4526 }
4527
4528 #ifndef STB_VORBIS_NO_PULLDATA_API
4529 //
4530 // DATA-PULLING API
4531 //
4532
vorbis_find_page(stb_vorbis * f,uint32 * end,uint32 * last)4533 static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
4534 {
4535 for(;;) {
4536 int n;
4537 if (f->eof) return 0;
4538 n = get8(f);
4539 if (n == 0x4f) { // page header candidate
4540 unsigned int retry_loc = stb_vorbis_get_file_offset(f);
4541 int i;
4542 // check if we're off the end of a file_section stream
4543 if (retry_loc - 25 > f->stream_len)
4544 return 0;
4545 // check the rest of the header
4546 for (i=1; i < 4; ++i)
4547 if (get8(f) != ogg_page_header[i])
4548 break;
4549 if (f->eof) return 0;
4550 if (i == 4) {
4551 uint8 header[27];
4552 uint32 i, crc, goal, len;
4553 for (i=0; i < 4; ++i)
4554 header[i] = ogg_page_header[i];
4555 for (; i < 27; ++i)
4556 header[i] = get8(f);
4557 if (f->eof) return 0;
4558 if (header[4] != 0) goto invalid;
4559 goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24);
4560 for (i=22; i < 26; ++i)
4561 header[i] = 0;
4562 crc = 0;
4563 for (i=0; i < 27; ++i)
4564 crc = crc32_update(crc, header[i]);
4565 len = 0;
4566 for (i=0; i < header[26]; ++i) {
4567 int s = get8(f);
4568 crc = crc32_update(crc, s);
4569 len += s;
4570 }
4571 if (len && f->eof) return 0;
4572 for (i=0; i < len; ++i)
4573 crc = crc32_update(crc, get8(f));
4574 // finished parsing probable page
4575 if (crc == goal) {
4576 // we could now check that it's either got the last
4577 // page flag set, OR it's followed by the capture
4578 // pattern, but I guess TECHNICALLY you could have
4579 // a file with garbage between each ogg page and recover
4580 // from it automatically? So even though that paranoia
4581 // might decrease the chance of an invalid decode by
4582 // another 2^32, not worth it since it would hose those
4583 // invalid-but-useful files?
4584 if (end)
4585 *end = stb_vorbis_get_file_offset(f);
4586 if (last) {
4587 if (header[5] & 0x04)
4588 *last = 1;
4589 else
4590 *last = 0;
4591 }
4592 set_file_offset(f, retry_loc-1);
4593 return 1;
4594 }
4595 }
4596 invalid:
4597 // not a valid page, so rewind and look for next one
4598 set_file_offset(f, retry_loc);
4599 }
4600 }
4601 }
4602
4603
4604 #define SAMPLE_unknown 0xffffffff
4605
4606 // seeking is implemented with a binary search, which narrows down the range to
4607 // 64K, before using a linear search (because finding the synchronization
4608 // pattern can be expensive, and the chance we'd find the end page again is
4609 // relatively high for small ranges)
4610 //
4611 // two initial interpolation-style probes are used at the start of the search
4612 // to try to bound either side of the binary search sensibly, while still
4613 // working in O(log n) time if they fail.
4614
get_seek_page_info(stb_vorbis * f,ProbedPage * z)4615 static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
4616 {
4617 uint8 header[27], lacing[255];
4618 int i,len;
4619
4620 // record where the page starts
4621 z->page_start = stb_vorbis_get_file_offset(f);
4622
4623 // parse the header
4624 getn(f, header, 27);
4625 if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
4626 return 0;
4627 getn(f, lacing, header[26]);
4628
4629 // determine the length of the payload
4630 len = 0;
4631 for (i=0; i < header[26]; ++i)
4632 len += lacing[i];
4633
4634 // this implies where the page ends
4635 z->page_end = z->page_start + 27 + header[26] + len;
4636
4637 // read the last-decoded sample out of the data
4638 z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
4639
4640 // restore file state to where we were
4641 set_file_offset(f, z->page_start);
4642 return 1;
4643 }
4644
4645 // rarely used function to seek back to the preceding page while finding the
4646 // start of a packet
go_to_page_before(stb_vorbis * f,unsigned int limit_offset)4647 static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
4648 {
4649 unsigned int previous_safe, end;
4650
4651 // now we want to seek back 64K from the limit
4652 if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset)
4653 previous_safe = limit_offset - 65536;
4654 else
4655 previous_safe = f->first_audio_page_offset;
4656
4657 set_file_offset(f, previous_safe);
4658
4659 while (vorbis_find_page(f, &end, NULL)) {
4660 if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
4661 return 1;
4662 set_file_offset(f, end);
4663 }
4664
4665 return 0;
4666 }
4667
4668 // implements the search logic for finding a page and starting decoding. if
4669 // the function succeeds, current_loc_valid will be true and current_loc will
4670 // be less than or equal to the provided sample number (the closer the
4671 // better).
seek_to_sample_coarse(stb_vorbis * f,uint32 sample_number)4672 static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
4673 {
4674 ProbedPage left, right, mid;
4675 int i, start_seg_with_known_loc, end_pos, page_start;
4676 uint32 delta, stream_length, padding, last_sample_limit;
4677 double offset = 0.0, bytes_per_sample = 0.0;
4678 int probe = 0;
4679
4680 // find the last page and validate the target sample
4681 stream_length = stb_vorbis_stream_length_in_samples(f);
4682 if (stream_length == 0) return error(f, VORBIS_seek_without_length);
4683 if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
4684
4685 // this is the maximum difference between the window-center (which is the
4686 // actual granule position value), and the right-start (which the spec
4687 // indicates should be the granule position (give or take one)).
4688 padding = ((f->blocksize_1 - f->blocksize_0) >> 2);
4689 if (sample_number < padding)
4690 last_sample_limit = 0;
4691 else
4692 last_sample_limit = sample_number - padding;
4693
4694 left = f->p_first;
4695 while (left.last_decoded_sample == ~0U) {
4696 // (untested) the first page does not have a 'last_decoded_sample'
4697 set_file_offset(f, left.page_end);
4698 if (!get_seek_page_info(f, &left)) goto error;
4699 }
4700
4701 right = f->p_last;
4702 assert(right.last_decoded_sample != ~0U);
4703
4704 // starting from the start is handled differently
4705 if (last_sample_limit <= left.last_decoded_sample) {
4706 if (stb_vorbis_seek_start(f)) {
4707 if (f->current_loc > sample_number)
4708 return error(f, VORBIS_seek_failed);
4709 return 1;
4710 }
4711 return 0;
4712 }
4713
4714 while (left.page_end != right.page_start) {
4715 assert(left.page_end < right.page_start);
4716 // search range in bytes
4717 delta = right.page_start - left.page_end;
4718 if (delta <= 65536) {
4719 // there's only 64K left to search - handle it linearly
4720 set_file_offset(f, left.page_end);
4721 } else {
4722 if (probe < 2) {
4723 if (probe == 0) {
4724 // first probe (interpolate)
4725 double data_bytes = right.page_end - left.page_start;
4726 bytes_per_sample = data_bytes / right.last_decoded_sample;
4727 offset = left.page_start + bytes_per_sample * (last_sample_limit - left.last_decoded_sample);
4728 } else {
4729 // second probe (try to bound the other side)
4730 double error = ((double) last_sample_limit - mid.last_decoded_sample) * bytes_per_sample;
4731 if (error >= 0 && error < 8000) error = 8000;
4732 if (error < 0 && error > -8000) error = -8000;
4733 offset += error * 2;
4734 }
4735
4736 // ensure the offset is valid
4737 if (offset < left.page_end)
4738 offset = left.page_end;
4739 if (offset > right.page_start - 65536)
4740 offset = right.page_start - 65536;
4741
4742 set_file_offset(f, (unsigned int) offset);
4743 } else {
4744 // binary search for large ranges (offset by 32K to ensure
4745 // we don't hit the right page)
4746 set_file_offset(f, left.page_end + (delta / 2) - 32768);
4747 }
4748
4749 if (!vorbis_find_page(f, NULL, NULL)) goto error;
4750 }
4751
4752 for (;;) {
4753 if (!get_seek_page_info(f, &mid)) goto error;
4754 if (mid.last_decoded_sample != ~0U) break;
4755 // (untested) no frames end on this page
4756 set_file_offset(f, mid.page_end);
4757 assert(mid.page_start < right.page_start);
4758 }
4759
4760 // if we've just found the last page again then we're in a tricky file,
4761 // and we're close enough (if it wasn't an interpolation probe).
4762 if (mid.page_start == right.page_start) {
4763 if (probe >= 2 || delta <= 65536)
4764 break;
4765 } else {
4766 if (last_sample_limit < mid.last_decoded_sample)
4767 right = mid;
4768 else
4769 left = mid;
4770 }
4771
4772 ++probe;
4773 }
4774
4775 // seek back to start of the last packet
4776 page_start = left.page_start;
4777 set_file_offset(f, page_start);
4778 if (!start_page(f)) return error(f, VORBIS_seek_failed);
4779 end_pos = f->end_seg_with_known_loc;
4780 assert(end_pos >= 0);
4781
4782 for (;;) {
4783 for (i = end_pos; i > 0; --i)
4784 if (f->segments[i-1] != 255)
4785 break;
4786
4787 start_seg_with_known_loc = i;
4788
4789 if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet))
4790 break;
4791
4792 // (untested) the final packet begins on an earlier page
4793 if (!go_to_page_before(f, page_start))
4794 goto error;
4795
4796 page_start = stb_vorbis_get_file_offset(f);
4797 if (!start_page(f)) goto error;
4798 end_pos = f->segment_count - 1;
4799 }
4800
4801 // prepare to start decoding
4802 f->current_loc_valid = FALSE;
4803 f->last_seg = FALSE;
4804 f->valid_bits = 0;
4805 f->packet_bytes = 0;
4806 f->bytes_in_seg = 0;
4807 f->previous_length = 0;
4808 f->next_seg = start_seg_with_known_loc;
4809
4810 for (i = 0; i < start_seg_with_known_loc; i++)
4811 skip(f, f->segments[i]);
4812
4813 // start decoding (optimizable - this frame is generally discarded)
4814 if (!vorbis_pump_first_frame(f))
4815 return 0;
4816 if (f->current_loc > sample_number)
4817 return error(f, VORBIS_seek_failed);
4818 return 1;
4819
4820 error:
4821 // try to restore the file to a valid state
4822 stb_vorbis_seek_start(f);
4823 return error(f, VORBIS_seek_failed);
4824 }
4825
4826 // the same as vorbis_decode_initial, but without advancing
peek_decode_initial(vorb * f,int * p_left_start,int * p_left_end,int * p_right_start,int * p_right_end,int * mode)4827 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
4828 {
4829 int bits_read, bytes_read;
4830
4831 if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
4832 return 0;
4833
4834 // either 1 or 2 bytes were read, figure out which so we can rewind
4835 bits_read = 1 + ilog(f->mode_count-1);
4836 if (f->mode_config[*mode].blockflag)
4837 bits_read += 2;
4838 bytes_read = (bits_read + 7) / 8;
4839
4840 f->bytes_in_seg += bytes_read;
4841 f->packet_bytes -= bytes_read;
4842 skip(f, -bytes_read);
4843 if (f->next_seg == -1)
4844 f->next_seg = f->segment_count - 1;
4845 else
4846 f->next_seg--;
4847 f->valid_bits = 0;
4848
4849 return 1;
4850 }
4851
stb_vorbis_seek_frame(stb_vorbis * f,unsigned int sample_number)4852 int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
4853 {
4854 uint32 max_frame_samples;
4855
4856 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4857
4858 // fast page-level search
4859 if (!seek_to_sample_coarse(f, sample_number))
4860 return 0;
4861
4862 assert(f->current_loc_valid);
4863 assert(f->current_loc <= sample_number);
4864
4865 // linear search for the relevant packet
4866 max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2;
4867 while (f->current_loc < sample_number) {
4868 int left_start, left_end, right_start, right_end, mode, frame_samples;
4869 if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
4870 return error(f, VORBIS_seek_failed);
4871 // calculate the number of samples returned by the next frame
4872 frame_samples = right_start - left_start;
4873 if (f->current_loc + frame_samples > sample_number) {
4874 return 1; // the next frame will contain the sample
4875 } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) {
4876 // there's a chance the frame after this could contain the sample
4877 vorbis_pump_first_frame(f);
4878 } else {
4879 // this frame is too early to be relevant
4880 f->current_loc += frame_samples;
4881 f->previous_length = 0;
4882 maybe_start_packet(f);
4883 flush_packet(f);
4884 }
4885 }
4886 // the next frame should start with the sample
4887 if (f->current_loc != sample_number) return error(f, VORBIS_seek_failed);
4888 return 1;
4889 }
4890
stb_vorbis_seek(stb_vorbis * f,unsigned int sample_number)4891 int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
4892 {
4893 if (!stb_vorbis_seek_frame(f, sample_number))
4894 return 0;
4895
4896 if (sample_number != f->current_loc) {
4897 int n;
4898 uint32 frame_start = f->current_loc;
4899 stb_vorbis_get_frame_float(f, &n, NULL);
4900 assert(sample_number > frame_start);
4901 assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end);
4902 f->channel_buffer_start += (sample_number - frame_start);
4903 }
4904
4905 return 1;
4906 }
4907
stb_vorbis_seek_start(stb_vorbis * f)4908 int stb_vorbis_seek_start(stb_vorbis *f)
4909 {
4910 if (IS_PUSH_MODE(f)) { return error(f, VORBIS_invalid_api_mixing); }
4911 set_file_offset(f, f->first_audio_page_offset);
4912 f->previous_length = 0;
4913 f->first_decode = TRUE;
4914 f->next_seg = -1;
4915 return vorbis_pump_first_frame(f);
4916 }
4917
stb_vorbis_stream_length_in_samples(stb_vorbis * f)4918 unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
4919 {
4920 unsigned int restore_offset, previous_safe;
4921 unsigned int end, last_page_loc;
4922
4923 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4924 if (!f->total_samples) {
4925 unsigned int last;
4926 uint32 lo,hi;
4927 char header[6];
4928
4929 // first, store the current decode position so we can restore it
4930 restore_offset = stb_vorbis_get_file_offset(f);
4931
4932 // now we want to seek back 64K from the end (the last page must
4933 // be at most a little less than 64K, but let's allow a little slop)
4934 if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
4935 previous_safe = f->stream_len - 65536;
4936 else
4937 previous_safe = f->first_audio_page_offset;
4938
4939 set_file_offset(f, previous_safe);
4940 // previous_safe is now our candidate 'earliest known place that seeking
4941 // to will lead to the final page'
4942
4943 if (!vorbis_find_page(f, &end, &last)) {
4944 // if we can't find a page, we're hosed!
4945 f->error = VORBIS_cant_find_last_page;
4946 f->total_samples = 0xffffffff;
4947 goto done;
4948 }
4949
4950 // check if there are more pages
4951 last_page_loc = stb_vorbis_get_file_offset(f);
4952
4953 // stop when the last_page flag is set, not when we reach eof;
4954 // this allows us to stop short of a 'file_section' end without
4955 // explicitly checking the length of the section
4956 while (!last) {
4957 set_file_offset(f, end);
4958 if (!vorbis_find_page(f, &end, &last)) {
4959 // the last page we found didn't have the 'last page' flag
4960 // set. whoops!
4961 break;
4962 }
4963 previous_safe = last_page_loc+1;
4964 last_page_loc = stb_vorbis_get_file_offset(f);
4965 }
4966
4967 set_file_offset(f, last_page_loc);
4968
4969 // parse the header
4970 getn(f, (unsigned char *)header, 6);
4971 // extract the absolute granule position
4972 lo = get32(f);
4973 hi = get32(f);
4974 if (lo == 0xffffffff && hi == 0xffffffff) {
4975 f->error = VORBIS_cant_find_last_page;
4976 f->total_samples = SAMPLE_unknown;
4977 goto done;
4978 }
4979 if (hi)
4980 lo = 0xfffffffe; // saturate
4981 f->total_samples = lo;
4982
4983 f->p_last.page_start = last_page_loc;
4984 f->p_last.page_end = end;
4985 f->p_last.last_decoded_sample = lo;
4986
4987 done:
4988 set_file_offset(f, restore_offset);
4989 }
4990 return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
4991 }
4992
stb_vorbis_stream_length_in_seconds(stb_vorbis * f)4993 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
4994 {
4995 return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
4996 }
4997
4998
4999
stb_vorbis_get_frame_float(stb_vorbis * f,int * channels,float *** output)5000 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
5001 {
5002 int len, right,left,i;
5003 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
5004
5005 if (!vorbis_decode_packet(f, &len, &left, &right)) {
5006 f->channel_buffer_start = f->channel_buffer_end = 0;
5007 return 0;
5008 }
5009
5010 len = vorbis_finish_frame(f, len, left, right);
5011 for (i=0; i < f->channels; ++i)
5012 f->outputs[i] = f->channel_buffers[i] + left;
5013
5014 f->channel_buffer_start = left;
5015 f->channel_buffer_end = left+len;
5016
5017 if (channels) *channels = f->channels;
5018 if (output) *output = f->outputs;
5019 return len;
5020 }
5021
5022 #ifndef STB_VORBIS_NO_STDIO
5023
stb_vorbis_open_file_section(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc,unsigned int length)5024 stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length)
5025 {
5026 stb_vorbis *f, p;
5027 vorbis_init(&p, alloc);
5028 p.f = file;
5029 p.f_start = (uint32) ftell(file);
5030 p.stream_len = length;
5031 p.close_on_free = close_on_free;
5032 if (start_decoder(&p)) {
5033 f = vorbis_alloc(&p);
5034 if (f) {
5035 *f = p;
5036 vorbis_pump_first_frame(f);
5037 return f;
5038 }
5039 }
5040 if (error) *error = p.error;
5041 vorbis_deinit(&p);
5042 return NULL;
5043 }
5044
stb_vorbis_open_file(FILE * file,int close_on_free,int * error,const stb_vorbis_alloc * alloc)5045 stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc)
5046 {
5047 unsigned int len, start;
5048 start = (unsigned int) ftell(file);
5049 fseek(file, 0, SEEK_END);
5050 len = (unsigned int) (ftell(file) - start);
5051 fseek(file, start, SEEK_SET);
5052 return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
5053 }
5054
stb_vorbis_open_filename(const char * filename,int * error,const stb_vorbis_alloc * alloc)5055 stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
5056 {
5057 FILE *f;
5058 #if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__)
5059 if (0 != fopen_s(&f, filename, "rb"))
5060 f = NULL;
5061 #else
5062 f = fopen(filename, "rb");
5063 #endif
5064 if (f)
5065 return stb_vorbis_open_file(f, TRUE, error, alloc);
5066 if (error) *error = VORBIS_file_open_failure;
5067 return NULL;
5068 }
5069 #endif // STB_VORBIS_NO_STDIO
5070
stb_vorbis_open_memory(const unsigned char * data,int len,int * error,const stb_vorbis_alloc * alloc)5071 stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc)
5072 {
5073 stb_vorbis *f, p;
5074 if (data == NULL) return NULL;
5075 vorbis_init(&p, alloc);
5076 p.stream = (uint8 *) data;
5077 p.stream_end = (uint8 *) data + len;
5078 p.stream_start = (uint8 *) p.stream;
5079 p.stream_len = len;
5080 p.push_mode = FALSE;
5081 if (start_decoder(&p)) {
5082 f = vorbis_alloc(&p);
5083 if (f) {
5084 *f = p;
5085 vorbis_pump_first_frame(f);
5086 if (error) *error = VORBIS__no_error;
5087 return f;
5088 }
5089 }
5090 if (error) *error = p.error;
5091 vorbis_deinit(&p);
5092 return NULL;
5093 }
5094
5095 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
5096 #define PLAYBACK_MONO 1
5097 #define PLAYBACK_LEFT 2
5098 #define PLAYBACK_RIGHT 4
5099
5100 #define L (PLAYBACK_LEFT | PLAYBACK_MONO)
5101 #define C (PLAYBACK_LEFT | PLAYBACK_RIGHT | PLAYBACK_MONO)
5102 #define R (PLAYBACK_RIGHT | PLAYBACK_MONO)
5103
5104 static int8 channel_position[7][6] =
5105 {
5106 { 0 },
5107 { C },
5108 { L, R },
5109 { L, C, R },
5110 { L, R, L, R },
5111 { L, C, R, L, R },
5112 { L, C, R, L, R, C },
5113 };
5114
5115
5116 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
5117 typedef union {
5118 float f;
5119 int i;
5120 } float_conv;
5121 typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4];
5122 #define FASTDEF(x) float_conv x
5123 // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
5124 #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT))
5125 #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22))
5126 #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s))
5127 #define check_endianness()
5128 #else
5129 #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s))))
5130 #define check_endianness()
5131 #define FASTDEF(x)
5132 #endif
5133
copy_samples(short * dest,float * src,int len)5134 static void copy_samples(short *dest, float *src, int len)
5135 {
5136 int i;
5137 check_endianness();
5138 for (i=0; i < len; ++i) {
5139 FASTDEF(temp);
5140 int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i],15);
5141 if ((unsigned int) (v + 32768) > 65535)
5142 v = v < 0 ? -32768 : 32767;
5143 dest[i] = v;
5144 }
5145 }
5146
compute_samples(int mask,short * output,int num_c,float ** data,int d_offset,int len)5147 static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
5148 {
5149 #define BUFFER_SIZE 32
5150 float buffer[BUFFER_SIZE];
5151 int i,j,o,n = BUFFER_SIZE;
5152 check_endianness();
5153 for (o = 0; o < len; o += BUFFER_SIZE) {
5154 memset(buffer, 0, sizeof(buffer));
5155 if (o + n > len) n = len - o;
5156 for (j=0; j < num_c; ++j) {
5157 if (channel_position[num_c][j] & mask) {
5158 for (i=0; i < n; ++i)
5159 buffer[i] += data[j][d_offset+o+i];
5160 }
5161 }
5162 for (i=0; i < n; ++i) {
5163 FASTDEF(temp);
5164 int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5165 if ((unsigned int) (v + 32768) > 65535)
5166 v = v < 0 ? -32768 : 32767;
5167 output[o+i] = v;
5168 }
5169 }
5170 }
5171
compute_stereo_samples(short * output,int num_c,float ** data,int d_offset,int len)5172 static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
5173 {
5174 #define BUFFER_SIZE 32
5175 float buffer[BUFFER_SIZE];
5176 int i,j,o,n = BUFFER_SIZE >> 1;
5177 // o is the offset in the source data
5178 check_endianness();
5179 for (o = 0; o < len; o += BUFFER_SIZE >> 1) {
5180 // o2 is the offset in the output data
5181 int o2 = o << 1;
5182 memset(buffer, 0, sizeof(buffer));
5183 if (o + n > len) n = len - o;
5184 for (j=0; j < num_c; ++j) {
5185 int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);
5186 if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) {
5187 for (i=0; i < n; ++i) {
5188 buffer[i*2+0] += data[j][d_offset+o+i];
5189 buffer[i*2+1] += data[j][d_offset+o+i];
5190 }
5191 } else if (m == PLAYBACK_LEFT) {
5192 for (i=0; i < n; ++i) {
5193 buffer[i*2+0] += data[j][d_offset+o+i];
5194 }
5195 } else if (m == PLAYBACK_RIGHT) {
5196 for (i=0; i < n; ++i) {
5197 buffer[i*2+1] += data[j][d_offset+o+i];
5198 }
5199 }
5200 }
5201 for (i=0; i < (n<<1); ++i) {
5202 FASTDEF(temp);
5203 int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5204 if ((unsigned int) (v + 32768) > 65535)
5205 v = v < 0 ? -32768 : 32767;
5206 output[o2+i] = v;
5207 }
5208 }
5209 }
5210
convert_samples_short(int buf_c,short ** buffer,int b_offset,int data_c,float ** data,int d_offset,int samples)5211 static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
5212 {
5213 int i;
5214 if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5215 static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
5216 for (i=0; i < buf_c; ++i)
5217 compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples);
5218 } else {
5219 int limit = buf_c < data_c ? buf_c : data_c;
5220 for (i=0; i < limit; ++i)
5221 copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
5222 for ( ; i < buf_c; ++i)
5223 memset(buffer[i]+b_offset, 0, sizeof(short) * samples);
5224 }
5225 }
5226
stb_vorbis_get_frame_short(stb_vorbis * f,int num_c,short ** buffer,int num_samples)5227 int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
5228 {
5229 float **output = NULL;
5230 int len = stb_vorbis_get_frame_float(f, NULL, &output);
5231 if (len > num_samples) len = num_samples;
5232 if (len)
5233 convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len);
5234 return len;
5235 }
5236
convert_channels_short_interleaved(int buf_c,short * buffer,int data_c,float ** data,int d_offset,int len)5237 static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
5238 {
5239 int i;
5240 check_endianness();
5241 if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5242 assert(buf_c == 2);
5243 for (i=0; i < buf_c; ++i)
5244 compute_stereo_samples(buffer, data_c, data, d_offset, len);
5245 } else {
5246 int limit = buf_c < data_c ? buf_c : data_c;
5247 int j;
5248 for (j=0; j < len; ++j) {
5249 for (i=0; i < limit; ++i) {
5250 FASTDEF(temp);
5251 float f = data[i][d_offset+j];
5252 int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15);
5253 if ((unsigned int) (v + 32768) > 65535)
5254 v = v < 0 ? -32768 : 32767;
5255 *buffer++ = v;
5256 }
5257 for ( ; i < buf_c; ++i)
5258 *buffer++ = 0;
5259 }
5260 }
5261 }
5262
stb_vorbis_get_frame_short_interleaved(stb_vorbis * f,int num_c,short * buffer,int num_shorts)5263 int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
5264 {
5265 float **output;
5266 int len;
5267 if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
5268 len = stb_vorbis_get_frame_float(f, NULL, &output);
5269 if (len) {
5270 if (len*num_c > num_shorts) len = num_shorts / num_c;
5271 convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len);
5272 }
5273 return len;
5274 }
5275
stb_vorbis_get_samples_short_interleaved(stb_vorbis * f,int channels,short * buffer,int num_shorts)5276 int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
5277 {
5278 float **outputs;
5279 int len = num_shorts / channels;
5280 int n=0;
5281 int z = f->channels;
5282 if (z > channels) z = channels;
5283 while (n < len) {
5284 int k = f->channel_buffer_end - f->channel_buffer_start;
5285 if (n+k >= len) k = len - n;
5286 if (k)
5287 convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5288 buffer += k*channels;
5289 n += k;
5290 f->channel_buffer_start += k;
5291 if (n == len) break;
5292 if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5293 }
5294 return n;
5295 }
5296
stb_vorbis_get_samples_short(stb_vorbis * f,int channels,short ** buffer,int len)5297 int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
5298 {
5299 float **outputs;
5300 int n=0;
5301 int z = f->channels;
5302 if (z > channels) z = channels;
5303 while (n < len) {
5304 int k = f->channel_buffer_end - f->channel_buffer_start;
5305 if (n+k >= len) k = len - n;
5306 if (k)
5307 convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5308 n += k;
5309 f->channel_buffer_start += k;
5310 if (n == len) break;
5311 if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5312 }
5313 return n;
5314 }
5315
5316 #ifndef STB_VORBIS_NO_STDIO
stb_vorbis_decode_filename(const char * filename,int * channels,int * sample_rate,short ** output)5317 int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output)
5318 {
5319 int data_len, offset, total, limit, error;
5320 short *data;
5321 stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
5322 if (v == NULL) return -1;
5323 limit = v->channels * 4096;
5324 *channels = v->channels;
5325 if (sample_rate)
5326 *sample_rate = v->sample_rate;
5327 offset = data_len = 0;
5328 total = limit;
5329 data = (short *) malloc(total * sizeof(*data));
5330 if (data == NULL) {
5331 stb_vorbis_close(v);
5332 return -2;
5333 }
5334 for (;;) {
5335 int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5336 if (n == 0) break;
5337 data_len += n;
5338 offset += n * v->channels;
5339 if (offset + limit > total) {
5340 short *data2;
5341 total *= 2;
5342 data2 = (short *) realloc(data, total * sizeof(*data));
5343 if (data2 == NULL) {
5344 free(data);
5345 stb_vorbis_close(v);
5346 return -2;
5347 }
5348 data = data2;
5349 }
5350 }
5351 *output = data;
5352 stb_vorbis_close(v);
5353 return data_len;
5354 }
5355 #endif // NO_STDIO
5356
stb_vorbis_decode_memory(const uint8 * mem,int len,int * channels,int * sample_rate,short ** output)5357 int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output)
5358 {
5359 int data_len, offset, total, limit, error;
5360 short *data;
5361 stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
5362 if (v == NULL) return -1;
5363 limit = v->channels * 4096;
5364 *channels = v->channels;
5365 if (sample_rate)
5366 *sample_rate = v->sample_rate;
5367 offset = data_len = 0;
5368 total = limit;
5369 data = (short *) malloc(total * sizeof(*data));
5370 if (data == NULL) {
5371 stb_vorbis_close(v);
5372 return -2;
5373 }
5374 for (;;) {
5375 int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5376 if (n == 0) break;
5377 data_len += n;
5378 offset += n * v->channels;
5379 if (offset + limit > total) {
5380 short *data2;
5381 total *= 2;
5382 data2 = (short *) realloc(data, total * sizeof(*data));
5383 if (data2 == NULL) {
5384 free(data);
5385 stb_vorbis_close(v);
5386 return -2;
5387 }
5388 data = data2;
5389 }
5390 }
5391 *output = data;
5392 stb_vorbis_close(v);
5393 return data_len;
5394 }
5395 #endif // STB_VORBIS_NO_INTEGER_CONVERSION
5396
stb_vorbis_get_samples_float_interleaved(stb_vorbis * f,int channels,float * buffer,int num_floats)5397 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
5398 {
5399 float **outputs;
5400 int len = num_floats / channels;
5401 int n=0;
5402 int z = f->channels;
5403 if (z > channels) z = channels;
5404 while (n < len) {
5405 int i,j;
5406 int k = f->channel_buffer_end - f->channel_buffer_start;
5407 if (n+k >= len) k = len - n;
5408 for (j=0; j < k; ++j) {
5409 for (i=0; i < z; ++i)
5410 *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
5411 for ( ; i < channels; ++i)
5412 *buffer++ = 0;
5413 }
5414 n += k;
5415 f->channel_buffer_start += k;
5416 if (n == len)
5417 break;
5418 if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5419 break;
5420 }
5421 return n;
5422 }
5423
stb_vorbis_get_samples_float(stb_vorbis * f,int channels,float ** buffer,int num_samples)5424 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
5425 {
5426 float **outputs;
5427 int n=0;
5428 int z = f->channels;
5429 if (z > channels) z = channels;
5430 while (n < num_samples) {
5431 int i;
5432 int k = f->channel_buffer_end - f->channel_buffer_start;
5433 if (n+k >= num_samples) k = num_samples - n;
5434 if (k) {
5435 for (i=0; i < z; ++i)
5436 memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k);
5437 for ( ; i < channels; ++i)
5438 memset(buffer[i]+n, 0, sizeof(float) * k);
5439 }
5440 n += k;
5441 f->channel_buffer_start += k;
5442 if (n == num_samples)
5443 break;
5444 if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5445 break;
5446 }
5447 return n;
5448 }
5449 #endif // STB_VORBIS_NO_PULLDATA_API
5450
5451 /* Version history
5452 1.17 - 2019-07-08 - fix CVE-2019-13217, -13218, -13219, -13220, -13221, -13222, -13223
5453 found with Mayhem by ForAllSecure
5454 1.16 - 2019-03-04 - fix warnings
5455 1.15 - 2019-02-07 - explicit failure if Ogg Skeleton data is found
5456 1.14 - 2018-02-11 - delete bogus dealloca usage
5457 1.13 - 2018-01-29 - fix truncation of last frame (hopefully)
5458 1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
5459 1.11 - 2017-07-23 - fix MinGW compilation
5460 1.10 - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
5461 1.09 - 2016-04-04 - back out 'avoid discarding last frame' fix from previous version
5462 1.08 - 2016-04-02 - fixed multiple warnings; fix setup memory leaks;
5463 avoid discarding last frame of audio data
5464 1.07 - 2015-01-16 - fixed some warnings, fix mingw, const-correct API
5465 some more crash fixes when out of memory or with corrupt files
5466 1.06 - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
5467 some crash fixes when out of memory or with corrupt files
5468 1.05 - 2015-04-19 - don't define __forceinline if it's redundant
5469 1.04 - 2014-08-27 - fix missing const-correct case in API
5470 1.03 - 2014-08-07 - Warning fixes
5471 1.02 - 2014-07-09 - Declare qsort compare function _cdecl on windows
5472 1.01 - 2014-06-18 - fix stb_vorbis_get_samples_float
5473 1.0 - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in multichannel
5474 (API change) report sample rate for decode-full-file funcs
5475 0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5476 0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5477 0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5478 0.99993 - remove assert that fired on legal files with empty tables
5479 0.99992 - rewind-to-start
5480 0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5481 0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5482 0.9998 - add a full-decode function with a memory source
5483 0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5484 0.9996 - query length of vorbis stream in samples/seconds
5485 0.9995 - bugfix to another optimization that only happened in certain files
5486 0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5487 0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5488 0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5489 0.9991 - performance improvement of IMDCT
5490 0.999 - (should have been 0.9990) performance improvement of IMDCT
5491 0.998 - no-CRT support from Casey Muratori
5492 0.997 - bugfixes for bugs found by Terje Mathisen
5493 0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5494 0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5495 0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5496 0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5497 0.992 - fixes for MinGW warning
5498 0.991 - turn fast-float-conversion on by default
5499 0.990 - fix push-mode seek recovery if you seek into the headers
5500 0.98b - fix to bad release of 0.98
5501 0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5502 0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5503 0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5504 0.95 - clamping code for 16-bit functions
5505 0.94 - not publically released
5506 0.93 - fixed all-zero-floor case (was decoding garbage)
5507 0.92 - fixed a memory leak
5508 0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5509 0.90 - first public release
5510 */
5511
5512 #endif // STB_VORBIS_HEADER_ONLY
5513
5514
5515 /*
5516 ------------------------------------------------------------------------------
5517 This software is available under 2 licenses -- choose whichever you prefer.
5518 ------------------------------------------------------------------------------
5519 ALTERNATIVE A - MIT License
5520 Copyright (c) 2017 Sean Barrett
5521 Permission is hereby granted, free of charge, to any person obtaining a copy of
5522 this software and associated documentation files (the "Software"), to deal in
5523 the Software without restriction, including without limitation the rights to
5524 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
5525 of the Software, and to permit persons to whom the Software is furnished to do
5526 so, subject to the following conditions:
5527 The above copyright notice and this permission notice shall be included in all
5528 copies or substantial portions of the Software.
5529 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5530 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5531 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5532 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
5533 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
5534 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
5535 SOFTWARE.
5536 ------------------------------------------------------------------------------
5537 ALTERNATIVE B - Public Domain (www.unlicense.org)
5538 This is free and unencumbered software released into the public domain.
5539 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
5540 software, either in source code form or as a compiled binary, for any purpose,
5541 commercial or non-commercial, and by any means.
5542 In jurisdictions that recognize copyright laws, the author or authors of this
5543 software dedicate any and all copyright interest in the software to the public
5544 domain. We make this dedication for the benefit of the public at large and to
5545 the detriment of our heirs and successors. We intend this dedication to be an
5546 overt act of relinquishment in perpetuity of all present and future rights to
5547 this software under copyright law.
5548 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5549 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5550 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5551 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
5552 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
5553 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
5554 ------------------------------------------------------------------------------
5555 */
5556