1 /*
2     DeaDBeeF - ultimate music player for GNU/Linux systems with X11
3     Copyright (C) 2009-2011 Alexey Yakovenko <waker@users.sourceforge.net>
4     based on apedec from FFMpeg Copyright (c) 2007 Benjamin Zores <ben@geexbox.org>
5     based upon libdemac from Dave Chapman.
6 
7     This program is free software; you can redistribute it and/or
8     modify it under the terms of the GNU General Public License
9     as published by the Free Software Foundation; either version 2
10     of the License, or (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License
18     along with this program; if not, write to the Free Software
19     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
20 */
21 
22 /*
23    main changes compared to ffmpeg:
24      demuxer and decoder joined into 1 module
25      no mallocs/reallocs during decoding
26      streaming through fixed ringbuffer (small mem footprint)
27      24bit support merged from rockbox
28 */
29 
30 /*
31    main changes compared to DeaDBeeF:
32      removed deadbeef functions and structures
33      added public callback api
34      added mingw support
35      fixed some gcc warnings
36 
37 */
38 
39 #if HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42 #include <stdio.h>
43 #include <string.h>
44 #include <limits.h>
45 #include <stdlib.h>
46 #include <assert.h>
47 #include <math.h>
48 #include "ffap.h"
49 
50 
51 #if defined(_WIN32) && ! defined(_MSC_VER)
52 #include <malloc.h>
/* MinGW lacks posix_memalign(); emulate it with __mingw_aligned_malloc().
 * Stores the allocation in *memptr and returns 0, or -1 if it failed. */
int posix_memalign (void **memptr, size_t alignment, size_t size) {
    void *block = __mingw_aligned_malloc (size, alignment);

    *memptr = block;
    if (!block) {
        return -1;
    }
    return 0;
}
57 #endif
58 
59 #ifdef TARGET_ANDROID
/* Android build: provide posix_memalign() on top of memalign().
 * Stores the allocation in *memptr and returns 0, or -1 if it failed. */
int posix_memalign (void **memptr, size_t alignment, size_t size) {
    void *block = memalign (alignment, size);

    *memptr = block;
    if (!block) {
        return -1;
    }
    return 0;
}
64 #endif
65 
/* Set to 1 to compile in the verbose header/packet diagnostics. */
#define ENABLE_DEBUG 0

/* printf-style diagnostics on stderr.  Wrapped in do/while(0) so that
 * "trace (...);" is exactly one statement and is safe in an unbraced
 * if/else. */
#define trace(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
//#define trace(fmt,...)

/* Branch-prediction hints (GCC/Clang builtin). */
#define likely(x)       __builtin_expect((x),1)
#define unlikely(x)     __builtin_expect((x),0)

/* Size in bytes of the heap buffer that holds compressed packet data. */
#define PACKET_BUFFER_SIZE 100000

/* NOTE: both macros evaluate their arguments more than once. */
#define min(x,y) ((x)<(y)?(x):(y))
#define max(x,y) ((x)>(y)?(x):(y))
78 
/** Return the byte at *ptr and advance *ptr by one. */
static inline uint8_t bytestream_get_byte (const uint8_t **ptr) {
    const uint8_t *cursor = *ptr;

    *ptr = cursor + 1;
    return cursor[0];
}
84 
/**
 * Read a big-endian 32-bit value at *ptr and advance *ptr by four bytes.
 *
 * Each byte is widened to uint32_t before shifting: a uint8_t promotes to
 * (signed) int, and shifting a value >= 0x80 left by 24 would overflow it.
 */
static inline uint32_t bytestream_get_be32 (const uint8_t **ptr) {
    const uint8_t *src = *ptr;
    uint32_t x = ((uint32_t)src[0] << 24)
               | ((uint32_t)src[1] << 16)
               | ((uint32_t)src[2] << 8)
               |  (uint32_t)src[3];

    *ptr = src + 4;
    return x;
}
91 
92 
/* Length, in samples, of the per-channel decoded0/decoded1 buffers. */
#define BLOCKS_PER_LOOP     4608
#define MAX_CHANNELS        2
#define MAX_BYTESPERSAMPLE  3

/* Frame codes. NOTE(review): presumably matched against the per-frame flags
 * word by the frame decoder -- confirm there. */
#define APE_FRAMECODE_MONO_SILENCE    1
#define APE_FRAMECODE_STEREO_SILENCE  3
#define APE_FRAMECODE_PSEUDO_STEREO   4

/* APEPredictor.historybuffer holds HISTORY_SIZE + PREDICTOR_SIZE entries. */
#define HISTORY_SIZE 512
#define PREDICTOR_ORDER 8
/** Total size of all predictor histories */
#define PREDICTOR_SIZE 50

/* Delay offsets, spaced PREDICTOR_ORDER entries apart starting at 18. */
#define YDELAYA (18 + 4 * PREDICTOR_ORDER)
#define YDELAYB (18 + 3 * PREDICTOR_ORDER)
#define XDELAYA (18 + 2 * PREDICTOR_ORDER)
#define XDELAYB (18 + 1 * PREDICTOR_ORDER)

/* Adaptation-coefficient offsets. */
#define YADAPTCOEFFSA 18
#define XADAPTCOEFFSA 14
#define YADAPTCOEFFSB 10
#define XADAPTCOEFFSB 5
115 
/**
 * Possible compression levels.
 *
 * The values are multiples of 1000; ffap_init() derives the filter-set index
 * as (level / 1000) - 1.
 * @{
 */
enum APECompressionLevel {
    COMPRESSION_LEVEL_FAST       = 1 * 1000,
    COMPRESSION_LEVEL_NORMAL     = 2 * 1000,
    COMPRESSION_LEVEL_HIGH       = 3 * 1000,
    COMPRESSION_LEVEL_EXTRA_HIGH = 4 * 1000,
    COMPRESSION_LEVEL_INSANE     = 5 * 1000
};
/** @} */
128 
/* Maximum number of filter stages per compression level. */
#define APE_FILTER_LEVELS 3

/**
 * Filter orders depending on compression level.
 * Row = filter set (compression level / 1000 - 1), column = filter stage.
 * A zero order terminates the list of stages for that row.
 */
static const uint16_t ape_filter_orders[5][APE_FILTER_LEVELS] = {
    /* fast       */ {  0,   0,    0 },
    /* normal     */ { 16,   0,    0 },
    /* high       */ { 64,   0,    0 },
    /* extra high */ { 32, 256,    0 },
    /* insane     */ { 16, 256, 1280 }
};

/** Filter fraction bits depending on compression level (same indexing). */
static const uint8_t ape_filter_fracbits[5][APE_FILTER_LEVELS] = {
    /* fast       */ {  0,  0,  0 },
    /* normal     */ { 11,  0,  0 },
    /* high       */ { 11,  0,  0 },
    /* extra high */ { 10, 13,  0 },
    /* insane     */ { 11, 13, 15 }
};
148 
149 
/** Filters applied to the decoded data */
typedef struct APEFilter {
    /** actual coefficients used in filtering */
    int16_t *coeffs;
    /** adaptive filter coefficients used for correcting the actual filter coefficients */
    int16_t *adaptcoeffs;
    /** filter memory */
    int16_t *historybuffer;
    /** filtered values */
    int16_t *delay;

    int avg;
} APEFilter;
159 
/** Adaptive Rice parameter state; both fields are maintained by update_rice(). */
typedef struct APERice {
    uint32_t k;     /* current Rice parameter */
    uint32_t ksum;  /* running sum that k is adapted from */
} APERice;
164 
/** Range decoder state */
typedef struct APERangecoder {
    /** low end of interval */
    uint32_t low;
    /** length of interval */
    uint32_t range;
    /** bytes_to_follow resp. intermediate value */
    uint32_t help;
    /** buffer for input/output */
    unsigned int buffer;
} APERangecoder;
171 
172 /** Filter histories */
173 typedef struct APEPredictor {
174     int32_t *buf;
175 
176     int32_t lastA[2];
177 
178     int32_t filterA[2];
179     int32_t filterB[2];
180 
181     int32_t coeffsA[2][4];  ///< adaption coefficients
182     int32_t coeffsB[2][5];  ///< adaption coefficients
183     int32_t historybuffer[HISTORY_SIZE + PREDICTOR_SIZE];
184 } APEPredictor;
185 
/* The earliest and latest file formats supported by this library
 * (compared against the header's fileversion field). */
#define APE_MIN_VERSION 3950
#define APE_MAX_VERSION 3990

/* Bits of the header's formatflags field. */
#define MAC_FORMAT_FLAG_8_BIT                (1 << 0) // is 8-bit [OBSOLETE]
#define MAC_FORMAT_FLAG_CRC                  (1 << 1) // uses the new CRC32 error detection [OBSOLETE]
#define MAC_FORMAT_FLAG_HAS_PEAK_LEVEL       (1 << 2) // uint32 nPeakLevel after the header [OBSOLETE]
#define MAC_FORMAT_FLAG_24_BIT               (1 << 3) // is 24-bit [OBSOLETE]
#define MAC_FORMAT_FLAG_HAS_SEEK_ELEMENTS    (1 << 4) // has the number of seek elements after the peak level
#define MAC_FORMAT_FLAG_CREATE_WAV_HEADER    (1 << 5) // create the wave header on decompression (not stored)

#define MAC_SUBFRAME_SIZE 4608

#define APE_EXTRADATA_SIZE 6
200 
/** One entry of the frame table built by ape_read_header(). */
typedef struct {
    int64_t pos;  /* file offset the frame is read from */
    int nblocks;  /* number of blocks in the frame */
    int size;     /* number of bytes to read for the frame */
    int skip;     /* bytes by which pos was moved back (0..3) */
} APEFrame;
207 
208 /** Decoder context */
209 typedef struct APEContext {
210     /* Derived fields */
211     uint32_t junklength;
212     uint32_t firstframe;
213     uint32_t totalsamples;
214     uint32_t currentframe;
215     APEFrame *frames;
216 
217     /* Info from Descriptor Block */
218     char magic[4];
219     uint16_t fileversion;
220     uint16_t padding1;
221     uint32_t descriptorlength;
222     uint32_t headerlength;
223     uint32_t seektablelength;
224     uint32_t wavheaderlength;
225     uint32_t audiodatalength;
226     uint32_t audiodatalength_high;
227     uint32_t wavtaillength;
228     uint8_t md5[16];
229 
230     /* Info from Header Block */
231     uint16_t compressiontype;
232     uint16_t formatflags;
233     uint32_t blocksperframe;
234     uint32_t finalframeblocks;
235     uint32_t totalframes;
236     uint16_t bps;
237     uint16_t channels;
238     uint32_t samplerate;
239     int samples;                             ///< samples left to decode in current frame
240 
241     /* Seektable */
242     uint32_t *seektable;
243 
244     int fset;                                ///< which filter set to use (calculated from compression level)
245     int flags;                               ///< global decoder flags
246 
247     uint32_t CRC;                            ///< frame CRC
248     int frameflags;                          ///< frame flags
249     int currentframeblocks;                  ///< samples (per channel) in current frame
250     int blocksdecoded;                       ///< count of decoded samples in current frame
251     APEPredictor predictor;                  ///< predictor used for final reconstruction
252 
253     int32_t decoded0[BLOCKS_PER_LOOP];       ///< decoded data for the first channel
254     int32_t decoded1[BLOCKS_PER_LOOP];       ///< decoded data for the second channel
255 
256     int16_t* filterbuf[APE_FILTER_LEVELS];   ///< filter memory
257 
258     APERangecoder rc;                        ///< rangecoder used to decode actual values
259     APERice riceX;                           ///< rice code parameters for the second channel
260     APERice riceY;                           ///< rice code parameters for the first channel
261     APEFilter filters[APE_FILTER_LEVELS][2]; ///< filters used for reconstruction
262 
263     uint8_t *data_end;                       ///< frame data end
264     const uint8_t *ptr;                      ///< current position in frame data
265     const uint8_t *last_ptr;
266 
267     uint8_t *packet_data; // must be PACKET_BUFFER_SIZE
268     int packet_remaining; // number of bytes in packet_data
269     int packet_sizeleft; // number of bytes left unread for current ape frame
270     int samplestoskip;
271     int currentsample; // current sample from beginning of file
272 
273     uint8_t buffer[BLOCKS_PER_LOOP * 2 * 2 * 2];
274     int remaining;
275 
276     int error;
277     int skip_header;
278 } APEContext;
279 
280 inline static int
read_uint16_(FFap_decoder * decoder,uint16_t * x)281 read_uint16_(FFap_decoder *decoder, uint16_t *x)
282 {
283     unsigned char tmp[2];
284     int n;
285 
286     n = decoder->read(tmp, 1, 2, decoder->client_data);
287 
288     if (n != 2)
289         return -1;
290 
291     *x = tmp[0] | (tmp[1] << 8);
292 
293     return 0;
294 }
295 
296 inline static int
read_uint32_(FFap_decoder * decoder,uint32_t * x)297 read_uint32_(FFap_decoder *decoder, uint32_t* x)
298 {
299     unsigned char tmp[4];
300     int n;
301 
302     n = decoder->read(tmp, 1, 4, decoder->client_data);
303 
304     if (n != 4)
305         return -1;
306 
307     *x = tmp[0] | (tmp[1] << 8) | (tmp[2] << 16) | (tmp[3] << 24);
308 
309     return 0;
310 }
311 
312 #if ENABLE_DEBUG
/**
 * Dump the parsed descriptor/header fields, the seek table and the frame
 * table to stderr.  Only compiled when ENABLE_DEBUG is non-zero.
 *
 * 32-bit unsigned fields are printed with %u and the 64-bit frame position is
 * cast to long long, so every conversion specifier matches its argument.
 */
static void ape_dumpinfo(APEContext * ape_ctx)
{
    uint32_t i;

    fprintf (stderr, "Descriptor Block:\n\n");
    fprintf (stderr, "magic                = \"%c%c%c%c\"\n", ape_ctx->magic[0], ape_ctx->magic[1], ape_ctx->magic[2], ape_ctx->magic[3]);
    fprintf (stderr, "fileversion          = %d\n", ape_ctx->fileversion);
    fprintf (stderr, "descriptorlength     = %u\n", (unsigned)ape_ctx->descriptorlength);
    fprintf (stderr, "headerlength         = %u\n", (unsigned)ape_ctx->headerlength);
    fprintf (stderr, "seektablelength      = %u\n", (unsigned)ape_ctx->seektablelength);
    fprintf (stderr, "wavheaderlength      = %u\n", (unsigned)ape_ctx->wavheaderlength);
    fprintf (stderr, "audiodatalength      = %u\n", (unsigned)ape_ctx->audiodatalength);
    fprintf (stderr, "audiodatalength_high = %u\n", (unsigned)ape_ctx->audiodatalength_high);
    fprintf (stderr, "wavtaillength        = %u\n", (unsigned)ape_ctx->wavtaillength);
    fprintf (stderr, "md5                  = ");
    for (i = 0; i < 16; i++)
         fprintf (stderr, "%02x", ape_ctx->md5[i]);
    fprintf (stderr, "\n");

    fprintf (stderr, "\nHeader Block:\n\n");

    fprintf (stderr, "compressiontype      = %d\n", ape_ctx->compressiontype);
    fprintf (stderr, "formatflags          = %d\n", ape_ctx->formatflags);
    fprintf (stderr, "blocksperframe       = %u\n", (unsigned)ape_ctx->blocksperframe);
    fprintf (stderr, "finalframeblocks     = %u\n", (unsigned)ape_ctx->finalframeblocks);
    fprintf (stderr, "totalframes          = %u\n", (unsigned)ape_ctx->totalframes);
    fprintf (stderr, "bps                  = %d\n", ape_ctx->bps);
    fprintf (stderr, "channels             = %d\n", ape_ctx->channels);
    fprintf (stderr, "samplerate           = %u\n", (unsigned)ape_ctx->samplerate);

    fprintf (stderr, "\nSeektable\n\n");
    /* The table is only listed when it has exactly one entry per frame. */
    if ((ape_ctx->seektablelength / sizeof(uint32_t)) != ape_ctx->totalframes) {
        fprintf (stderr, "No seektable\n");
    } else {
        for (i = 0; i < ape_ctx->seektablelength / sizeof(uint32_t); i++) {
            if (i < ape_ctx->totalframes - 1) {
                fprintf (stderr, "%8u   %u (%u bytes)\n", (unsigned)i, (unsigned)ape_ctx->seektable[i],
                         (unsigned)(ape_ctx->seektable[i + 1] - ape_ctx->seektable[i]));
            } else {
                fprintf (stderr, "%8u   %u\n", (unsigned)i, (unsigned)ape_ctx->seektable[i]);
            }
        }
    }

    fprintf (stderr, "\nFrames\n\n");
    for (i = 0; i < ape_ctx->totalframes; i++)
        fprintf (stderr, "%8u   %8lld %8d (%d samples)\n", (unsigned)i, (long long)ape_ctx->frames[i].pos,
                 ape_ctx->frames[i].size, ape_ctx->frames[i].nblocks);

    fprintf (stderr, "\nCalculated information:\n\n");
    fprintf (stderr, "junklength           = %u\n", (unsigned)ape_ctx->junklength);
    fprintf (stderr, "firstframe           = %u\n", (unsigned)ape_ctx->firstframe);
    fprintf (stderr, "totalsamples         = %u\n", (unsigned)ape_ctx->totalsamples);
}
365 #endif
366 
367 static int
ape_read_header(FFap_decoder * decoder)368 ape_read_header (FFap_decoder *decoder)
369 //ape_read_header(DB_FILE *fp, APEContext *ape)
370 {
371     uint32_t i;
372     APEContext *ape = decoder->ape_ctx;
373 
374     /* TODO: Skip any leading junk such as id3v2 tags */
375     ape->junklength = 0;
376 
377     if(decoder->read(ape->magic, 1, 4, decoder->client_data) != 4) {
378         return -1;
379     }
380 
381     /*if (deadbeef->fread (ape->magic, 1, 4, fp) != 4) {
382         return -1;
383     }*/
384     if (memcmp (ape->magic, "MAC ", 4))
385         return -1;
386 
387     if (read_uint16_ (decoder, &ape->fileversion) < 0) {
388         return -1;
389     }
390 
391     /*if (read_uint16 (fp, &ape->fileversion) < 0) {
392         return -1;
393     }*/
394 
395     if (ape->fileversion < APE_MIN_VERSION || ape->fileversion > APE_MAX_VERSION) {
396         fprintf (stderr, "ape: Unsupported file version - %d.%02d\n", ape->fileversion / 1000, (ape->fileversion % 1000) / 10);
397         return -1;
398     }
399 
400     if (ape->fileversion >= 3980) {
401         if (read_uint16_ (decoder, &ape->padding1) < 0) {
402             return -1;
403         }
404         if (read_uint32_ (decoder, &ape->descriptorlength) < 0) {
405             return -1;
406         }
407         if (read_uint32_ (decoder, &ape->headerlength) < 0) {
408             return -1;
409         }
410         if (read_uint32_ (decoder, &ape->seektablelength) < 0) {
411             return -1;
412         }
413         if (read_uint32_ (decoder, &ape->wavheaderlength) < 0) {
414             return -1;
415         }
416         if (read_uint32_ (decoder, &ape->audiodatalength) < 0) {
417             return -1;
418         }
419         if (read_uint32_ (decoder, &ape->audiodatalength_high) < 0) {
420             return -1;
421         }
422         if (read_uint32_ (decoder, &ape->wavtaillength) < 0) {
423             return -1;
424         }
425         if(decoder->read(ape->md5, 1, 16, decoder->client_data) != 16) {
426             return -1;
427         }
428 
429         /*if (deadbeef->fread (ape->md5, 1, 16, fp) != 16) {
430             return -1;
431         }*/
432 
433         /* Skip any unknown bytes at the end of the descriptor.
434            This is for future compatibility */
435 
436         if (ape->descriptorlength > 52) {
437             //deadbeef->fseek (fp, ape->descriptorlength - 52, SEEK_CUR);
438             decoder->seek(ape->descriptorlength - 52, SEEK_CUR, decoder->client_data);
439         }
440 
441         /* Read header data */
442         if (read_uint16_ (decoder, &ape->compressiontype) < 0) {
443             return -1;
444         }
445         if (read_uint16_ (decoder, &ape->formatflags) < 0) {
446             return -1;
447         }
448         if (read_uint32_ (decoder, &ape->blocksperframe) < 0) {
449             return -1;
450         }
451         if (read_uint32_ (decoder, &ape->finalframeblocks) < 0) {
452             return -1;
453         }
454         if (read_uint32_ (decoder, & ape->totalframes) < 0) {
455             return -1;
456         }
457         if (read_uint16_ (decoder, &ape->bps) < 0) {
458             return -1;
459         }
460         if (read_uint16_ (decoder, &ape->channels) < 0) {
461             return -1;
462         }
463         if (read_uint32_ (decoder, &ape->samplerate) < 0) {
464             return -1;
465         }
466     } else {
467         ape->descriptorlength = 0;
468         ape->headerlength = 32;
469 
470         if (read_uint16_ (decoder, &ape->compressiontype) < 0) {
471             return -1;
472         }
473         if (read_uint16_ (decoder, &ape->formatflags) < 0) {
474             return -1;
475         }
476         if (read_uint16_ (decoder, &ape->channels) < 0) {
477             return -1;
478         }
479         if (read_uint32_ (decoder, &ape->samplerate) < 0) {
480             return -1;
481         }
482         if (read_uint32_ (decoder, &ape->wavheaderlength) < 0) {
483             return -1;
484         }
485         if (read_uint32_ (decoder, &ape->wavtaillength) < 0) {
486             return -1;
487         }
488         if (read_uint32_ (decoder, &ape->totalframes) < 0) {
489             return -1;
490         }
491         if (read_uint32_ (decoder, &ape->finalframeblocks) < 0) {
492             return -1;
493         }
494 
495         if (ape->formatflags & MAC_FORMAT_FLAG_HAS_PEAK_LEVEL) {
496             //deadbeef->fseek(fp, 4, SEEK_CUR); /* Skip the peak level */
497             decoder->seek(4, SEEK_CUR, decoder->client_data); /* Skip the peak level */
498             ape->headerlength += 4;
499         }
500 
501         if (ape->formatflags & MAC_FORMAT_FLAG_HAS_SEEK_ELEMENTS) {
502             if (read_uint32_ (decoder, &ape->seektablelength) < 0) {
503                 return -1;
504             };
505             ape->headerlength += 4;
506             ape->seektablelength *= sizeof(int32_t);
507         } else
508             ape->seektablelength = ape->totalframes * sizeof(int32_t);
509 
510         if (ape->formatflags & MAC_FORMAT_FLAG_8_BIT)
511             ape->bps = 8;
512         else if (ape->formatflags & MAC_FORMAT_FLAG_24_BIT)
513             ape->bps = 24;
514         else
515             ape->bps = 16;
516 
517         //if (ape->fileversion >= 3950)
518             ape->blocksperframe = 73728 * 4;
519         /*else if (ape->fileversion >= 3900 || (ape->fileversion >= 3800  && ape->compressiontype >= 4000))
520             ape->blocksperframe = 73728;
521         else
522             ape->blocksperframe = 9216;*/
523 
524         /* Skip any stored wav header */
525         if (!(ape->formatflags & MAC_FORMAT_FLAG_CREATE_WAV_HEADER)) {
526             //deadbeef->fseek (fp, ape->wavheaderlength, SEEK_CUR);
527             decoder->seek(ape->wavheaderlength, SEEK_CUR, decoder->client_data);
528         }
529     }
530 
531     if(ape->totalframes > UINT_MAX / sizeof(APEFrame)){
532         fprintf (stderr, "ape: Too many frames: %u\n", ape->totalframes);
533         return -1;
534     }
535     ape->frames       = malloc(ape->totalframes * sizeof(APEFrame));
536     if(!ape->frames)
537         return -1;
538     ape->firstframe   = ape->junklength + ape->descriptorlength + ape->headerlength + ape->seektablelength + ape->wavheaderlength;
539     ape->currentframe = 0;
540 
541     ape->totalsamples = ape->finalframeblocks;
542     if (ape->totalframes > 1)
543         ape->totalsamples += ape->blocksperframe * (ape->totalframes - 1);
544 
545     if (ape->seektablelength > 0) {
546         ape->seektable = malloc(ape->seektablelength);
547         for (i = 0; i < ape->seektablelength / sizeof(uint32_t); i++) {
548             if (read_uint32_ (decoder, &ape->seektable[i]) < 0) {
549                 return -1;
550             }
551         }
552     }
553 
554     ape->frames[0].pos     = ape->firstframe;
555     ape->frames[0].nblocks = ape->blocksperframe;
556     ape->frames[0].skip    = 0;
557     for (i = 1; i < ape->totalframes; i++) {
558         ape->frames[i].pos      = ape->seektable[i]; //ape->frames[i-1].pos + ape->blocksperframe;
559         ape->frames[i].nblocks  = ape->blocksperframe;
560         ape->frames[i - 1].size = ape->frames[i].pos - ape->frames[i - 1].pos;
561         ape->frames[i].skip     = (ape->frames[i].pos - ape->frames[0].pos) & 3;
562     }
563     ape->frames[ape->totalframes - 1].size    = ape->finalframeblocks * 4;
564     ape->frames[ape->totalframes - 1].nblocks = ape->finalframeblocks;
565 
566     for (i = 0; i < ape->totalframes; i++) {
567         if(ape->frames[i].skip){
568             ape->frames[i].pos  -= ape->frames[i].skip;
569             ape->frames[i].size += ape->frames[i].skip;
570         }
571         ape->frames[i].size = (ape->frames[i].size + 3) & ~3;
572     }
573 
574 #if ENABLE_DEBUG
575     ape_dumpinfo(ape);
576     fprintf (stderr, "ape: Decoding file - v%d.%02d, compression level %d\n", ape->fileversion / 1000, (ape->fileversion % 1000) / 10, ape->compressiontype);
577 #endif
578 
579     return 0;
580 }
581 
/* Store the low 32 bits of d at p, most significant byte first.
 * Both arguments are evaluated exactly once, so an argument with a side
 * effect or a function call (see AV_WL32) is not repeated per byte. */
#   define AV_WB32(p, d) do {                                   \
        uint8_t *av_wb32_dst_ = (uint8_t*)(p);                  \
        const uint32_t av_wb32_val_ = (uint32_t)(d);            \
        av_wb32_dst_[3] = (uint8_t)(av_wb32_val_);              \
        av_wb32_dst_[2] = (uint8_t)(av_wb32_val_ >> 8);         \
        av_wb32_dst_[1] = (uint8_t)(av_wb32_val_ >> 16);        \
        av_wb32_dst_[0] = (uint8_t)(av_wb32_val_ >> 24);        \
    } while(0)

/* Store v at p, least significant byte first (byte-swap, then AV_WB32). */
#define AV_WL32(p, v) AV_WB32(p, bswap_32(v))
590 
/** Reverse the byte order of a 32-bit value. */
static inline uint32_t bswap_32(uint32_t x)
{
    /* First swap the two bytes inside each 16-bit half ... */
    const uint32_t halves = ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >> 8);

    /* ... then swap the halves themselves. */
    return (halves << 16) | (halves >> 16);
}
597 
/**
 * Load the start of the next frame into ape->packet_data.
 *
 * Seeks to the frame (plus ape->skip_header), writes an 8-byte prefix
 * (block count and skip, both little-endian) and reads up to
 * PACKET_BUFFER_SIZE - 8 bytes of frame data behind it.  Updates
 * packet_sizeleft / packet_remaining, refreshes decoder->bitrate when it can
 * be computed, and advances ape->currentframe.
 *
 * @return the value returned by the read callback, or -1 when no frame is
 *         left, the frame table entry is invalid or the seek fails
 */
static int ape_read_packet(FFap_decoder *decoder)
{
    int ret;
    int nblocks;
    APEContext *ape = decoder->ape_ctx;
    const APEFrame *frame;
    uint32_t extra_size = 8;

    /* frames[] has totalframes entries, so currentframe == totalframes is
       already past the end. */
    if (ape->currentframe >= ape->totalframes)
        return -1;

    frame = &ape->frames[ape->currentframe];

    /* A negative size would reach the read callback as a length. */
    if (frame->size < 0)
        return -1;
#if ENABLE_DEBUG
    trace ("ffap: seeking to packet %u (%lld + %d)\n", (unsigned)ape->currentframe, (long long)frame->pos, ape->skip_header);
#endif
    if (decoder->seek(frame->pos + ape->skip_header, SEEK_SET,
                      decoder->client_data) != 0){
        return -1;
    }

    /* Calculate how many blocks there are in this frame */
    if (ape->currentframe == (ape->totalframes - 1))
        nblocks = ape->finalframeblocks;
    else
        nblocks = ape->blocksperframe;

    AV_WL32(ape->packet_data    , nblocks);
    AV_WL32(ape->packet_data + 4, frame->skip);

    /* Update the bitrate (in kbit/s).  Skipped when the duration cannot be
       computed or the result would not fit in an int. */
    if (nblocks > 0 && ape->samplerate != 0 && frame->size != 0) {
        float sec = (float)nblocks / ape->samplerate;
        float bits_per_sec = frame->size / sec * 8;
        if (bits_per_sec >= 1.0f && bits_per_sec < (float)INT_MAX) {
            decoder->bitrate = (int)bits_per_sec / 1000;
        }
    }

    int sz = PACKET_BUFFER_SIZE-8;
    sz = min (sz, frame->size);
    ret = decoder->read (ape->packet_data + extra_size, 1, sz, decoder->client_data);
    ape->packet_sizeleft = frame->size - sz + 8;
    ape->packet_remaining = sz+8;

    ape->currentframe++;

    return ret;
}
646 
647 static void
ape_free_ctx(APEContext * ape_ctx)648 ape_free_ctx (APEContext *ape_ctx) {
649     int i;
650     if (ape_ctx->packet_data) {
651         free (ape_ctx->packet_data);
652         ape_ctx->packet_data = NULL;
653     }
654     if (ape_ctx->frames) {
655         free (ape_ctx->frames);
656         ape_ctx->frames = NULL;
657     }
658     if (ape_ctx->seektable) {
659         free (ape_ctx->seektable);
660         ape_ctx->seektable = NULL;
661     }
662     for (i = 0; i < APE_FILTER_LEVELS; i++) {
663         if (ape_ctx->filterbuf[i]) {
664 #if defined(_WIN32) && ! defined(_MSC_VER)
665             __mingw_aligned_free(ape_ctx->filterbuf[i]);
666 #else
667             free (ape_ctx->filterbuf[i]);
668 #endif
669             ape_ctx->filterbuf[i] = NULL;
670         }
671     }
672     free(ape_ctx);
673 }
674 
/**
 * Destroy a decoder: releases its APE context (if any) and then the decoder
 * object itself.  Passing NULL is a no-op.
 */
void ffap_free (FFap_decoder *decoder)
{
    if (!decoder) {
        return;
    }
    if (decoder->ape_ctx) {
        ape_free_ctx (decoder->ape_ctx);
        decoder->ape_ctx = NULL;
    }
    free (decoder);
}
680 
/**
 * Initialise a decoder for the stream reachable through its callbacks.
 *
 * Zeroes decoder->ape_ctx, parses the file header, validates channel count
 * and compression level, allocates the filter buffers and the packet buffer,
 * and publishes bps / samplerate / channels / duration on the decoder.
 *
 * @param decoder decoder with read, seek, tell, getlength, client_data and
 *                ape_ctx already set
 * @return 0 on success, -1 on missing callbacks/context, an unreadable or
 *         unsupported header, or an allocation failure.  Buffers allocated
 *         before a failure stay attached to decoder->ape_ctx.
 */
int ffap_init(FFap_decoder *decoder)
{
    APEContext *ape;
    int i;

    if (!decoder || !decoder->ape_ctx)
        return -1;

    if(!decoder->read || !decoder->seek || !decoder->tell
            || !decoder->getlength || !decoder->client_data)
        return -1;

    ape = decoder->ape_ctx;
    memset(ape, 0, sizeof(APEContext));

    /* Nothing below is meaningful if the header could not be parsed. */
    if (ape_read_header(decoder) < 0) {
        return -1;
    }

    if (ape->channels < 1 || ape->channels > 2) {
        fprintf (stderr, "ape: Only mono and stereo is supported\n");
        return -1;
    }
#if ENABLE_DEBUG
    fprintf (stderr, "ape: Compression Level: %d - Flags: %d\n",
             ape->compressiontype, ape->formatflags);
#endif
    /* Level 0 must be rejected as well: it would give fset == -1 and index
       the filter tables out of bounds. */
    if (ape->compressiontype < COMPRESSION_LEVEL_FAST ||
            ape->compressiontype % 1000 ||
            ape->compressiontype > COMPRESSION_LEVEL_INSANE)
    {
        fprintf (stderr, "ape: Incorrect compression level %d\n", ape->compressiontype);
        return -1;
    }

    ape->fset = ape->compressiontype / 1000 - 1;
    for (i = 0; i < APE_FILTER_LEVELS; i++) {
        void *mem = NULL;

        if (!ape_filter_orders[ape->fset][i])
            break;
        int err = posix_memalign (&mem, 16,
                                  (ape_filter_orders[ape->fset][i] * 3 + HISTORY_SIZE) * 4);
        if (err) {
            trace ("ffap: out of memory (posix_memalign)\n");
            return -1;
        }
        ape->filterbuf[i] = mem;
    }

    decoder->bps = ape->bps;
    decoder->samplerate = ape->samplerate;
    decoder->channels = ape->channels;
    /* Avoid dividing by a zero sample rate taken from the file. */
    if (ape->samplerate != 0)
        decoder->duration = (float)ape->totalsamples / decoder->samplerate;
    else
        decoder->duration = 0;
    decoder->readpos = 0;

    ape->packet_data = malloc (PACKET_BUFFER_SIZE);
    if (!ape->packet_data) {
        fprintf (stderr, "ape: failed to allocate memory for packet data\n");
        return -1;
    }
    return 0;
}
754 
755 /**
756  * @defgroup rangecoder APE range decoder
757  * @{
758  */
759 
/* Width of the range coder state in bits, and the constants derived from it. */
#define CODE_BITS    32
#define TOP_VALUE    (1u << (CODE_BITS - 1))
#define SHIFT_BITS   (CODE_BITS - 9)
#define EXTRA_BITS   (((CODE_BITS - 2) % 8) + 1)
/* range_dec_normalize() pulls in input while range <= BOTTOM_VALUE. */
#define BOTTOM_VALUE (TOP_VALUE >> 8)
765 
766 /** Start the decoder */
range_start_decoding(APEContext * ctx)767 static inline void range_start_decoding(APEContext * ctx)
768 {
769     ctx->rc.buffer = bytestream_get_byte(&ctx->ptr);
770     ctx->rc.low    = ctx->rc.buffer >> (8 - EXTRA_BITS);
771     ctx->rc.range  = (uint32_t) 1 << EXTRA_BITS;
772 }
773 
774 /** Perform normalization */
range_dec_normalize(APEContext * ctx)775 static inline void range_dec_normalize(APEContext * ctx)
776 {
777     while (ctx->rc.range <= BOTTOM_VALUE) {
778         ctx->rc.buffer <<= 8;
779         if(ctx->ptr < ctx->data_end)
780             ctx->rc.buffer += *ctx->ptr;
781         ctx->ptr++;
782         ctx->rc.low    = (ctx->rc.low << 8)    | ((ctx->rc.buffer >> 1) & 0xFF);
783         ctx->rc.range  <<= 8;
784     }
785 }
786 
787 /**
 * Calculate cumulative frequency for next symbol. Does NO update!
 * @param ctx decoder context
 * @param tot_f is the total frequency or (code_value)1<<shift
 * @return the cumulative frequency
792  */
793 /*static inline int range_decode_culfreq(APEContext * ctx, int tot_f)
794 {
795     range_dec_normalize(ctx);
796     ctx->rc.help = ctx->rc.range / tot_f;
797     return ctx->rc.low / ctx->rc.help;
798 }*/
799 
800 /**
801  * Decode value with given size in bits
802  * @param ctx decoder context
803  * @param shift number of bits to decode
804  */
range_decode_culshift(APEContext * ctx,int shift)805 static inline int range_decode_culshift(APEContext * ctx, int shift)
806 {
807     range_dec_normalize(ctx);
808     ctx->rc.help = ctx->rc.range >> shift;
809     return ctx->rc.low / ctx->rc.help;
810 }
811 
812 
813 /**
814  * Update decoding state
815  * @param ctx decoder context
816  * @param sy_f the interval length (frequency of the symbol)
817  * @param lt_f the lower end (frequency sum of < symbols)
818  */
range_decode_update(APEContext * ctx,int sy_f,int lt_f)819 static inline void range_decode_update(APEContext * ctx, int sy_f, int lt_f)
820 {
821     ctx->rc.low  -= ctx->rc.help * lt_f;
822     ctx->rc.range = ctx->rc.help * sy_f;
823 }
824 
825 /** Decode n bits (n <= 16) without modelling */
range_decode_bits(APEContext * ctx,int n)826 static inline int range_decode_bits(APEContext * ctx, int n)
827 {
828     int sym = range_decode_culshift(ctx, n);
829     range_decode_update(ctx, 1, sym);
830     return sym;
831 }
832 
833 
#define MODEL_ELEMENTS 64

/**
 * Fixed probabilities for symbols in Monkey Audio version 3.97
 * (start of each symbol's range; one extra entry closes the last range).
 */
static const uint16_t counts_3970[22] = {
        0, 14824, 28224, 39348,
    47855, 53994, 58171, 60926,
    62682, 63786, 64463, 64878,
    65126, 65276, 65365, 65419,
    65450, 65469, 65480, 65487,
    65491, 65493,
};

/**
 * Probability ranges for symbols in Monkey Audio version 3.97
 * (entry i equals counts_3970[i + 1] - counts_3970[i]).
 */
static const uint16_t counts_diff_3970[21] = {
    14824, 13400, 11124, 8507,
     6139,  4177,  2755, 1756,
     1104,   677,   415,  248,
      150,    89,    54,   31,
       19,    11,     7,    4,
        2,
};
853 
/**
 * Fixed probabilities for symbols in Monkey Audio version 3.98
 * (start of each symbol's range; one extra entry closes the last range).
 */
static const uint16_t counts_3980[22] = {
        0, 19578, 36160, 48417,
    56323, 60899, 63265, 64435,
    64971, 65232, 65351, 65416,
    65447, 65466, 65476, 65482,
    65485, 65488, 65490, 65491,
    65492, 65493,
};

/**
 * Probability ranges for symbols in Monkey Audio version 3.98
 * (entry i equals counts_3980[i + 1] - counts_3980[i]).
 */
static const uint16_t counts_diff_3980[21] = {
    19578, 16582, 12257, 7906,
     4576,  2366,  1170,  536,
      261,   119,    65,   31,
       19,    10,     6,    3,
        3,     2,     1,    1,
        1,
};
871 
872 /**
873  * Decode symbol
874  * @param ctx decoder context
875  * @param counts probability range start position
876  * @param counts_diff probability range widths
877  */
range_get_symbol(APEContext * ctx,const uint16_t counts[],const uint16_t counts_diff[])878 static inline int range_get_symbol(APEContext * ctx,
879                                    const uint16_t counts[],
880                                    const uint16_t counts_diff[])
881 {
882     int symbol, cf;
883 
884     cf = range_decode_culshift(ctx, 16);
885 
886     if(cf > 65492){
887         symbol= cf - 65535 + 63;
888         range_decode_update(ctx, 1, cf);
889         if(unlikely (cf > 65535)) {
890             ctx->error=1;
891         }
892         return symbol;
893     }
894     /* figure out the symbol inefficiently; a binary search would be much better */
895     for (symbol = 0; counts[symbol + 1] <= cf; symbol++);
896 
897     range_decode_update(ctx, counts_diff[symbol], counts[symbol]);
898 
899     return symbol;
900 }
901 /** @} */ // group rangecoder
902 
/**
 * Adapt the Rice state after a value has been decoded.
 *
 * ksum tracks a running average of the decoded magnitudes. k is lowered
 * when ksum drops below 2^(k+4) and raised once it reaches 2^(k+5).
 * k is never raised past 24: on a corrupt stream ksum can keep growing,
 * and an unbounded k would turn the shifts below into undefined behaviour.
 *
 * @param rice Rice state to update
 * @param x    the just-decoded (still unsigned-coded) value
 */
static inline void update_rice(APERice *rice, int x)
{
    uint32_t lim = rice->k ? ((uint32_t) 1 << (rice->k + 4)) : 0;
    rice->ksum += ((x + 1) / 2) - ((rice->ksum + 16) >> 5);

    if (rice->ksum < lim)
        rice->k--;
    else if (rice->k < 24 && rice->ksum >= ((uint32_t) 1 << (rice->k + 5)))
        rice->k++;
}
913 
/**
 * Decode one residual from the range coder and adapt the Rice state.
 *
 * Two bitstream layouts are handled, selected by ctx->fileversion:
 * below 3990 the value is an overflow symbol plus tmpk raw bits; from 3990 on
 * it is overflow * pivot + base, with base decoded uniformly in [0, pivot).
 *
 * @param ctx  decoder context (range coder state, file version, error flag)
 * @param rice Rice state of the channel being decoded
 * @return the signed residual; 0 with ctx->error set if the range coder
 *         step collapses to zero
 */
static inline int ape_decode_value(APEContext * ctx, APERice *rice)
{
    int x, overflow;

    if (ctx->fileversion < 3990) {
        int tmpk;

        overflow = range_get_symbol(ctx, counts_3970, counts_diff_3970);

        if (overflow == (MODEL_ELEMENTS - 1)) {
            /* Escape: the bit count is transmitted explicitly (5 bits) */
            tmpk = range_decode_bits(ctx, 5);
            overflow = 0;
        } else
            tmpk = (rice->k < 1) ? 0 : rice->k - 1;

        /* range_decode_bits() handles at most 16 bits per call, so wider
           values are read as a low 16-bit part followed by the rest */
        if (tmpk <= 16)
            x = range_decode_bits(ctx, tmpk);
        else {
            x = range_decode_bits(ctx, 16);
            x |= (range_decode_bits(ctx, tmpk - 16) << 16);
        }
        x += overflow << tmpk;
    } else {
        int base, pivot;

        /* pivot is derived from the running sum and is never allowed to be 0
           because it is used as a divisor below */
        pivot = rice->ksum >> 5;
        if (pivot == 0)
            pivot = 1;

        overflow = range_get_symbol(ctx, counts_3980, counts_diff_3980);

        if (overflow == (MODEL_ELEMENTS - 1)) {
            /* Escape: a full 32-bit overflow follows, high half first */
            overflow  = range_decode_bits(ctx, 16) << 16;
            overflow |= range_decode_bits(ctx, 16);
        }

        if (pivot >= 0x10000) {
            /* Codepath for 24-bit streams */
            int nbits, lo_bits, base_hi, base_lo;

            /* Count the number of bits in pivot */
            nbits = 17; /* We know there must be at least 17 bits */
            while ((pivot >> nbits) > 0) { nbits++; }

            /* base_lo is the low (nbits-16) bits of base
               base_hi is the high 16 bits of base
               */
            lo_bits = (nbits - 16);

            // {{{ unrolled base_hi = range_decode_culfreq(ctx, (pivot >> lo_bits) + 1)
            range_dec_normalize(ctx);
            ctx->rc.help = ctx->rc.range / ((pivot >> lo_bits) + 1);
            if (unlikely (ctx->rc.help == 0)) {
                /* would divide by zero below; flag the stream as broken */
                ctx->error = 1;
                return 0;
            }
            base_hi = ctx->rc.low / ctx->rc.help;
            // }}}
            range_decode_update(ctx, 1, base_hi);

            base_lo = range_decode_culshift(ctx, lo_bits);
            range_decode_update(ctx, 1, base_lo);

            base = (base_hi << lo_bits) + base_lo;
        }
        else {
            // {{{ unrolled base = range_decode_culfreq(ctx, pivot)
            range_dec_normalize(ctx);
            ctx->rc.help = ctx->rc.range / pivot;
            if (unlikely (ctx->rc.help == 0)) {
                /* would divide by zero below; flag the stream as broken */
                ctx->error = 1;
                return 0;
            }
            base = ctx->rc.low / ctx->rc.help;
            // }}}
            range_decode_update(ctx, 1, base);
        }

        x = base + overflow * pivot;
    }

    update_rice(rice, x);

    /* Convert to signed: odd codes map to positive values, even codes to
       zero and negative values */
    if (x & 1)
        return (x >> 1) + 1;
    else
        return -(x >> 1);
}
1003 
/**
 * Entropy-decode a batch of blocks into ctx->decoded0 (and ctx->decoded1
 * when stereo is non-zero). Frames flagged as stereo silence are simply
 * zero-filled. Once the batch count equals the frame's block count the
 * range coder is normalized one last time.
 */
static void entropy_decode(APEContext * ctx, int blockstodecode, int stereo)
{
    int32_t *out0 = ctx->decoded0;
    int32_t *out1 = ctx->decoded1;
    int left = blockstodecode;

    ctx->blocksdecoded = blockstodecode;

    if (!(ctx->frameflags & APE_FRAMECODE_STEREO_SILENCE)) {
        while (likely (left--)) {
            *out0++ = ape_decode_value(ctx, &ctx->riceY);
            if (stereo)
                *out1++ = ape_decode_value(ctx, &ctx->riceX);
        }
    } else {
        /* Pure silence: nothing is read from the stream, just clear both
           output buffers. */
        memset(out0, 0, blockstodecode * sizeof(int32_t));
        memset(out1, 0, blockstodecode * sizeof(int32_t));
    }

    /* normalize to use up all bytes */
    if (ctx->blocksdecoded == ctx->currentframeblocks)
        range_dec_normalize(ctx);
}
1026 
/**
 * Parse the per-frame header at ctx->ptr and reset the entropy decoder:
 * CRC, optional frame flags, block counter, both Rice states and the range
 * coder itself.
 */
static void init_entropy_decoder(APEContext * ctx)
{
    ctx->CRC = bytestream_get_be32(&ctx->ptr);

    /* For versions above 3820 the top CRC bit announces a flags word. */
    if ((ctx->fileversion > 3820) && (ctx->CRC & 0x80000000)) {
        ctx->CRC &= ~0x80000000;
        ctx->frameflags = bytestream_get_be32(&ctx->ptr);
    } else {
        ctx->frameflags = 0;
    }

    /* Nothing decoded in this frame yet. */
    ctx->blocksdecoded = 0;

    /* Both channels start from the same Rice state. */
    ctx->riceY.k = 10;
    ctx->riceY.ksum = (1 << ctx->riceY.k) * 16;
    ctx->riceX.k = 10;
    ctx->riceX.ksum = (1 << ctx->riceX.k) * 16;

    /* The first 8 bits of input are ignored. */
    ctx->ptr++;

    range_start_decoding(ctx);
}
1054 
1055 static const int32_t initial_coeffs[4] = {
1056     360, 317, -109, 98
1057 };
1058 
init_predictor_decoder(APEContext * ctx)1059 static void init_predictor_decoder(APEContext * ctx)
1060 {
1061     APEPredictor *p = &ctx->predictor;
1062 
1063     /* Zero the history buffers */
1064     memset(p->historybuffer, 0, PREDICTOR_SIZE * sizeof(int32_t));
1065     p->buf = p->historybuffer;
1066 
1067     /* Initialize and zero the coefficients */
1068     memcpy(p->coeffsA[0], initial_coeffs, sizeof(initial_coeffs));
1069     memcpy(p->coeffsA[1], initial_coeffs, sizeof(initial_coeffs));
1070     memset(p->coeffsB, 0, sizeof(p->coeffsB));
1071 
1072     p->filterA[0] = p->filterA[1] = 0;
1073     p->filterB[0] = p->filterB[1] = 0;
1074     p->lastA[0]   = p->lastA[1]   = 0;
1075 }
1076 
/** Inverse sign of an integer: -1 for positive input, 1 for negative, 0 for zero. */
static inline int APESIGN(int32_t x) {
    if (x > 0)
        return -1;
    return x < 0 ? 1 : 0;
}
1081 
/**
 * Push one decoded residual through the predictor of one channel.
 *
 * The A stage predicts from this channel's own history (4 taps), the B stage
 * from the other channel's filtered output (5 taps). After reconstruction
 * both coefficient sets are nudged by the stored sign values, in the
 * direction given by the sign of the residual.
 *
 * @param p       predictor state
 * @param decoded residual for this sample
 * @param filter  channel index (0 or 1); filter ^ 1 is the other channel
 * @param delayA  history offset of the newest A-stage sample
 * @param delayB  history offset of the newest B-stage sample
 * @param adaptA  history offset of the newest A-stage sign value
 * @param adaptB  history offset of the newest B-stage sign value
 * @return the updated filterA value for this channel
 */
static int predictor_update_filter(APEPredictor *p, const int decoded, const int filter, const int delayA, const int delayB, const int adaptA, const int adaptB)
{
    int32_t *const hist  = p->buf;
    int32_t *const coefA = p->coeffsA[filter];
    int32_t *const coefB = p->coeffsB[filter];
    int32_t sumA, sumB;
    int tap;

    /* A stage: store the previous output, its first difference and the
       inverse signs of both. */
    hist[delayA]     = p->lastA[filter];
    hist[adaptA]     = APESIGN(hist[delayA]);
    hist[delayA - 1] = hist[delayA] - hist[delayA - 1];
    hist[adaptA - 1] = APESIGN(hist[delayA - 1]);

    sumA = hist[delayA    ] * coefA[0] +
           hist[delayA - 1] * coefA[1] +
           hist[delayA - 2] * coefA[2] +
           hist[delayA - 3] * coefA[3];

    /* B stage: input is the other channel's output passed through a scaled
       first-order filter. */
    hist[delayB]     = p->filterA[filter ^ 1] - ((p->filterB[filter] * 31) >> 5);
    hist[adaptB]     = APESIGN(hist[delayB]);
    hist[delayB - 1] = hist[delayB] - hist[delayB - 1];
    hist[adaptB - 1] = APESIGN(hist[delayB - 1]);
    p->filterB[filter] = p->filterA[filter ^ 1];

    sumB = hist[delayB    ] * coefB[0] +
           hist[delayB - 1] * coefB[1] +
           hist[delayB - 2] * coefB[2] +
           hist[delayB - 3] * coefB[3] +
           hist[delayB - 4] * coefB[4];

    /* Reconstruct; the B prediction contributes at half weight. */
    p->lastA[filter] = decoded + ((sumA + (sumB >> 1)) >> 10);
    p->filterA[filter] = p->lastA[filter] + ((p->filterA[filter] * 31) >> 5);

    /* A zero residual leaves the coefficients untouched. */
    if (decoded > 0) {
        for (tap = 0; tap < 4; tap++)
            coefA[tap] -= hist[adaptA - tap];
        for (tap = 0; tap < 5; tap++)
            coefB[tap] -= hist[adaptB - tap];
    } else if (decoded < 0) {
        for (tap = 0; tap < 4; tap++)
            coefA[tap] += hist[adaptA - tap];
        for (tap = 0; tap < 5; tap++)
            coefB[tap] += hist[adaptB - tap];
    }

    return p->filterA[filter];
}
1140 
/**
 * Run the predictor over count stereo blocks, replacing the residuals in
 * ctx->decoded0 / ctx->decoded1 with the reconstructed values.
 */
static void predictor_decode_stereo(APEContext * ctx, int count)
{
    APEPredictor *p = &ctx->predictor;
    int32_t *ch0 = ctx->decoded0;
    int32_t *ch1 = ctx->decoded1;

    for (; count; count--, ch0++, ch1++) {
        /* Channel 0 (Y) must be processed first: channel 1 (X) reads the
           filterA value it has just produced. */
        const int32_t out0 = predictor_update_filter(p, *ch0, 0, YDELAYA, YDELAYB, YADAPTCOEFFSA, YADAPTCOEFFSB);
        const int32_t out1 = predictor_update_filter(p, *ch1, 1, XDELAYA, XDELAYB, XADAPTCOEFFSA, XADAPTCOEFFSB);

        *ch0 = out0;
        *ch1 = out1;

        /* Advance the shared history window by one sample. */
        p->buf++;

        /* End of the history buffer reached: move the live window back to
           the front. */
        if (p->buf == p->historybuffer + HISTORY_SIZE) {
            memmove(p->historybuffer, p->buf, PREDICTOR_SIZE * sizeof(int32_t));
            p->buf = p->historybuffer;
        }
    }
}
1165 
/**
 * Run the single-channel predictor over count blocks, replacing the
 * residuals in ctx->decoded0 with the reconstructed values. Only the A stage
 * of channel 0 is used.
 */
static void predictor_decode_mono(APEContext * ctx, int count)
{
    APEPredictor *p = &ctx->predictor;
    int32_t *out = ctx->decoded0;
    int32_t lastA = p->lastA[0];
    int tap;

    for (; count; count--, out++) {
        int32_t *hist = p->buf;
        const int32_t residual = *out;
        int32_t prediction;

        /* Store the previous output and its first difference. */
        hist[YDELAYA] = lastA;
        hist[YDELAYA - 1] = hist[YDELAYA] - hist[YDELAYA - 1];

        prediction = hist[YDELAYA    ] * p->coeffsA[0][0] +
                     hist[YDELAYA - 1] * p->coeffsA[0][1] +
                     hist[YDELAYA - 2] * p->coeffsA[0][2] +
                     hist[YDELAYA - 3] * p->coeffsA[0][3];

        lastA = residual + (prediction >> 10);

        hist[YADAPTCOEFFSA]     = APESIGN(hist[YDELAYA    ]);
        hist[YADAPTCOEFFSA - 1] = APESIGN(hist[YDELAYA - 1]);

        /* Nudge the coefficients by the stored signs; a zero residual
           leaves them untouched. */
        if (residual > 0) {
            for (tap = 0; tap < 4; tap++)
                p->coeffsA[0][tap] -= hist[YADAPTCOEFFSA - tap];
        } else if (residual < 0) {
            for (tap = 0; tap < 4; tap++)
                p->coeffsA[0][tap] += hist[YADAPTCOEFFSA - tap];
        }

        /* First-order output filter; its result is the decoded sample. */
        p->filterA[0] = lastA + ((p->filterA[0] * 31) >> 5);
        *out = p->filterA[0];

        /* Advance the history window, wrapping back to the front when the
           end of the buffer is reached. */
        p->buf++;
        if (p->buf == p->historybuffer + HISTORY_SIZE) {
            memmove(p->historybuffer, p->buf, PREDICTOR_SIZE * sizeof(int32_t));
            p->buf = p->historybuffer;
        }
    }

    p->lastA[0] = lastA;
}
1216 
/**
 * Lay one filter out inside buf and reset it.
 *
 * The first order entries hold the coefficients; the history follows
 * immediately. Within the history the adaptation cursor starts at offset
 * order and the delay cursor at offset 2 * order. Only the coefficients and
 * the first 2 * order history entries are cleared.
 */
static void do_init_filter(APEFilter *f, int16_t * buf, int order)
{
    int16_t *const history = buf + order;

    f->coeffs        = buf;
    f->historybuffer = history;
    f->adaptcoeffs   = history + order;
    f->delay         = history + order * 2;

    memset(f->coeffs, 0, order * sizeof(int16_t));
    memset(history, 0, (order * 2) * sizeof(int16_t));
    f->avg = 0;
}
1228 
/**
 * Initialise the pair of per-channel filters for one filter level. Both live
 * in the same buffer; the second starts order * 3 + HISTORY_SIZE entries
 * after the first.
 */
static void init_filter(APEFilter *f, int16_t * buf, int order)
{
    int16_t *const second = buf + order * 3 + HISTORY_SIZE;

    do_init_filter(f, buf, order);
    do_init_filter(f + 1, second, order);
}
1234 
/* Under clang, make optimize(...) expand to nothing so that it drops out of
   the attribute list below (presumably because clang does not implement the
   GCC optimize attribute; confirm). */
#ifdef __clang__
#define optimize(x)
#endif

/* Outside x86 there are no per-ISA variants, so target(...) is likewise
   made to expand to nothing. */
#if !defined(__x86_64__) && !defined(__i386__)
#define target(x)
#endif

/*
 * Generates scalarproduct_and_madd_int16_<TYPE>(), compiled for the
 * instruction set named by TARGET.
 *
 * The generated function walks order elements and
 *   - accumulates the dot product of v1 and v2, using each v1 element
 *     before it is modified, and
 *   - adds mul * v3[i] to v1[i] in place.
 * It returns the accumulated dot product.
 */
#define DECLARE_SCALARPRODUCT_AND_MADD(TYPE, TARGET) \
__attribute__ ((target (TARGET), optimize("O3,unroll-loops"))) \
static int32_t scalarproduct_and_madd_int16_##TYPE(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul) \
{ \
    int res = 0; \
    while (order--) { \
        res   += *v1 * *v2++; \
        *v1++ += mul * *v3++; \
    } \
    return res; \
}

/* One copy per x86 SIMD level, plus a generic build for everything else. */
#if defined(__x86_64__) || defined(__i386__)
DECLARE_SCALARPRODUCT_AND_MADD(sse2, "sse2")
DECLARE_SCALARPRODUCT_AND_MADD(sse42, "sse4.2")
DECLARE_SCALARPRODUCT_AND_MADD(avx, "avx")
DECLARE_SCALARPRODUCT_AND_MADD(avx2, "avx2")
#endif
DECLARE_SCALARPRODUCT_AND_MADD(c, "default")


/* Variant actually used by the filters; assigned by ffap_load(), which must
   therefore run before any decoding. */
static int32_t
(*scalarproduct_and_madd_int16)(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul);
1266 
/**
 * Saturate an int to the int16_t range [-32768, 32767].
 *
 * Written as plain comparisons so that no intermediate can overflow, even
 * for inputs close to INT_MAX, and so that the result does not depend on how
 * negative values are right-shifted.
 *
 * @param a value to clip
 * @return a clamped to the representable int16_t range
 */
static inline int16_t clip_int16(int a)
{
    if (a > 32767)
        return 32767;
    if (a < -32768)
        return -32768;
    return (int16_t) a;
}
1272 
/**
 * Byte-swap w 32-bit words from src into dst. Each word is handled
 * independently, so dst may be the same buffer as src. A non-positive w
 * copies nothing.
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int i;

    /* Plain indexing keeps side effects out of the bswap_32() argument. */
    for (i = 0; i < w; i++)
        dst[i] = bswap_32(src[i]);
}
1290 
1291 
/**
 * Apply one adaptive filter to count samples in place.
 *
 * For each sample the filter output (dot product of coefficients and history,
 * rounded and scaled down by fracbits) is added to the input, the
 * coefficients are adapted by the sign of the input, and the clipped result
 * is appended to the history. The rule that produces the next adaptation
 * value depends on the file version.
 */
static inline void do_apply_filter(int version, APEFilter *f, int32_t *data, int count, int order, int fracbits)
{
    const int legacy = (version < 3980);
    int out;
    int magnitude;

    for (; count; count--) {
        out = scalarproduct_and_madd_int16(f->coeffs, f->delay - order, f->adaptcoeffs - order, order, APESIGN(*data));
        out = (out + (1 << (fracbits - 1))) >> fracbits;
        out += *data;

        *data++ = out;

        /* Append the result, clipped to 16 bits, to the output history. */
        *f->delay++ = clip_int16(out);

        if (!legacy) {
            /* Version 3.98 and later files: the adaptation step size depends
               on how the magnitude compares with its running average. */
            magnitude = (out < 0 ? -out : out);

            if (magnitude > (f->avg * 3))
                *f->adaptcoeffs = ((out >> 25) & 64) - 32;
            else if (magnitude > (f->avg * 4) / 3)
                *f->adaptcoeffs = ((out >> 26) & 32) - 16;
            else if (magnitude > 0)
                *f->adaptcoeffs = ((out >> 27) & 16) - 8;
            else
                *f->adaptcoeffs = 0;

            f->avg += (magnitude - f->avg) / 16;

            f->adaptcoeffs[-1] >>= 1;
            f->adaptcoeffs[-2] >>= 1;
            f->adaptcoeffs[-8] >>= 1;
        } else {
            /* Version ??? to < 3.98 files (untested) */
            f->adaptcoeffs[0]  = (out == 0) ? 0 : ((out >> 28) & 8) - 4;
            f->adaptcoeffs[-4] >>= 1;
            f->adaptcoeffs[-8] >>= 1;
        }

        f->adaptcoeffs++;

        /* History buffer exhausted: copy the last 2 * order entries back to
           the front and rewind both cursors. */
        if (f->delay == f->historybuffer + HISTORY_SIZE + (order * 2)) {
            memmove(f->historybuffer, f->delay - (order * 2),
                    (order * 2) * sizeof(int16_t));
            f->delay = f->historybuffer + order * 2;
            f->adaptcoeffs = f->historybuffer + order;
        }
    }
}
1345 
/**
 * Apply one filter level: f[0] to data0 and, when data1 is non-NULL,
 * f[1] to data1.
 */
static void apply_filter(APEContext * ctx, APEFilter *f,
                         int32_t * data0, int32_t * data1,
                         int count, int order, int fracbits)
{
    const int version = ctx->fileversion;

    do_apply_filter(version, f, data0, count, order, fracbits);
    if (data1 != NULL)
        do_apply_filter(version, f + 1, data1, count, order, fracbits);
}
1354 
/**
 * Run every filter level configured for ctx->fset over the decoded data.
 * The per-set order table is terminated by a zero entry.
 */
static void ape_apply_filters(APEContext * ctx, int32_t * decoded0,
                              int32_t * decoded1, int count)
{
    int level;

    for (level = 0;
         level < APE_FILTER_LEVELS && ape_filter_orders[ctx->fset][level];
         level++) {
        apply_filter(ctx, ctx->filters[level], decoded0, decoded1, count,
                     ape_filter_orders[ctx->fset][level],
                     ape_filter_fracbits[ctx->fset][level]);
    }
}
1366 
/**
 * Prepare for a new frame: reset the entropy decoder and the predictor, then
 * every filter level configured for ctx->fset (the order table ends at the
 * first zero entry).
 */
static void init_frame_decoder(APEContext * ctx)
{
    int level;

    init_entropy_decoder(ctx);
    init_predictor_decoder(ctx);

    for (level = 0;
         level < APE_FILTER_LEVELS && ape_filter_orders[ctx->fset][level];
         level++) {
        init_filter(ctx->filters[level], ctx->filterbuf[level],
                    ape_filter_orders[ctx->fset][level]);
    }
}
1379 
/**
 * Decode count blocks of a mono (or pseudo-stereo) frame into ctx->decoded0.
 * When the stream reports two channels, the result is duplicated into
 * ctx->decoded1.
 */
static void ape_unpack_mono(APEContext * ctx, int count)
{
    int32_t *src = ctx->decoded0;
    int32_t *dup = ctx->decoded1;

    entropy_decode(ctx, count, 0);

    /* A silent frame has already been zero-filled by entropy_decode(). */
    if (ctx->frameflags & APE_FRAMECODE_STEREO_SILENCE)
        return;

    ape_apply_filters(ctx, src, NULL, count);

    /* Now apply the predictor decoding */
    predictor_decode_mono(ctx, count);

    /* Pseudo-stereo - just copy left channel to right channel */
    if (ctx->channels == 2) {
        for (; count; count--)
            *dup++ = *src++;
    }
}
1407 
/**
 * Decode count blocks of a stereo frame and undo the channel decorrelation,
 * leaving left samples in ctx->decoded0 and right samples in ctx->decoded1.
 * Frames flagged as stereo silence are left untouched.
 */
static void ape_unpack_stereo(APEContext * ctx, int count)
{
    int32_t *ch0 = ctx->decoded0;
    int32_t *ch1 = ctx->decoded1;

    /* We are pure silence, so we're done. */
    if (ctx->frameflags & APE_FRAMECODE_STEREO_SILENCE)
        return;

    entropy_decode(ctx, count, 1);
    ape_apply_filters(ctx, ch0, ch1, count);

    /* Now apply the predictor decoding */
    predictor_decode_stereo(ctx, count);

    /* Decorrelate: left = ch1 - ch0 / 2, right = left + ch0 */
    for (; count; count--, ch0++, ch1++) {
        const int32_t left  = *ch1 - (*ch0 / 2);
        const int32_t right = left + *ch0;

        *ch0 = left;
        *ch1 = right;
    }
}
1435 
/**
 * Decode up to BLOCKS_PER_LOOP blocks of the current frame into data.
 *
 * If no samples are pending, the next packet is fetched with
 * ape_read_packet() and the frame decoder is initialised; otherwise the
 * packet buffer is topped up through the read callback. The decoded blocks
 * (minus any still to be skipped after a seek) are written interleaved to
 * data, and the consumed bytes are removed from the packet buffer.
 *
 * @param decoder   decoder instance
 * @param data      output buffer for interleaved samples
 * @param data_size in: capacity of data in bytes, must hold at least
 *                  BLOCKS_PER_LOOP blocks; out: number of bytes written
 * @return number of packet-buffer bytes consumed, 0 when there is nothing
 *         left to decode, or -1 on error / end of stream
 */
static int
ape_decode_frame(FFap_decoder *decoder, void *data, int *data_size)
{
    APEContext *s = decoder->ape_ctx;
    char *samples = data;
    int nblocks;
    int i, n;
    int blockstodecode;
    int bytes_used;
    /* bytes per output block; 24-bit samples are emitted in 4 bytes each */
    int samplesize = decoder->bps/8 * s->channels;
    if(decoder->bps == 24)
        samplesize = 4 * s->channels;

    /* should not happen but who knows */
    if (BLOCKS_PER_LOOP * samplesize > *data_size) {
        fprintf (stderr, "ape: Packet size is too big! (max is %d where you have %d)\n", *data_size, BLOCKS_PER_LOOP * samplesize);
        return -1;
    }

    /* Refill only while the packet buffer has room */
    if (s->packet_remaining < PACKET_BUFFER_SIZE) {
        if (s->samples == 0) {
            /* previous frame fully consumed: stop at the last frame */
            if (s->currentframe == s->totalframes) {
                return -1;
            }
            assert (!s->samples);
            assert (s->samples == 0); // all samples from prev packet must have been read
            // start new packet
            if (ape_read_packet (decoder) < 0) {
                fprintf (stderr, "ape: error reading packet\n");
                return -1;
            }
            /* swap the byte order of the packet in place, one 32-bit word at a time */
            bswap_buf((uint32_t*)(s->packet_data), (const uint32_t*)(s->packet_data), s->packet_remaining >> 2);

            s->ptr = s->last_ptr = s->packet_data;

            /* packet header: block count of this frame ... */
            nblocks = s->samples = bytestream_get_be32(&s->ptr);

            /* ... followed by the number of bytes (0..3) to skip before the
               frame data */
            n = bytestream_get_be32(&s->ptr);
            if(n < 0 || n > 3){
                fprintf (stderr, "ape: Incorrect offset passed\n");
                return -1;
            }
            s->ptr += n;

            s->currentframeblocks = nblocks;

            /* empty or invalid frame: produce nothing and drop the whole
               buffered packet */
            if (s->samples <= 0) {
                *data_size = 0;
                bytes_used = s->packet_remaining;
                goto error;
            }

            memset(s->decoded0,  0, sizeof(s->decoded0));
            memset(s->decoded1,  0, sizeof(s->decoded1));

            /* Initialize the frame decoder */
            init_frame_decoder(s);
        }
        else {
            /* mid-frame: append more of the current packet, limited by the
               free buffer space and the bytes still unread, rounded down to
               a multiple of 4 */
            int sz = PACKET_BUFFER_SIZE - s->packet_remaining;
            sz = min (sz, s->packet_sizeleft);
            sz = sz&~3;
            uint8_t *p = s->packet_data + s->packet_remaining;
            int r = decoder->read (p, 1, sz, decoder->client_data);
            bswap_buf((uint32_t*)p, (const uint32_t*)p, r >> 2);
            s->packet_sizeleft -= r;
            s->packet_remaining += r;
        }
    }
    s->data_end = s->packet_data + s->packet_remaining;

    /* nothing buffered and nothing pending: report zero output */
    if (s->packet_remaining == 0 && !s->samples) {
        *data_size = 0;
        return 0;
    }
    /* samples are pending but there is no input left to decode them from */
    if (!s->packet_remaining) {
        fprintf (stderr, "ape: packetbuf is empty!!\n");
        *data_size = 0;
        bytes_used = s->packet_remaining;
        goto error;
    }

    nblocks = s->samples;
    blockstodecode = min(BLOCKS_PER_LOOP, nblocks);

    s->error=0;

    if ((s->channels == 1) || (s->frameflags & APE_FRAMECODE_PSEUDO_STEREO))
        ape_unpack_mono(s, blockstodecode);
    else
        ape_unpack_stereo(s, blockstodecode);

    /* decoder flagged an error or read up to/past the buffered data:
       abandon the rest of the frame */
    if(s->error || s->ptr >= s->data_end){
        s->samples=0;
        if (s->error) {
            fprintf (stderr, "ape: Error decoding frame, error=%d\n", s->error);
        }
        else {
            fprintf (stderr, "ape: Error decoding frame, ptr > data_end\n");
        }
        return -1;
    }

    /* leading blocks still to be dropped after a seek are not written out */
    int skip = min (s->samplestoskip, blockstodecode);
    i = skip;

    /* interleave the channels into the output buffer; 24-bit and 32-bit
       streams both use 4-byte output samples */
    if (decoder->bps == 32 || decoder->bps == 24) {
        for (; i < blockstodecode; i++) {
            *((int32_t*)samples) = s->decoded0[i];
            samples += 4;
            if(s->channels > 1) {
                *((int32_t*)samples) = s->decoded1[i];
                samples += 4;
            }
        }
    }
    else if (decoder->bps == 16) {
        for (; i < blockstodecode; i++) {
            *((int16_t*)samples) = (int16_t)s->decoded0[i];
            samples += 2;
            if(s->channels > 1) {
                *((int16_t*)samples) = (int16_t)s->decoded1[i];
                samples += 2;
            }
        }
    }
    else if (decoder->bps == 8) {
        for (; i < blockstodecode; i++) {
            *samples = (int16_t)s->decoded0[i];
            samples++;
            if(s->channels > 1) {
                *samples = (int16_t)s->decoded1[i];
                samples++;
            }
        }
    }

    s->samplestoskip -= skip;
    s->samples -= blockstodecode;

    *data_size = (blockstodecode - skip) * samplesize;
    /* frame finished: discard everything buffered; otherwise only the bytes
       consumed since the previous call */
    bytes_used = s->samples ? s->ptr - s->last_ptr : s->packet_remaining;

    // shift everything
    /* (this tail runs on the normal path as well as via goto) */
error:
    if (bytes_used < s->packet_remaining) {
        memmove (s->packet_data, s->packet_data+bytes_used, s->packet_remaining-bytes_used);
    }
    s->packet_remaining -= bytes_used;
    s->ptr -= bytes_used;
    s->last_ptr = s->ptr;

    return bytes_used;
}
1596 
/**
 * Read decoded PCM into buffer.
 *
 * Pending output held in the context's staging buffer is handed out first;
 * when it runs dry another batch is decoded with ape_decode_frame(). The
 * loop ends when size bytes have been delivered or the frame decoder
 * reports -1 (error or end of stream).
 *
 * On return the context's currentsample and decoder->readpos (seconds, with
 * fractional part) reflect the amount delivered.
 *
 * @param decoder decoder instance
 * @param buffer  destination for interleaved samples
 * @param size    number of bytes requested
 * @return number of bytes actually written, which is less than size at end
 *         of stream or on a decoding error
 */
int ffap_read(FFap_decoder *decoder, unsigned char *buffer, int size)
{
    APEContext *ctx = decoder->ape_ctx;
    const int requested = size;
    /* bytes per block; 24-bit samples are delivered in 4 bytes each */
    int samplesize = decoder->bps / 8 * decoder->channels;
    if(decoder->bps == 24)
        samplesize = 4 * decoder->channels;

    while (size > 0) {
        if (ctx->remaining > 0) {
            /* Hand out what is already decoded, then close the gap. */
            int sz = min (size, ctx->remaining);
            memcpy (buffer, ctx->buffer, sz);
            buffer += sz;
            size -= sz;
            if (ctx->remaining > sz) {
                memmove (ctx->buffer, ctx->buffer + sz, ctx->remaining-sz);
            }
            ctx->remaining -= sz;
            continue;
        }

        /* Staging buffer is empty: decode the next batch into it. The next
           loop iteration copies the result out. */
        int avail = BLOCKS_PER_LOOP * 2 * 2 * 2;
        assert (ctx->remaining <= avail/2);
        avail -= ctx->remaining;
        uint8_t *buf = ctx->buffer + ctx->remaining;
        if (ape_decode_frame (decoder, buf, &avail) == -1) {
            break;
        }
        /* ape_decode_frame() stored the number of bytes produced in avail */
        ctx->remaining += avail;
    }

    /* A zero sample size (bps below 8 or no channels) must not be used as a
       divisor. */
    if (samplesize > 0) {
        ctx->currentsample += (requested - size) / samplesize;
    }
    /* Divide in floating point, as ffap_seek_sample() does, so the position
       keeps its fractional seconds. */
    decoder->readpos = (float)ctx->currentsample / decoder->samplerate;
    return requested - size;
}
1646 
1647 static int
ffap_seek_sample(FFap_decoder * decoder,int sample)1648 ffap_seek_sample (FFap_decoder *decoder, int sample) {
1649     //ape_info_t *info = (ape_info_t*)_info;
1650     //sample += info->startsample;
1651     trace ("ffap: seeking to %d/%u\n", sample, decoder->ape_ctx->totalsamples);
1652     uint32_t newsample = sample;
1653     if (newsample > decoder->ape_ctx->totalsamples) {
1654         trace ("eof\n");
1655         return -1;
1656     }
1657     uint32_t nframe = newsample / decoder->ape_ctx->blocksperframe;
1658     if (nframe >= decoder->ape_ctx->totalframes) {
1659         trace ("eof2\n");
1660         return -1;
1661     }
1662     decoder->ape_ctx->currentframe = nframe;
1663     decoder->ape_ctx->samplestoskip = newsample - nframe * decoder->ape_ctx->blocksperframe;
1664     trace ("ffap: seek to sample %u at blockstart\n", nframe * decoder->ape_ctx->blocksperframe);
1665     trace ("ffap: samples to skip: %d\n", decoder->ape_ctx->samplestoskip);
1666 
1667     // reset decoder
1668     decoder->ape_ctx->remaining = 0;
1669     decoder->ape_ctx->packet_remaining = 0;
1670     decoder->ape_ctx->samples = 0;
1671     decoder->ape_ctx->currentsample = newsample;
1672     decoder->readpos = (float)(newsample/*-info->startsample*/)/decoder->samplerate;
1673     return 0;
1674 }
1675 
/**
 * Seek to a position given in seconds.
 *
 * @return 0 on success, -1 if the position is past the end of the stream
 */
int ffap_seek(FFap_decoder *decoder, float seconds) {
    /* The fractional part of the sample index is dropped. */
    const int target = seconds * decoder->samplerate;

    return ffap_seek_sample (decoder, target);
}
1679 
ffap_new(ffap_read_callback read_callback,ffap_seek_callback seek_callback,ffap_tell_callback tell_callback,ffap_getlength_callback getlength_callback,void * client_data)1680 FFap_decoder *ffap_new(ffap_read_callback read_callback,
1681                        ffap_seek_callback seek_callback,
1682                        ffap_tell_callback tell_callback,
1683                        ffap_getlength_callback getlength_callback,
1684                        void *client_data)
1685 {
1686     FFap_decoder *decoder = malloc (sizeof (FFap_decoder));
1687     memset (decoder, 0, sizeof (FFap_decoder));
1688     decoder->read = read_callback;
1689     decoder->seek = seek_callback;
1690     decoder->tell = tell_callback;
1691     decoder->getlength = getlength_callback;
1692     decoder->client_data = client_data;
1693     decoder->ape_ctx = malloc (sizeof (APEContext));
1694     memset(decoder->ape_ctx, 0, (sizeof (APEContext)));
1695     return decoder;
1696 }
1697 
1698 
ffap_load()1699 void ffap_load()
1700 {
1701 #if defined(__x86_64__) || defined(__i386__)
1702     if (__builtin_cpu_supports("avx2"))
1703     {
1704         scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_avx2;
1705         trace ("ffap: avx2 support detected\n");
1706     }
1707     else if (__builtin_cpu_supports("avx"))
1708     {
1709         scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_avx;
1710         trace ("ffap: avx support detected\n");
1711     }
1712     else if (__builtin_cpu_supports("sse4.2"))
1713     {
1714         scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_sse42;
1715         trace ("ffap: sse4.2 support detected\n");
1716     }
1717     else if (__builtin_cpu_supports("sse2"))
1718     {
1719         scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_sse2;
1720         trace ("ffap: sse2 support detected\n");
1721     }
1722     else
1723 #endif
1724     {
1725         scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
1726         trace ("ffap: SIMD support is not detected\n");
1727     }
1728 }
1729