1 #include <libavutil/opt.h>
2 #include <libswresample/swresample.h>
3 
4 #include "bliss.h"
5 
6 #define NB_BYTES_PER_SAMPLE 2
7 #define SAMPLE_RATE 22050
8 #define CHANNELS 2
9 
10 int fill_song_properties(struct bl_song *const song, char const *const filename,
11                          AVCodecParameters *codecpar, AVFormatContext *context,
12                          struct SwrContext **swr_ctx);
13 
14 int process_frame(struct bl_song *const song, int8_t **beginning_ptr,
15                   AVFrame *decoded_frame, int *index_ptr, uint64_t *size_ptr,
16                   struct SwrContext *swr_ctx);
17 
18 int resample_decoded_frames(struct SwrContext *swr_ctx,
19                             struct bl_song *const song, AVFrame *decoded_frame,
20                             uint8_t ***out_buffer, int flush);
21 
22 int append_buffer_to_song(struct bl_song *const song, int *index_ptr,
23                           int nb_samples, int8_t **beginning_ptr,
24                           uint64_t *size_ptr, uint8_t *decoded_samples);
25 
bl_audio_decode(char const * const filename,struct bl_song * const song)26 int bl_audio_decode(char const *const filename, struct bl_song *const song) {
27   int ret;
28   // Contexts and libav variables
29   AVPacket avpkt;
30   AVFormatContext *context;
31   int audio_stream;
32   AVCodecContext *codec_context = NULL;
33 #if LIBSWRESAMPLE_VERSION_MAJOR >= 2
34   AVCodecParameters *codecpar = NULL;
35 #endif
36   AVCodec *codec = NULL;
37   AVFrame *decoded_frame = NULL;
38   struct SwrContext *swr_ctx;
39 
40   // Size of the samples
41   uint64_t size = 0;
42 
43   // Pointer to beginning of music data
44   int8_t *beginning;
45   // Received frame holder
46   int got_frame;
47   // Position in the data buffer
48   int index;
49   // Initialize AV lib
50   av_register_all();
51   context = avformat_alloc_context();
52 
53   av_log_set_level(AV_LOG_QUIET);
54 
55   // Open input file
56   if (avformat_open_input(&context, filename, NULL, NULL) < 0) {
57     fprintf(stderr, "Couldn't open file: %s. Error %d encountered.\n", filename,
58             errno);
59     return BL_UNEXPECTED;
60   }
61 
62   // Search for a valid stream
63   if (avformat_find_stream_info(context, NULL) < 0) {
64     fprintf(stderr, "Couldn't find stream information\n");
65     avformat_close_input(&context);
66     return BL_UNEXPECTED;
67   }
68 
69   // Find stream and corresponding codec
70   audio_stream =
71       av_find_best_stream(context, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
72   if (audio_stream < 0) {
73     fprintf(stderr, "Couldn't find a suitable audio stream\n");
74     avformat_close_input(&context);
75     return BL_UNEXPECTED;
76   }
77 
78 #if LIBSWRESAMPLE_VERSION_MAJOR < 2
79   codec_context = context->streams[audio_stream]->codec;
80   if (!codec_context) {
81     fprintf(stderr, "Codec not found!\n");
82     avformat_close_input(&context);
83     return BL_UNEXPECTED;
84   }
85 #else
86   // Find codec parameters
87   codecpar = context->streams[audio_stream]->codecpar;
88 
89   // Find and allocate codec context
90   codec_context = avcodec_alloc_context3(codec);
91 #endif
92   codec_context->thread_count = 0;
93   codec_context->thread_type = FF_THREAD_FRAME;
94 
95   if (avcodec_open2(codec_context, codec, NULL) < 0) {
96     fprintf(stderr, "Could not open codec\n");
97     return BL_UNEXPECTED;
98     avformat_close_input(&context);
99   }
100 
101   // Fill song properties
102   if ((ret = fill_song_properties(song, filename, codecpar, context,
103                                   &swr_ctx)) == BL_UNEXPECTED) {
104     goto cleanup;
105   }
106   beginning = song->sample_array;
107   index = 0;
108 
109   // Read the whole data and copy them into a huge buffer
110   av_init_packet(&avpkt);
111   decoded_frame = av_frame_alloc();
112   if (!decoded_frame) {
113     fprintf(stderr, "Could not allocate audio frame\n");
114     ret = BL_UNEXPECTED;
115     goto cleanup;
116   }
117   while (av_read_frame(context, &avpkt) == 0) {
118     if (avpkt.stream_index == audio_stream) {
119 #if LIBSWRESAMPLE_VERSION_MAJOR < 2
120       avcodec_decode_audio4(codec_context, decoded_frame, &got_frame, &avpkt);
121 #else
122       avcodec_send_packet(codec_context, &avpkt);
123       got_frame = !avcodec_receive_frame(codec_context, decoded_frame);
124 #endif
125 
126       av_packet_unref(&avpkt);
127 
128       // Copy decoded data into a huge array
129       if (got_frame) {
130         if ((ret = process_frame(song, &beginning, decoded_frame, &index, &size,
131                                  swr_ctx)) == BL_UNEXPECTED) {
132           goto cleanup;
133         }
134       }
135     } else {
136       // Dropping packets that do not belong to the audio stream
137       // (such as album cover)
138       av_packet_unref(&avpkt);
139     }
140   }
141   // Free memory
142   avpkt.data = NULL;
143   avpkt.size = 0;
144 
145   // Read the end of audio, as precognized in
146   // http://ffmpeg.org/pipermail/libav-user/2015-August/008433.html
147 #if LIBSWRESAMPLE_VERSION_MAJOR < 2
148   do {
149     avcodec_decode_audio4(codec_context, decoded_frame, &got_frame, &avpkt);
150     if (got_frame) {
151       if ((ret = process_frame(song, &beginning, decoded_frame, &index, &size,
152                                swr_ctx)) == BL_UNEXPECTED) {
153         goto cleanup;
154       }
155     }
156   } while (got_frame);
157 #else
158   avcodec_send_packet(codec_context, NULL);
159   do {
160     ret = avcodec_receive_frame(codec_context, decoded_frame);
161     if (!ret) {
162       if (process_frame(song, &beginning, decoded_frame, &index, &size,
163                         swr_ctx) == BL_UNEXPECTED) {
164         ret = BL_UNEXPECTED;
165         goto cleanup;
166       }
167     }
168   } while (!ret);
169 #endif
170   if (song->resampled == 1) {
171     uint8_t **out_buffer;
172     if ((ret = resample_decoded_frames(swr_ctx, song, decoded_frame,
173                                        &out_buffer, 1)) == BL_UNEXPECTED) {
174       return BL_UNEXPECTED;
175     }
176     if (ret) {
177       if (append_buffer_to_song(song, &index, ret, &beginning, &size,
178                                 out_buffer[0]) == BL_UNEXPECTED) {
179         return BL_UNEXPECTED;
180       }
181     }
182     if (out_buffer)
183       av_freep(&out_buffer[0]);
184     av_freep(&out_buffer);
185   }
186 
187   // Use correct number of samples after decoding
188   if ((song->nSamples = index) <= 0) {
189     fprintf(stderr, "Couldn't find any valid samples while decoding\n");
190     return BL_UNEXPECTED;
191   }
192   song->sample_array = beginning;
193   song->sample_rate = SAMPLE_RATE;
194   song->channels = CHANNELS;
195 
196   ret = BL_OK;
197 cleanup:
198   // Free memory
199   if (song->resampled)
200     swr_free(&swr_ctx);
201 #if LIBSWRESAMPLE_VERSION_MAJOR < 2
202   avcodec_close(codec_context);
203 #else
204   avcodec_free_context(&codec_context);
205 #endif
206   av_frame_unref(decoded_frame);
207 #if LIBAVUTIL_VERSION_MAJOR > 51
208   av_frame_free(&decoded_frame);
209 #endif
210   av_packet_unref(&avpkt);
211   avformat_close_input(&context);
212 
213   return ret;
214 }
215 
fill_song_properties(struct bl_song * const song,char const * const filename,AVCodecParameters * codecpar,AVFormatContext * context,struct SwrContext ** swr_ctx)216 int fill_song_properties(struct bl_song *const song, char const *const filename,
217                          AVCodecParameters *codecpar, AVFormatContext *context,
218                          struct SwrContext **swr_ctx) {
219   // Dictionary to fetch tags
220   AVDictionaryEntry *tags_dictionary;
221   uint64_t size = 0;
222 
223   song->filename = malloc(strlen(filename) + 1);
224   strcpy(song->filename, filename);
225 
226 #if LIBSWRESAMPLE_VERSION_MAJOR < 2
227   song->sample_rate = codec_context->sample_rate;
228   song->nb_bytes_per_sample =
229       av_get_bytes_per_sample(codec_context->sample_fmt);
230   song->channels = codec_context->channels;
231 #else
232   song->sample_rate = codecpar->sample_rate;
233   song->nb_bytes_per_sample = av_get_bytes_per_sample(codecpar->format);
234   song->channels = codecpar->channels;
235 #endif
236   song->duration = (uint64_t)(context->duration) / ((uint64_t)AV_TIME_BASE);
237   song->bitrate = context->bit_rate;
238   song->resampled = 0;
239 
240   // Get number of samples
241   size = (((uint64_t)(context->duration) * (uint64_t)SAMPLE_RATE) /
242           ((uint64_t)AV_TIME_BASE)) *
243          song->channels * NB_BYTES_PER_SAMPLE;
244 
245   // Estimated number of samples
246   song->nSamples = ((((uint64_t)(context->duration) * (uint64_t)SAMPLE_RATE) /
247                      ((uint64_t)AV_TIME_BASE)) *
248                     song->channels);
249 
250   // Allocate sample_array
251   if ((song->sample_array = calloc(size, 1)) == NULL) {
252     fprintf(stderr, "Could not allocate enough memory\n");
253     return BL_UNEXPECTED;
254   }
255   // Zero initialize tags
256   song->artist = NULL;
257   song->title = NULL;
258   song->album = NULL;
259   song->tracknumber = NULL;
260 
261   // Initialize tracknumber tag
262   tags_dictionary = av_dict_get(context->metadata, "track", NULL, 0);
263   if (tags_dictionary != NULL) {
264     song->tracknumber = malloc(strlen(tags_dictionary->value) + 1);
265     strcpy(song->tracknumber, tags_dictionary->value);
266     song->tracknumber[strcspn(song->tracknumber, "/")] = '\0';
267   } else {
268     song->tracknumber = malloc(1 * sizeof(char));
269     strcpy(song->tracknumber, "");
270   }
271 
272   // Initialize title tag
273   tags_dictionary = av_dict_get(context->metadata, "title", NULL, 0);
274   if (tags_dictionary != NULL) {
275     song->title = malloc(strlen(tags_dictionary->value) + 1);
276     strcpy(song->title, tags_dictionary->value);
277   } else {
278     song->title = malloc(12 * sizeof(char));
279     strcpy(song->title, "<no title>");
280   }
281 
282   // Initialize artist tag
283   tags_dictionary = av_dict_get(context->metadata, "ARTIST", NULL, 0);
284   if (tags_dictionary != NULL) {
285     song->artist = malloc(strlen(tags_dictionary->value) + 1);
286     strcpy(song->artist, tags_dictionary->value);
287   } else {
288     song->artist = malloc(12 * sizeof(char));
289     strcpy(song->artist, "<no artist>");
290   }
291 
292   // Initialize album tag
293   tags_dictionary = av_dict_get(context->metadata, "ALBUM", NULL, 0);
294   if (tags_dictionary != NULL) {
295     song->album = malloc(strlen(tags_dictionary->value) + 1);
296     strcpy(song->album, tags_dictionary->value);
297   } else {
298     song->album = malloc(11 * sizeof(char));
299     strcpy(song->album, "<no album>");
300   }
301 
302   // Initialize genre tag
303   tags_dictionary = av_dict_get(context->metadata, "genre", NULL, 0);
304   if (tags_dictionary != NULL) {
305     song->genre = malloc(strlen(tags_dictionary->value) + 1);
306     strcpy(song->genre, tags_dictionary->value);
307   } else {
308     song->genre = malloc(11 * sizeof(char));
309     strcpy(song->genre, "<no genre>");
310   }
311 
312   // If the song is in a floating-point format or int32, prepare the conversion
313   // to int16
314 #if LIBSWRESAMPLE_VERSION_MAJOR < 2
315   if ((codec_context->sample_fmt != AV_SAMPLE_FMT_S16) ||
316       (codec_context->sample_rate != SAMPLE_RATE)) {
317 #else
318   if ((codecpar->format != AV_SAMPLE_FMT_S16) ||
319       (codecpar->sample_rate != SAMPLE_RATE)) {
320 #endif
321     song->resampled = 1;
322     song->nb_bytes_per_sample = 2;
323 
324     *swr_ctx = swr_alloc();
325 
326 #if LIBSWRESAMPLE_VERSION_MAJOR < 2
327     av_opt_set_int(*swr_ctx, "in_channel_layout", codec_context->channel_layout,
328                    0);
329     av_opt_set_int(*swr_ctx, "in_sample_rate", codec_context->sample_rate, 0);
330     av_opt_set_sample_fmt(*swr_ctx, "in_sample_fmt", codec_context->sample_fmt,
331                           0);
332     av_opt_set_int(*swr_ctx, "out_channel_layout",
333                    codec_context->channel_layout, 0);
334     av_opt_set_int(*swr_ctx, "out_sample_rate", SAMPLE_RATE, 0);
335 #else
336     av_opt_set_int(*swr_ctx, "in_channel_layout", codecpar->channel_layout, 0);
337     av_opt_set_int(*swr_ctx, "in_sample_rate", codecpar->sample_rate, 0);
338     av_opt_set_sample_fmt(*swr_ctx, "in_sample_fmt", codecpar->format, 0);
339     av_opt_set_int(*swr_ctx, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0);
340     av_opt_set_int(*swr_ctx, "out_sample_rate", SAMPLE_RATE, 0);
341 #endif
342     av_opt_set_sample_fmt(*swr_ctx, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);
343     if (swr_init(*swr_ctx) < 0) {
344       fprintf(stderr, "Could not allocate resampler context\n");
345       return BL_UNEXPECTED;
346     }
347   }
348 
349   return BL_OK;
350 }
351 
352 // If needed, realloc sample array and put stuff in beginning_ptr
353 int append_buffer_to_song(struct bl_song *const song, int *index_ptr,
354                           int nb_samples, int8_t **beginning_ptr,
355                           uint64_t *size_ptr, uint8_t *decoded_samples) {
356   size_t data_size = av_samples_get_buffer_size(
357       NULL, CHANNELS, nb_samples, AV_SAMPLE_FMT_S16, 1);
358   if ((*index_ptr * song->nb_bytes_per_sample + data_size) > *size_ptr) {
359     int8_t *ptr;
360     ptr = realloc(*beginning_ptr, *size_ptr + data_size);
361     if (ptr != NULL) {
362       *beginning_ptr = ptr;
363       *size_ptr += data_size;
364       song->nSamples += data_size / song->nb_bytes_per_sample;
365     } else {
366       fprintf(stderr, "Error while trying to allocate memory\n");
367       return BL_UNEXPECTED;
368     }
369   }
370   memcpy(&(*beginning_ptr)[*index_ptr * song->nb_bytes_per_sample],
371          decoded_samples, data_size);
372   *index_ptr += data_size / song->nb_bytes_per_sample;
373 
374   return BL_OK;
375 }
376 
377 int resample_decoded_frames(struct SwrContext *swr_ctx,
378                             struct bl_song *const song, AVFrame *decoded_frame,
379                             uint8_t ***out_buffer, int flush) {
380   size_t dst_bufsize;
381   int nb_samples;
382   // Approximate the resampled buffer size
383   int dst_nb_samples = av_rescale_rnd(
384       swr_get_delay(swr_ctx, song->sample_rate) + decoded_frame->nb_samples,
385       SAMPLE_RATE, song->sample_rate, AV_ROUND_UP);
386   dst_bufsize = av_samples_alloc_array_and_samples(
387       out_buffer, NULL, CHANNELS, dst_nb_samples, AV_SAMPLE_FMT_S16, 0);
388   if (!flush) {
389     nb_samples = swr_convert(swr_ctx, *out_buffer, dst_bufsize,
390                              (const uint8_t **)decoded_frame->data,
391                              decoded_frame->nb_samples);
392   } else {
393     nb_samples = swr_convert(swr_ctx, *out_buffer, dst_bufsize, NULL, 0);
394   }
395   if (nb_samples < 0) {
396     fprintf(stderr, "Error while converting from floating-point to int\n");
397     return BL_UNEXPECTED;
398   }
399 
400   return nb_samples;
401 }
402 
403 int process_frame(struct bl_song *const song, int8_t **beginning_ptr,
404                   AVFrame *decoded_frame, int *index_ptr, uint64_t *size_ptr,
405                   struct SwrContext *swr_ctx) {
406   uint8_t *decoded_samples = decoded_frame->extended_data[0];
407   int nb_samples = decoded_frame->nb_samples;
408   uint8_t **out_buffer;
409   // If the song isn't in a 16-bit format, convert it to
410   if (song->resampled == 1) {
411     if ((nb_samples = resample_decoded_frames(
412              swr_ctx, song, decoded_frame, &out_buffer, 0)) == BL_UNEXPECTED) {
413       return BL_UNEXPECTED;
414     }
415     decoded_samples = out_buffer[0];
416   }
417   if (nb_samples > 0)
418     if (append_buffer_to_song(song, index_ptr, nb_samples, beginning_ptr,
419                              size_ptr, decoded_samples) == BL_UNEXPECTED)
420       return BL_UNEXPECTED;
421 
422   if (song->resampled == 1) {
423     if (out_buffer)
424       av_freep(&out_buffer[0]);
425     av_freep(&out_buffer);
426   }
427   return BL_OK;
428 }
429