1 // Copyright (C) 2016 Lukas Lalinsky
2 // Distributed under the MIT license, see the LICENSE file for details.
3
4 #ifndef CHROMAPRINT_AUDIO_FFMPEG_AUDIO_READER_H_
5 #define CHROMAPRINT_AUDIO_FFMPEG_AUDIO_READER_H_
6
7 #ifdef HAVE_CONFIG_H
8 #include <config.h>
9 #endif
10
#include "debug.h"
#include "utils/scope_exit.h"
#include <algorithm>
#include <cerrno>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <memory>
#include <string>
16
17 extern "C" {
18 #include <libavcodec/avcodec.h>
19 #include <libavformat/avformat.h>
20 #include <libavutil/opt.h>
21 #include <libavutil/channel_layout.h>
22 }
23
24 #include "audio/ffmpeg_audio_processor.h"
25
// Compatibility aliases. Each block below only takes effect when the build
// configuration has NOT defined the corresponding HAVE_* macro; the function
// name used throughout this file is then mapped onto another FFmpeg function
// (presumably the older API spelling -- confirm against the build system).

// av_packet_unref -> av_free_packet
26 #ifndef HAVE_AV_PACKET_UNREF
27 #define av_packet_unref av_free_packet
28 #endif
29
// av_frame_alloc -> avcodec_alloc_frame
30 #ifndef HAVE_AV_FRAME_ALLOC
31 #define av_frame_alloc avcodec_alloc_frame
32 #endif
33
// av_frame_free -> avcodec_free_frame
34 #ifndef HAVE_AV_FRAME_FREE
35 #define av_frame_free avcodec_free_frame
36 #endif
37
// Fallback size for the buffer that SetError() hands to av_strerror() when
// the included FFmpeg headers do not define AV_ERROR_MAX_STRING_SIZE.
38 #ifndef AV_ERROR_MAX_STRING_SIZE
39 #define AV_ERROR_MAX_STRING_SIZE 128
40 #endif
41
42 namespace chromaprint {
43
44 class FFmpegAudioReader {
45 public:
46 FFmpegAudioReader();
47 ~FFmpegAudioReader();
48
49 /**
50 * Get the sample rate in the audio stream.
51 * @return sample rate in Hz, -1 on error
52 */
53 int GetSampleRate() const;
54
55 /**
56 * Get the number of channels in the audio stream.
57 * @return number of channels, -1 on error
58 */
59 int GetChannels() const;
60
61 /**
62 * Get the estimated audio stream duration.
63 * @return stream duration in milliseconds, -1 on error
64 */
65 int GetDuration() const;
66
67 bool SetInputFormat(const char *name);
68 bool SetInputSampleRate(int sample_rate);
69 bool SetInputChannels(int channels);
70
SetOutputSampleRate(int sample_rate)71 void SetOutputSampleRate(int sample_rate) { m_output_sample_rate = sample_rate; }
SetOutputChannels(int channels)72 void SetOutputChannels(int channels) { m_output_channels = channels; }
73
74 bool Open(const std::string &file_name);
75
76 void Close();
77
78 bool Read(const int16_t **data, size_t *size);
79
IsOpen()80 bool IsOpen() const { return m_opened; }
IsFinished()81 bool IsFinished() const { return m_finished && !m_got_frame; }
82
GetError()83 std::string GetError() const { return m_error; }
GetErrorCode()84 int GetErrorCode() const { return m_error_code; }
85
86 private:
87 inline void SetError(const char *format, int errnum = 0);
88
89 std::unique_ptr<FFmpegAudioProcessor> m_converter;
90 uint8_t *m_convert_buffer[1] = { nullptr };
91 int m_convert_buffer_nb_samples = 0;
92
93 AVInputFormat *m_input_fmt = nullptr;
94 AVDictionary *m_input_opts = nullptr;
95
96 AVFormatContext *m_format_ctx = nullptr;
97 AVCodecContext *m_codec_ctx = nullptr;
98 AVFrame *m_frame = nullptr;
99 int m_stream_index = -1;
100 std::string m_error;
101 int m_error_code = 0;
102 bool m_finished = false;
103 bool m_opened = false;
104 int m_got_frame = 0;
105 AVPacket m_packet;
106 AVPacket m_packet0;
107
108 int m_output_sample_rate = 0;
109 int m_output_channels = 0;
110
111 uint64_t m_nb_packets = 0;
112 int m_decode_error = 0;
113 };
114
FFmpegAudioReader()115 inline FFmpegAudioReader::FFmpegAudioReader() {
116 av_log_set_level(AV_LOG_QUIET);
117 av_register_all();
118
119 av_init_packet(&m_packet);
120 m_packet.data = nullptr;
121 m_packet.size = 0;
122
123 m_packet0 = m_packet;
124 }
125
~FFmpegAudioReader()126 inline FFmpegAudioReader::~FFmpegAudioReader() {
127 Close();
128 av_dict_free(&m_input_opts);
129 av_freep(&m_convert_buffer[0]);
130 av_packet_unref(&m_packet0);
131 }
132
SetInputFormat(const char * name)133 inline bool FFmpegAudioReader::SetInputFormat(const char *name) {
134 m_input_fmt = av_find_input_format(name);
135 return m_input_fmt;
136 }
137
SetInputSampleRate(int sample_rate)138 inline bool FFmpegAudioReader::SetInputSampleRate(int sample_rate) {
139 char buf[64];
140 sprintf(buf, "%d", sample_rate);
141 return av_dict_set(&m_input_opts, "sample_rate", buf, 0) >= 0;
142 }
143
SetInputChannels(int channels)144 inline bool FFmpegAudioReader::SetInputChannels(int channels) {
145 char buf[64];
146 sprintf(buf, "%d", channels);
147 return av_dict_set(&m_input_opts, "channels", buf, 0) >= 0;
148 }
149
Open(const std::string & file_name)150 inline bool FFmpegAudioReader::Open(const std::string &file_name) {
151 int ret;
152
153 Close();
154
155 av_init_packet(&m_packet);
156 m_packet.data = nullptr;
157 m_packet.size = 0;
158
159 m_packet0 = m_packet;
160
161 ret = avformat_open_input(&m_format_ctx, file_name.c_str(), m_input_fmt, &m_input_opts);
162 if (ret < 0) {
163 SetError("Could not open the input file", ret);
164 return false;
165 }
166
167 ret = avformat_find_stream_info(m_format_ctx, nullptr);
168 if (ret < 0) {
169 SetError("Coud not find stream information in the file", ret);
170 return false;
171 }
172
173 AVCodec *codec;
174 ret = av_find_best_stream(m_format_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
175 if (ret < 0) {
176 SetError("Could not find any audio stream in the file", ret);
177 return false;
178 }
179 m_stream_index = ret;
180
181 m_codec_ctx = m_format_ctx->streams[m_stream_index]->codec;
182 m_codec_ctx->request_sample_fmt = AV_SAMPLE_FMT_S16;
183
184 ret = avcodec_open2(m_codec_ctx, codec, nullptr);
185 if (ret < 0) {
186 SetError("Could not open the codec", ret);
187 return false;
188 }
189
190 if (!m_codec_ctx->channel_layout) {
191 m_codec_ctx->channel_layout = av_get_default_channel_layout(m_codec_ctx->channels);
192 }
193
194 m_frame = av_frame_alloc();
195 if (!m_frame) {
196 return false;
197 }
198
199 if (!m_output_sample_rate) {
200 m_output_sample_rate = m_codec_ctx->sample_rate;
201 }
202
203 if (!m_output_channels) {
204 m_output_channels = m_codec_ctx->channels;
205 }
206
207 if (m_codec_ctx->sample_fmt != AV_SAMPLE_FMT_S16 || m_codec_ctx->channels != m_output_channels || m_codec_ctx->sample_rate != m_output_sample_rate) {
208 m_converter.reset(new FFmpegAudioProcessor());
209 m_converter->SetCompatibleMode();
210 m_converter->SetInputSampleFormat(m_codec_ctx->sample_fmt);
211 m_converter->SetInputSampleRate(m_codec_ctx->sample_rate);
212 m_converter->SetInputChannelLayout(m_codec_ctx->channel_layout);
213 m_converter->SetOutputSampleFormat(AV_SAMPLE_FMT_S16);
214 m_converter->SetOutputSampleRate(m_output_sample_rate);
215 m_converter->SetOutputChannelLayout(av_get_default_channel_layout(m_output_channels));
216 auto ret = m_converter->Init();
217 if (ret != 0) {
218 SetError("Could not create an audio converter instance", ret);
219 return false;
220 }
221 }
222
223 m_opened = true;
224 m_finished = false;
225 m_got_frame = 0;
226 m_nb_packets = 0;
227 m_decode_error = 0;
228
229 return true;
230 }
231
Close()232 inline void FFmpegAudioReader::Close() {
233 av_frame_free(&m_frame);
234
235 m_stream_index = -1;
236
237 if (m_codec_ctx) {
238 avcodec_close(m_codec_ctx);
239 m_codec_ctx = nullptr;
240 }
241
242 if (m_format_ctx) {
243 avformat_close_input(&m_format_ctx);
244 }
245 }
246
GetSampleRate()247 inline int FFmpegAudioReader::GetSampleRate() const {
248 return m_output_sample_rate;
249 }
250
GetChannels()251 inline int FFmpegAudioReader::GetChannels() const {
252 return m_output_channels;
253 }
254
GetDuration()255 inline int FFmpegAudioReader::GetDuration() const {
256 if (m_format_ctx && m_stream_index >= 0) {
257 const auto stream = m_format_ctx->streams[m_stream_index];
258 if (stream->duration != AV_NOPTS_VALUE) {
259 return 1000 * stream->time_base.num * stream->duration / stream->time_base.den;
260 } else if (m_format_ctx->duration != AV_NOPTS_VALUE) {
261 return 1000 * m_format_ctx->duration / AV_TIME_BASE;
262 }
263 }
264 return -1;
265 }
266
Read(const int16_t ** data,size_t * size)267 inline bool FFmpegAudioReader::Read(const int16_t **data, size_t *size) {
268 if (!IsOpen() || IsFinished()) {
269 return false;
270 }
271
272 int ret;
273 while (true) {
274 while (m_packet.size <= 0) {
275 av_packet_unref(&m_packet0);
276 av_init_packet(&m_packet);
277 m_packet.data = nullptr;
278 m_packet.size = 0;
279 ret = av_read_frame(m_format_ctx, &m_packet);
280 if (ret < 0) {
281 if (ret == AVERROR_EOF) {
282 m_finished = true;
283 break;
284 } else {
285 SetError("Error reading from the audio source", ret);
286 return false;
287 }
288 }
289 m_packet0 = m_packet;
290 if (m_packet.stream_index != m_stream_index) {
291 m_packet.data = nullptr;
292 m_packet.size = 0;
293 } else {
294 m_nb_packets++;
295 }
296 }
297
298 ret = avcodec_decode_audio4(m_codec_ctx, m_frame, &m_got_frame, &m_packet);
299 if (ret < 0) {
300 if (m_decode_error) {
301 SetError("Error decoding audio frame", m_decode_error);
302 return false;
303 }
304 m_decode_error = ret;
305 m_packet.data = nullptr;
306 m_packet.size = 0;
307 continue;
308 }
309
310 break;
311 }
312
313 m_decode_error = 0;
314
315 const int decoded = std::min(ret, m_packet.size);
316 m_packet.data += decoded;
317 m_packet.size -= decoded;
318
319 if (m_got_frame) {
320 if (m_converter) {
321 if (m_frame->nb_samples > m_convert_buffer_nb_samples) {
322 int linsize;
323 av_freep(&m_convert_buffer[0]);
324 m_convert_buffer_nb_samples = std::max(1024 * 8, m_frame->nb_samples);
325 ret = av_samples_alloc(m_convert_buffer, &linsize, m_codec_ctx->channels, m_convert_buffer_nb_samples, AV_SAMPLE_FMT_S16, 1);
326 if (ret < 0) {
327 SetError("Couldn't allocate audio converter buffer", ret);
328 return false;
329 }
330 }
331 auto nb_samples = m_converter->Convert(m_convert_buffer, m_convert_buffer_nb_samples, (const uint8_t **) m_frame->data, m_frame->nb_samples);
332 if (nb_samples < 0) {
333 SetError("Couldn't convert audio", ret);
334 return false;
335 }
336 *data = (const int16_t *) m_convert_buffer[0];
337 *size = nb_samples;
338 } else {
339 *data = (const int16_t *) m_frame->data[0];
340 *size = m_frame->nb_samples;
341 }
342 } else {
343 if (m_finished && m_converter) {
344 auto nb_samples = m_converter->Flush(m_convert_buffer, m_convert_buffer_nb_samples);
345 if (nb_samples < 0) {
346 SetError("Couldn't convert audio", ret);
347 return false;
348 } else if (nb_samples > 0) {
349 m_got_frame = 1;
350 *data = (const int16_t *) m_convert_buffer[0];
351 *size = nb_samples;
352 }
353 }
354 }
355
356 return true;
357 }
358
SetError(const char * message,int errnum)359 inline void FFmpegAudioReader::SetError(const char *message, int errnum) {
360 m_error = message;
361 if (errnum < 0) {
362 char buf[AV_ERROR_MAX_STRING_SIZE];
363 if (av_strerror(errnum, buf, AV_ERROR_MAX_STRING_SIZE) == 0) {
364 m_error += " (";
365 m_error += buf;
366 m_error += ")";
367 }
368 }
369 m_error_code = errnum;
370 }
371
372 }; // namespace chromaprint
373
374 #endif
375