1 // Copyright (C) 2016  Lukas Lalinsky
2 // Distributed under the MIT license, see the LICENSE file for details.
3 
4 #ifndef CHROMAPRINT_AUDIO_FFMPEG_AUDIO_READER_H_
5 #define CHROMAPRINT_AUDIO_FFMPEG_AUDIO_READER_H_
6 
7 #ifdef HAVE_CONFIG_H
8 #include <config.h>
9 #endif
10 
11 #include "debug.h"
12 #include "utils/scope_exit.h"
13 #include <cstdlib>
14 #include <string>
15 #include <memory>
16 
17 extern "C" {
18 #include <libavcodec/avcodec.h>
19 #include <libavformat/avformat.h>
20 #include <libavutil/opt.h>
21 #include <libavutil/channel_layout.h>
22 }
23 
24 #include "audio/ffmpeg_audio_processor.h"
25 
26 #ifndef HAVE_AV_PACKET_UNREF
27 #define av_packet_unref av_free_packet
28 #endif
29 
30 #ifndef HAVE_AV_FRAME_ALLOC
31 #define av_frame_alloc avcodec_alloc_frame
32 #endif
33 
34 #ifndef HAVE_AV_FRAME_FREE
35 #define av_frame_free avcodec_free_frame
36 #endif
37 
38 #ifndef AV_ERROR_MAX_STRING_SIZE
39 #define AV_ERROR_MAX_STRING_SIZE 128
40 #endif
41 
42 namespace chromaprint {
43 
44 class FFmpegAudioReader {
45 public:
46 	FFmpegAudioReader();
47 	~FFmpegAudioReader();
48 
49 	/**
50 	 * Get the sample rate in the audio stream.
51 	 * @return sample rate in Hz, -1 on error
52 	 */
53 	int GetSampleRate() const;
54 
55 	/**
56 	 * Get the number of channels in the audio stream.
57 	 * @return number of channels, -1 on error
58 	 */
59 	int GetChannels() const;
60 
61 	/**
62 	 * Get the estimated audio stream duration.
63 	 * @return stream duration in milliseconds, -1 on error
64 	 */
65 	int GetDuration() const;
66 
67 	bool SetInputFormat(const char *name);
68 	bool SetInputSampleRate(int sample_rate);
69 	bool SetInputChannels(int channels);
70 
SetOutputSampleRate(int sample_rate)71 	void SetOutputSampleRate(int sample_rate) { m_output_sample_rate = sample_rate; }
SetOutputChannels(int channels)72 	void SetOutputChannels(int channels) { m_output_channels = channels; }
73 
74 	bool Open(const std::string &file_name);
75 
76 	void Close();
77 
78 	bool Read(const int16_t **data, size_t *size);
79 
IsOpen()80 	bool IsOpen() const { return m_opened; }
IsFinished()81 	bool IsFinished() const { return m_finished && !m_got_frame; }
82 
GetError()83 	std::string GetError() const { return m_error; }
GetErrorCode()84 	int GetErrorCode() const { return m_error_code; }
85 
86 private:
87 	inline void SetError(const char *format, int errnum = 0);
88 
89 	std::unique_ptr<FFmpegAudioProcessor> m_converter;
90 	uint8_t *m_convert_buffer[1] = { nullptr };
91 	int m_convert_buffer_nb_samples = 0;
92 
93 	AVInputFormat *m_input_fmt = nullptr;
94 	AVDictionary *m_input_opts = nullptr;
95 
96 	AVFormatContext *m_format_ctx = nullptr;
97 	AVCodecContext *m_codec_ctx = nullptr;
98 	AVFrame *m_frame = nullptr;
99 	int m_stream_index = -1;
100 	std::string m_error;
101 	int m_error_code = 0;
102 	bool m_finished = false;
103 	bool m_opened = false;
104 	int m_got_frame = 0;
105 	AVPacket m_packet;
106 	AVPacket m_packet0;
107 
108 	int m_output_sample_rate = 0;
109 	int m_output_channels = 0;
110 
111 	uint64_t m_nb_packets = 0;
112 	int m_decode_error = 0;
113 };
114 
FFmpegAudioReader()115 inline FFmpegAudioReader::FFmpegAudioReader() {
116 	av_log_set_level(AV_LOG_QUIET);
117 	av_register_all();
118 
119 	av_init_packet(&m_packet);
120 	m_packet.data = nullptr;
121 	m_packet.size = 0;
122 
123 	m_packet0 = m_packet;
124 }
125 
~FFmpegAudioReader()126 inline FFmpegAudioReader::~FFmpegAudioReader() {
127 	Close();
128 	av_dict_free(&m_input_opts);
129 	av_freep(&m_convert_buffer[0]);
130 	av_packet_unref(&m_packet0);
131 }
132 
SetInputFormat(const char * name)133 inline bool FFmpegAudioReader::SetInputFormat(const char *name) {
134 	m_input_fmt = av_find_input_format(name);
135 	return m_input_fmt;
136 }
137 
SetInputSampleRate(int sample_rate)138 inline bool FFmpegAudioReader::SetInputSampleRate(int sample_rate) {
139 	char buf[64];
140 	sprintf(buf, "%d", sample_rate);
141 	return av_dict_set(&m_input_opts, "sample_rate", buf, 0) >= 0;
142 }
143 
SetInputChannels(int channels)144 inline bool FFmpegAudioReader::SetInputChannels(int channels) {
145 	char buf[64];
146 	sprintf(buf, "%d", channels);
147 	return av_dict_set(&m_input_opts, "channels", buf, 0) >= 0;
148 }
149 
Open(const std::string & file_name)150 inline bool FFmpegAudioReader::Open(const std::string &file_name) {
151 	int ret;
152 
153 	Close();
154 
155     av_init_packet(&m_packet);
156 	m_packet.data = nullptr;
157 	m_packet.size = 0;
158 
159 	m_packet0 = m_packet;
160 
161 	ret = avformat_open_input(&m_format_ctx, file_name.c_str(), m_input_fmt, &m_input_opts);
162 	if (ret < 0) {
163 		SetError("Could not open the input file", ret);
164 		return false;
165 	}
166 
167 	ret = avformat_find_stream_info(m_format_ctx, nullptr);
168 	if (ret < 0) {
169 		SetError("Coud not find stream information in the file", ret);
170 		return false;
171 	}
172 
173 	AVCodec *codec;
174 	ret = av_find_best_stream(m_format_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
175 	if (ret < 0) {
176 		SetError("Could not find any audio stream in the file", ret);
177 		return false;
178 	}
179 	m_stream_index = ret;
180 
181 	m_codec_ctx = m_format_ctx->streams[m_stream_index]->codec;
182 	m_codec_ctx->request_sample_fmt = AV_SAMPLE_FMT_S16;
183 
184 	ret = avcodec_open2(m_codec_ctx, codec, nullptr);
185 	if (ret < 0) {
186 		SetError("Could not open the codec", ret);
187 		return false;
188 	}
189 
190 	if (!m_codec_ctx->channel_layout) {
191 		m_codec_ctx->channel_layout = av_get_default_channel_layout(m_codec_ctx->channels);
192 	}
193 
194 	m_frame = av_frame_alloc();
195 	if (!m_frame) {
196 		return false;
197 	}
198 
199 	if (!m_output_sample_rate) {
200 		m_output_sample_rate = m_codec_ctx->sample_rate;
201 	}
202 
203 	if (!m_output_channels) {
204 		m_output_channels = m_codec_ctx->channels;
205 	}
206 
207 	if (m_codec_ctx->sample_fmt != AV_SAMPLE_FMT_S16 || m_codec_ctx->channels != m_output_channels || m_codec_ctx->sample_rate != m_output_sample_rate) {
208 		m_converter.reset(new FFmpegAudioProcessor());
209 		m_converter->SetCompatibleMode();
210 		m_converter->SetInputSampleFormat(m_codec_ctx->sample_fmt);
211 		m_converter->SetInputSampleRate(m_codec_ctx->sample_rate);
212 		m_converter->SetInputChannelLayout(m_codec_ctx->channel_layout);
213 		m_converter->SetOutputSampleFormat(AV_SAMPLE_FMT_S16);
214 		m_converter->SetOutputSampleRate(m_output_sample_rate);
215 		m_converter->SetOutputChannelLayout(av_get_default_channel_layout(m_output_channels));
216 		auto ret = m_converter->Init();
217 		if (ret != 0) {
218 			SetError("Could not create an audio converter instance", ret);
219 			return false;
220 		}
221 	}
222 
223 	m_opened = true;
224 	m_finished = false;
225 	m_got_frame = 0;
226 	m_nb_packets = 0;
227 	m_decode_error = 0;
228 
229 	return true;
230 }
231 
Close()232 inline void FFmpegAudioReader::Close() {
233 	av_frame_free(&m_frame);
234 
235 	m_stream_index = -1;
236 
237 	if (m_codec_ctx) {
238 		avcodec_close(m_codec_ctx);
239 		m_codec_ctx = nullptr;
240 	}
241 
242 	if (m_format_ctx) {
243 		avformat_close_input(&m_format_ctx);
244 	}
245 }
246 
GetSampleRate()247 inline int FFmpegAudioReader::GetSampleRate() const {
248 	return m_output_sample_rate;
249 }
250 
GetChannels()251 inline int FFmpegAudioReader::GetChannels() const {
252 	return m_output_channels;
253 }
254 
GetDuration()255 inline int FFmpegAudioReader::GetDuration() const {
256 	if (m_format_ctx && m_stream_index >= 0) {
257 		const auto stream = m_format_ctx->streams[m_stream_index];
258 		if (stream->duration != AV_NOPTS_VALUE) {
259 			return 1000 * stream->time_base.num * stream->duration / stream->time_base.den;
260 		} else if (m_format_ctx->duration != AV_NOPTS_VALUE) {
261 			return 1000 * m_format_ctx->duration / AV_TIME_BASE;
262 		}
263 	}
264 	return -1;
265 }
266 
Read(const int16_t ** data,size_t * size)267 inline bool FFmpegAudioReader::Read(const int16_t **data, size_t *size) {
268 	if (!IsOpen() || IsFinished()) {
269 		return false;
270 	}
271 
272 	int ret;
273 	while (true) {
274 		while (m_packet.size <= 0) {
275 			av_packet_unref(&m_packet0);
276 			av_init_packet(&m_packet);
277 			m_packet.data = nullptr;
278 			m_packet.size = 0;
279 			ret = av_read_frame(m_format_ctx, &m_packet);
280 			if (ret < 0) {
281 				if (ret == AVERROR_EOF) {
282 					m_finished = true;
283 					break;
284 				} else {
285 					SetError("Error reading from the audio source", ret);
286 					return false;
287 				}
288 			}
289 			m_packet0 = m_packet;
290 			if (m_packet.stream_index != m_stream_index) {
291 				m_packet.data = nullptr;
292 				m_packet.size = 0;
293 			} else {
294 				m_nb_packets++;
295 			}
296 		}
297 
298 		ret = avcodec_decode_audio4(m_codec_ctx, m_frame, &m_got_frame, &m_packet);
299 		if (ret < 0) {
300 			if (m_decode_error) {
301 				SetError("Error decoding audio frame", m_decode_error);
302 				return false;
303 			}
304 			m_decode_error = ret;
305 			m_packet.data = nullptr;
306 			m_packet.size = 0;
307 			continue;
308 		}
309 
310 		break;
311 	}
312 
313 	m_decode_error = 0;
314 
315 	const int decoded = std::min(ret, m_packet.size);
316 	m_packet.data += decoded;
317 	m_packet.size -= decoded;
318 
319 	if (m_got_frame) {
320 		if (m_converter) {
321 			if (m_frame->nb_samples > m_convert_buffer_nb_samples) {
322 				int linsize;
323 				av_freep(&m_convert_buffer[0]);
324 				m_convert_buffer_nb_samples = std::max(1024 * 8, m_frame->nb_samples);
325 				ret = av_samples_alloc(m_convert_buffer, &linsize, m_codec_ctx->channels, m_convert_buffer_nb_samples, AV_SAMPLE_FMT_S16, 1);
326 				if (ret < 0) {
327 					SetError("Couldn't allocate audio converter buffer", ret);
328 					return false;
329 				}
330 			}
331 			auto nb_samples = m_converter->Convert(m_convert_buffer, m_convert_buffer_nb_samples, (const uint8_t **) m_frame->data, m_frame->nb_samples);
332 			if (nb_samples < 0) {
333 				SetError("Couldn't convert audio", ret);
334 				return false;
335 			}
336 			*data = (const int16_t *) m_convert_buffer[0];
337 			*size = nb_samples;
338 		} else {
339 			*data = (const int16_t *) m_frame->data[0];
340 			*size = m_frame->nb_samples;
341 		}
342 	} else {
343 		if (m_finished && m_converter) {
344 			auto nb_samples = m_converter->Flush(m_convert_buffer, m_convert_buffer_nb_samples);
345 			if (nb_samples < 0) {
346 				SetError("Couldn't convert audio", ret);
347 				return false;
348 			} else if (nb_samples > 0) {
349 				m_got_frame = 1;
350 				*data = (const int16_t *) m_convert_buffer[0];
351 				*size = nb_samples;
352 			}
353 		}
354 	}
355 
356 	return true;
357 }
358 
SetError(const char * message,int errnum)359 inline void FFmpegAudioReader::SetError(const char *message, int errnum) {
360 	m_error = message;
361 	if (errnum < 0) {
362 		char buf[AV_ERROR_MAX_STRING_SIZE];
363 		if (av_strerror(errnum, buf, AV_ERROR_MAX_STRING_SIZE) == 0) {
364 			m_error += " (";
365 			m_error += buf;
366 			m_error += ")";
367 		}
368 	}
369 	m_error_code = errnum;
370 }
371 
372 }; // namespace chromaprint
373 
374 #endif
375