1 #include <cstdio>
2 #include <cstdlib>
3 #include <cstring>
4 #include <sstream>
5 #include <chrono>
6 #include <chromaprint.h>
7 #include "audio/ffmpeg_audio_reader.h"
8 #include "utils/scope_exit.h"
9 
10 #ifdef _WIN32
11 #include <windows.h>
12 #endif
13 
14 using namespace chromaprint;
15 
// Output layouts that can be selected on the command line
// (-text, -json and -plain respectively).
enum Format {
	TEXT  = 0,  // KEY=value lines (DURATION=..., FINGERPRINT=...)
	JSON  = 1,  // one JSON object per result
	PLAIN = 2,  // the fingerprint string only
};
21 
22 static Format g_format = TEXT;
23 static char *g_input_format = nullptr;
24 static int g_input_channels = 0;
25 static int g_input_sample_rate = 0;
26 static double g_max_duration = 120;
27 static double g_max_chunk_duration = 0;
28 static bool g_overlap = false;
29 static bool g_raw = false;
30 static bool g_signed = false;
31 static bool g_abs_ts = false;
32 static bool g_ignore_errors = false;
33 static ChromaprintAlgorithm g_algorithm = CHROMAPRINT_ALGORITHM_DEFAULT;
34 
35 
// Usage text printed for -h / -help / --help. It is used as a printf format
// string: the single %s is replaced with the program name (argv[0]), so any
// literal percent sign added here must be written as %%.
const char *g_help =
	"Usage: %s [OPTIONS] FILE [FILE...]\n"
	"\n"
	"Generate fingerprints from audio files/streams.\n"
	"\n"
	"Options:\n"
	"  -format NAME   Set the input format name\n"
	"  -rate NUM      Set the sample rate of the input audio\n"
	"  -channels NUM  Set the number of channels in the input audio\n"
	"  -length SECS   Restrict the duration of the processed input audio (default 120)\n"
	"  -chunk SECS    Split the input audio into chunks of this duration\n"
	"  -algorithm NUM Set the algorithm method (default 2)\n"
	"  -overlap       Overlap the chunks slightly to make sure audio on the edges is fingerprinted\n"
	"  -ts            Output UNIX timestamps for chunked results, useful when fingerprinting real-time audio stream\n"
	"  -raw           Output fingerprints in the uncompressed format\n"
	"  -signed        Change the uncompressed format from unsigned integers to signed (for pg_acoustid compatibility)\n"
	"  -json          Print the output in JSON format\n"
	"  -text          Print the output in text format\n"
	"  -plain         Print just the fingerprint in text format\n"
	"  -version       Print version information\n"
	;
57 
ParseOptions(int & argc,char ** argv)58 static void ParseOptions(int &argc, char **argv) {
59 	int j = 1;
60 	for (int i = 1; i < argc; i++) {
61 		if (!strcmp(argv[i], "--")) {
62 			while (++i < argc) {
63 				argv[j++] = argv[i];
64 			}
65 		} else if ((!strcmp(argv[i], "-format") || !strcmp(argv[i], "-f")) && i + 1 < argc) {
66 			g_input_format = argv[++i];
67 		} else if ((!strcmp(argv[i], "-channels") || !strcmp(argv[i], "-c")) && i + 1 < argc) {
68 			auto value = atoi(argv[i + 1]);
69 			if (value > 0) {
70 				g_input_channels = value;
71 			} else {
72 				fprintf(stderr, "ERROR: The argument for %s must be a non-zero positive number\n", argv[i]);
73 				exit(2);
74 			}
75 			i++;
76 		} else if ((!strcmp(argv[i], "-rate") || !strcmp(argv[i], "-r")) && i + 1 < argc) {
77 			auto value = atoi(argv[i + 1]);
78 			if (value >= 0) {
79 				g_input_sample_rate = value;
80 			} else {
81 				fprintf(stderr, "ERROR: The argument for %s must be a positive number\n", argv[i]);
82 				exit(2);
83 			}
84 			i++;
85 		} else if ((!strcmp(argv[i], "-length") || !strcmp(argv[i], "-t")) && i + 1 < argc) {
86 			auto value = atof(argv[i + 1]);
87 			if (value >= 0) {
88 				g_max_duration = value;
89 			} else {
90 				fprintf(stderr, "ERROR: The argument for %s must be a positive number\n", argv[i]);
91 				exit(2);
92 			}
93 			i++;
94 		} else if (!strcmp(argv[i], "-chunk") && i + 1 < argc) {
95 			auto value = atof(argv[i + 1]);
96 			if (value >= 0) {
97 				g_max_chunk_duration = value;
98 			} else {
99 				fprintf(stderr, "ERROR: The argument for %s must be a positive number\n", argv[i]);
100 				exit(2);
101 			}
102 			i++;
103         } else if ((!strcmp(argv[i], "-algorithm") || !strcmp(argv[i], "-a")) && i + 1 < argc) {
104             auto value = atoi(argv[i + 1]);
105             if (value >= 1 && value <= 5) {
106                 g_algorithm = (ChromaprintAlgorithm)(value - 1);
107             } else {
108                 fprintf(stderr, "ERROR: The argument for %s must be 1 - 5\n", argv[i]);
109                 exit(2);
110             }
111             i++;
112 		} else if (!strcmp(argv[i], "-text")) {
113 			g_format = TEXT;
114 		} else if (!strcmp(argv[i], "-json")) {
115 			g_format = JSON;
116 		} else if (!strcmp(argv[i], "-plain")) {
117 			g_format = PLAIN;
118 		} else if (!strcmp(argv[i], "-overlap")) {
119 			g_overlap = true;
120 		} else if (!strcmp(argv[i], "-ts")) {
121 			g_abs_ts = true;
122 		} else if (!strcmp(argv[i], "-raw")) {
123 			g_raw = true;
124 		} else if (!strcmp(argv[i], "-signed")) {
125 			g_signed = true;
126 		} else if (!strcmp(argv[i], "-ignore-errors")) {
127 			g_ignore_errors = true;
128 		} else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "-version")) {
129 #if defined(USE_SWRESAMPLE)
130 #define RESAMPLE_LIB_IDENT_IDENT LIBSWRESAMPLE_IDENT
131 #else
132 #define RESAMPLE_LIB_IDENT_IDENT LIBAVRESAMPLE_IDENT
133 #endif
134 			fprintf(stdout, "fpcalc version %s (FFmpeg %s %s %s)\n", chromaprint_get_version(), LIBAVCODEC_IDENT, LIBAVFORMAT_IDENT, RESAMPLE_LIB_IDENT_IDENT);
135 			exit(0);
136 		} else if (!strcmp(argv[i], "-h") || !strcmp(argv[i], "-help") || !strcmp(argv[i], "--help")) {
137 			fprintf(stdout, g_help, argv[0]);
138 			exit(0);
139 		} else {
140 			const auto len = strlen(argv[i]);
141 			if (len > 1 && argv[i][0] == '-') {
142 				fprintf(stderr, "ERROR: Unknown option %s\n", argv[i]);
143 				exit(2);
144 			} else {
145 				argv[j++] = argv[i];
146 			}
147 		}
148 	}
149 	if (j < 2) {
150 		fprintf(stderr, "ERROR: No input files\n");
151 		exit(2);
152 	}
153 	argc = j;
154 }
155 
PrintResult(ChromaprintContext * ctx,FFmpegAudioReader & reader,bool first,double timestamp,double duration)156 void PrintResult(ChromaprintContext *ctx, FFmpegAudioReader &reader, bool first, double timestamp, double duration) {
157 	std::string tmp_fp;
158 	const char *fp;
159 	bool dealloc_fp = false;
160 
161 	int size;
162 	if (!chromaprint_get_raw_fingerprint_size(ctx, &size)) {
163 		fprintf(stderr, "ERROR: Could not get the fingerprinting size\n");
164 		exit(2);
165 	}
166 	if (size <= 0) {
167 		if (first) {
168 			fprintf(stderr, "ERROR: Empty fingerprint\n");
169 			exit(2);
170 		}
171 		return;
172 	}
173 
174 	if (g_raw) {
175 		std::stringstream ss;
176 		uint32_t *raw_fp_data = nullptr;
177 		int raw_fp_size = 0;
178 		if (!chromaprint_get_raw_fingerprint(ctx, &raw_fp_data, &raw_fp_size)) {
179 			fprintf(stderr, "ERROR: Could not get the fingerprinting\n");
180 			exit(2);
181 		}
182 		SCOPE_EXIT(chromaprint_dealloc(raw_fp_data));
183 		for (int i = 0; i < raw_fp_size; i++) {
184 			if (i > 0) {
185 				ss << ',';
186 			}
187             if (g_signed) {
188                 ss << static_cast<int32_t>(raw_fp_data[i]);
189             } else {
190                 ss << raw_fp_data[i];
191             }
192 		}
193 		tmp_fp = ss.str();
194 		fp = tmp_fp.c_str();
195 	} else {
196 		char *tmp_fp2;
197 		if (!chromaprint_get_fingerprint(ctx, &tmp_fp2)) {
198 			fprintf(stderr, "ERROR: Could not get the fingerprinting\n");
199 			exit(2);
200 		}
201 		fp = tmp_fp2;
202 		dealloc_fp = true;
203 	}
204 	SCOPE_EXIT(if (dealloc_fp) { chromaprint_dealloc((void *) fp); });
205 
206 	if (g_max_chunk_duration == 0) {
207 		duration = reader.GetDuration();
208 		if (duration < 0.0) {
209 			duration = 0.0;
210 		} else {
211 			duration /= 1000.0;
212 		}
213 	}
214 
215 	switch (g_format) {
216 		case TEXT:
217 			if (!first) {
218 				printf("\n");
219 			}
220 			if (g_abs_ts) {
221 				printf("TIMESTAMP=%.2f\n", timestamp);
222 			}
223 			printf("DURATION=%d\nFINGERPRINT=%s\n", int(duration), fp);
224 			break;
225 		case JSON:
226 			if (g_max_chunk_duration != 0) {
227 				if (g_raw) {
228 					printf("{\"timestamp\": %.2f, \"duration\": %.2f, \"fingerprint\": [%s]}\n", timestamp, duration, fp);
229 				} else {
230 					printf("{\"timestamp\": %.2f, \"duration\": %.2f, \"fingerprint\": \"%s\"}\n", timestamp, duration, fp);
231 				}
232 			} else {
233 				if (g_raw) {
234 					printf("{\"duration\": %.2f, \"fingerprint\": [%s]}\n", duration, fp);
235 				} else {
236 					printf("{\"duration\": %.2f, \"fingerprint\": \"%s\"}\n", duration, fp);
237 				}
238 			}
239 			break;
240 		case PLAIN:
241 			printf("%s\n", fp);
242 			break;
243 	}
244 
245 	fflush(stdout);
246 }
247 
// Returns the current std::chrono::system_clock time as fractional seconds
// since that clock's epoch, with microsecond resolution.
double GetCurrentTimestamp() {
	using std::chrono::microseconds;
	using std::chrono::system_clock;

	// Truncate to whole microseconds first, then scale to seconds.
	const auto stamp = std::chrono::time_point_cast<microseconds>(system_clock::now());
	return stamp.time_since_epoch().count() / 1000000.0;
}
253 
ProcessFile(ChromaprintContext * ctx,FFmpegAudioReader & reader,const char * file_name)254 void ProcessFile(ChromaprintContext *ctx, FFmpegAudioReader &reader, const char *file_name) {
255 	double ts = 0.0;
256 	if (g_abs_ts) {
257 		ts = GetCurrentTimestamp();
258 	}
259 
260 	if (!strcmp(file_name, "-")) {
261 		file_name = "pipe:0";
262 	}
263 
264 	if (!reader.Open(file_name)) {
265 		fprintf(stderr, "ERROR: %s\n", reader.GetError().c_str());
266 		exit(2);
267 	}
268 
269 	if (!chromaprint_start(ctx, reader.GetSampleRate(), reader.GetChannels())) {
270 		fprintf(stderr, "ERROR: Could not initialize the fingerprinting process\n");
271 		exit(2);
272 	}
273 
274 	size_t stream_size = 0;
275 	const size_t stream_limit = g_max_duration * reader.GetSampleRate();
276 
277 	size_t chunk_size = 0;
278 	const size_t chunk_limit = g_max_chunk_duration * reader.GetSampleRate();
279 
280 	size_t extra_chunk_limit = 0;
281 	double overlap = 0.0;
282 	if (chunk_limit > 0 && g_overlap) {
283 		extra_chunk_limit = chromaprint_get_delay(ctx);
284 		overlap = chromaprint_get_delay_ms(ctx) / 1000.0;
285 	}
286 
287 	bool first_chunk = true;
288 	bool read_failed = false;
289 	bool got_results = false;
290 
291 	while (!reader.IsFinished()) {
292 		const int16_t *frame_data = nullptr;
293 		size_t frame_size = 0;
294 		if (!reader.Read(&frame_data, &frame_size)) {
295 			fprintf(stderr, "ERROR: %s\n", reader.GetError().c_str());
296 			read_failed = true;
297 			break;
298 		}
299 
300 		bool stream_done = false;
301 		if (stream_limit > 0) {
302 			const auto remaining = stream_limit - stream_size;
303 			if (frame_size > remaining) {
304 				frame_size = remaining;
305 				stream_done = true;
306 			}
307 		}
308 		stream_size += frame_size;
309 
310 		if (frame_size == 0) {
311 			if (stream_done) {
312 				break;
313 			} else {
314 				continue;
315 			}
316 		}
317 
318 		bool chunk_done = false;
319 		size_t first_part_size = frame_size;
320 		if (chunk_limit > 0) {
321 			const auto remaining = chunk_limit + extra_chunk_limit - chunk_size;
322 			if (first_part_size > remaining) {
323 				first_part_size = remaining;
324 				chunk_done = true;
325 			}
326 		}
327 
328 		if (!chromaprint_feed(ctx, frame_data, first_part_size * reader.GetChannels())) {
329 			fprintf(stderr, "ERROR: Could not process audio data\n");
330 			exit(2);
331 		}
332 
333 		chunk_size += first_part_size;
334 
335 		if (chunk_done) {
336 			if (!chromaprint_finish(ctx)) {
337 				fprintf(stderr, "ERROR: Could not finish the fingerprinting process\n");
338 				exit(2);
339 			}
340 
341 			const auto chunk_duration = (chunk_size - extra_chunk_limit) * 1.0 / reader.GetSampleRate() + overlap;
342 			PrintResult(ctx, reader, first_chunk, ts, chunk_duration);
343 			got_results = true;
344 
345 			if (g_abs_ts) {
346 				ts = GetCurrentTimestamp();
347 			} else {
348 				ts += chunk_duration;
349 			}
350 
351 			if (g_overlap) {
352 				if (!chromaprint_clear_fingerprint(ctx)) {
353 					fprintf(stderr, "ERROR: Could not initialize the fingerprinting process\n");
354 					exit(2);
355 				}
356 				ts -= overlap;
357 			} else {
358 				if (!chromaprint_start(ctx, reader.GetSampleRate(), reader.GetChannels())) {
359 					fprintf(stderr, "ERROR: Could not initialize the fingerprinting process\n");
360 					exit(2);
361 				}
362 			}
363 
364 			if (first_chunk) {
365 				extra_chunk_limit = 0;
366 				first_chunk = false;
367 			}
368 
369 			chunk_size = 0;
370 		}
371 
372 		frame_data += first_part_size * reader.GetChannels();
373 		frame_size -= first_part_size;
374 
375 		if (frame_size > 0) {
376 			if (!chromaprint_feed(ctx, frame_data, frame_size * reader.GetChannels())) {
377 				fprintf(stderr, "ERROR: Could not process audio data\n");
378 				exit(2);
379 			}
380 		}
381 
382 		chunk_size += frame_size;
383 
384 		if (stream_done) {
385 			break;
386 		}
387 	}
388 
389 	if (!chromaprint_finish(ctx)) {
390 		fprintf(stderr, "ERROR: Could not finish the fingerprinting process\n");
391 		exit(2);
392 	}
393 
394 	if (chunk_size > 0) {
395 		const auto chunk_duration = (chunk_size - extra_chunk_limit) * 1.0 / reader.GetSampleRate() + overlap;
396 		PrintResult(ctx, reader, first_chunk, ts, chunk_duration);
397 		got_results = true;
398 	} else if (first_chunk) {
399 		fprintf(stderr, "ERROR: Not enough audio data\n");
400 		exit(2);
401 	}
402 
403 	if (!g_ignore_errors) {
404 		if (read_failed) {
405 			exit(got_results ? 3 : 2);
406 		}
407 	}
408 }
409 
fpcalc_main(int argc,char ** argv)410 int fpcalc_main(int argc, char **argv) {
411 	ParseOptions(argc, argv);
412 
413 	FFmpegAudioReader reader;
414 	if (g_input_format) {
415 		if (!reader.SetInputFormat(g_input_format)) {
416 			fprintf(stderr, "ERROR: Invalid format\n");
417 			return 2;
418 		}
419 	}
420 	if (g_input_channels) {
421 		if (!reader.SetInputChannels(g_input_channels)) {
422 			fprintf(stderr, "ERROR: Invalid number of channels\n");
423 			return 2;
424 		}
425 	}
426 	if (g_input_sample_rate) {
427 		if (!reader.SetInputSampleRate(g_input_sample_rate)) {
428 			fprintf(stderr, "ERROR: Invalid sample rate\n");
429 			return 2;
430 		}
431 	}
432 
433 	ChromaprintContext *chromaprint_ctx = chromaprint_new(g_algorithm);
434 	SCOPE_EXIT(chromaprint_free(chromaprint_ctx));
435 
436 	reader.SetOutputChannels(chromaprint_get_num_channels(chromaprint_ctx));
437 	reader.SetOutputSampleRate(chromaprint_get_sample_rate(chromaprint_ctx));
438 
439 	for (int i = 1; i < argc; i++) {
440 		ProcessFile(chromaprint_ctx, reader, argv[i]);
441 	}
442 
443 	return 0;
444 }
445 
446 #ifdef _WIN32
main(int win32_argc,char ** win32_argv)447 int main(int win32_argc, char **win32_argv)
448 {
449 	int i, argc = 0, buffsize = 0, offset = 0;
450 	char **utf8_argv, *utf8_argv_ptr;
451 	wchar_t **argv;
452 	argv = CommandLineToArgvW(GetCommandLineW(), &argc);
453 	buffsize = 0;
454 	for (i = 0; i < argc; i++) {
455 		buffsize += WideCharToMultiByte(CP_UTF8, 0, argv[i], -1, NULL, 0, NULL, NULL);
456 	}
457 	utf8_argv = (char **) av_mallocz(sizeof(char *) * (argc + 1) + buffsize);
458 	utf8_argv_ptr = (char *) utf8_argv + sizeof(char *) * (argc + 1);
459 	for (i = 0; i < argc; i++) {
460 		utf8_argv[i] = &utf8_argv_ptr[offset];
461 		offset += WideCharToMultiByte(CP_UTF8, 0, argv[i], -1, &utf8_argv_ptr[offset], buffsize - offset, NULL, NULL);
462 	}
463 	LocalFree(argv);
464 	return fpcalc_main(argc, utf8_argv);
465 }
466 #else
main(int argc,char ** argv)467 int main(int argc, char **argv)
468 {
469 	return fpcalc_main(argc, argv);
470 }
471 #endif
472