#include <cerrno>
#include <chrono>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <sstream>
#include <chromaprint.h>
#include "audio/ffmpeg_audio_reader.h"
#include "utils/scope_exit.h"
9
10 #ifdef _WIN32
11 #include <windows.h>
12 #endif
13
14 using namespace chromaprint;
15
// Output layouts selectable with -text, -json and -plain.
enum Format {
	TEXT = 0,   // KEY=VALUE lines
	JSON = 1,   // one JSON object per result
	PLAIN = 2,  // only the fingerprint, one per line
};
21
22 static Format g_format = TEXT;
23 static char *g_input_format = nullptr;
24 static int g_input_channels = 0;
25 static int g_input_sample_rate = 0;
26 static double g_max_duration = 120;
27 static double g_max_chunk_duration = 0;
28 static bool g_overlap = false;
29 static bool g_raw = false;
30 static bool g_signed = false;
31 static bool g_abs_ts = false;
32 static bool g_ignore_errors = false;
33 static ChromaprintAlgorithm g_algorithm = CHROMAPRINT_ALGORITHM_DEFAULT;
34
35
// Usage text printed for -h / -help / --help. It is used as a printf format
// string; the single %s is replaced with the program name (argv[0]).
const char *g_help =
	"Usage: %s [OPTIONS] FILE [FILE...]\n"
	"\n"
	"Generate fingerprints from audio files/streams.\n"
	"\n"
	"Options:\n"
	" -format NAME Set the input format name\n"
	" -rate NUM Set the sample rate of the input audio\n"
	" -channels NUM Set the number of channels in the input audio\n"
	" -length SECS Restrict the duration of the processed input audio (default 120)\n"
	" -chunk SECS Split the input audio into chunks of this duration\n"
	" -algorithm NUM Set the algorithm method (default 2)\n"
	" -overlap Overlap the chunks slightly to make sure audio on the edges is fingerprinted\n"
	" -ts Output UNIX timestamps for chunked results, useful when fingerprinting real-time audio stream\n"
	" -raw Output fingerprints in the uncompressed format\n"
	" -signed Change the uncompressed format from unsigned integers to signed (for pg_acoustid compatibility)\n"
	" -json Print the output in JSON format\n"
	" -text Print the output in text format\n"
	" -plain Print just the fingerprint in text format\n"
	" -version Print version information\n"
	;
57
ParseOptions(int & argc,char ** argv)58 static void ParseOptions(int &argc, char **argv) {
59 int j = 1;
60 for (int i = 1; i < argc; i++) {
61 if (!strcmp(argv[i], "--")) {
62 while (++i < argc) {
63 argv[j++] = argv[i];
64 }
65 } else if ((!strcmp(argv[i], "-format") || !strcmp(argv[i], "-f")) && i + 1 < argc) {
66 g_input_format = argv[++i];
67 } else if ((!strcmp(argv[i], "-channels") || !strcmp(argv[i], "-c")) && i + 1 < argc) {
68 auto value = atoi(argv[i + 1]);
69 if (value > 0) {
70 g_input_channels = value;
71 } else {
72 fprintf(stderr, "ERROR: The argument for %s must be a non-zero positive number\n", argv[i]);
73 exit(2);
74 }
75 i++;
76 } else if ((!strcmp(argv[i], "-rate") || !strcmp(argv[i], "-r")) && i + 1 < argc) {
77 auto value = atoi(argv[i + 1]);
78 if (value >= 0) {
79 g_input_sample_rate = value;
80 } else {
81 fprintf(stderr, "ERROR: The argument for %s must be a positive number\n", argv[i]);
82 exit(2);
83 }
84 i++;
85 } else if ((!strcmp(argv[i], "-length") || !strcmp(argv[i], "-t")) && i + 1 < argc) {
86 auto value = atof(argv[i + 1]);
87 if (value >= 0) {
88 g_max_duration = value;
89 } else {
90 fprintf(stderr, "ERROR: The argument for %s must be a positive number\n", argv[i]);
91 exit(2);
92 }
93 i++;
94 } else if (!strcmp(argv[i], "-chunk") && i + 1 < argc) {
95 auto value = atof(argv[i + 1]);
96 if (value >= 0) {
97 g_max_chunk_duration = value;
98 } else {
99 fprintf(stderr, "ERROR: The argument for %s must be a positive number\n", argv[i]);
100 exit(2);
101 }
102 i++;
103 } else if ((!strcmp(argv[i], "-algorithm") || !strcmp(argv[i], "-a")) && i + 1 < argc) {
104 auto value = atoi(argv[i + 1]);
105 if (value >= 1 && value <= 5) {
106 g_algorithm = (ChromaprintAlgorithm)(value - 1);
107 } else {
108 fprintf(stderr, "ERROR: The argument for %s must be 1 - 5\n", argv[i]);
109 exit(2);
110 }
111 i++;
112 } else if (!strcmp(argv[i], "-text")) {
113 g_format = TEXT;
114 } else if (!strcmp(argv[i], "-json")) {
115 g_format = JSON;
116 } else if (!strcmp(argv[i], "-plain")) {
117 g_format = PLAIN;
118 } else if (!strcmp(argv[i], "-overlap")) {
119 g_overlap = true;
120 } else if (!strcmp(argv[i], "-ts")) {
121 g_abs_ts = true;
122 } else if (!strcmp(argv[i], "-raw")) {
123 g_raw = true;
124 } else if (!strcmp(argv[i], "-signed")) {
125 g_signed = true;
126 } else if (!strcmp(argv[i], "-ignore-errors")) {
127 g_ignore_errors = true;
128 } else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "-version")) {
129 #if defined(USE_SWRESAMPLE)
130 #define RESAMPLE_LIB_IDENT_IDENT LIBSWRESAMPLE_IDENT
131 #else
132 #define RESAMPLE_LIB_IDENT_IDENT LIBAVRESAMPLE_IDENT
133 #endif
134 fprintf(stdout, "fpcalc version %s (FFmpeg %s %s %s)\n", chromaprint_get_version(), LIBAVCODEC_IDENT, LIBAVFORMAT_IDENT, RESAMPLE_LIB_IDENT_IDENT);
135 exit(0);
136 } else if (!strcmp(argv[i], "-h") || !strcmp(argv[i], "-help") || !strcmp(argv[i], "--help")) {
137 fprintf(stdout, g_help, argv[0]);
138 exit(0);
139 } else {
140 const auto len = strlen(argv[i]);
141 if (len > 1 && argv[i][0] == '-') {
142 fprintf(stderr, "ERROR: Unknown option %s\n", argv[i]);
143 exit(2);
144 } else {
145 argv[j++] = argv[i];
146 }
147 }
148 }
149 if (j < 2) {
150 fprintf(stderr, "ERROR: No input files\n");
151 exit(2);
152 }
153 argc = j;
154 }
155
PrintResult(ChromaprintContext * ctx,FFmpegAudioReader & reader,bool first,double timestamp,double duration)156 void PrintResult(ChromaprintContext *ctx, FFmpegAudioReader &reader, bool first, double timestamp, double duration) {
157 std::string tmp_fp;
158 const char *fp;
159 bool dealloc_fp = false;
160
161 int size;
162 if (!chromaprint_get_raw_fingerprint_size(ctx, &size)) {
163 fprintf(stderr, "ERROR: Could not get the fingerprinting size\n");
164 exit(2);
165 }
166 if (size <= 0) {
167 if (first) {
168 fprintf(stderr, "ERROR: Empty fingerprint\n");
169 exit(2);
170 }
171 return;
172 }
173
174 if (g_raw) {
175 std::stringstream ss;
176 uint32_t *raw_fp_data = nullptr;
177 int raw_fp_size = 0;
178 if (!chromaprint_get_raw_fingerprint(ctx, &raw_fp_data, &raw_fp_size)) {
179 fprintf(stderr, "ERROR: Could not get the fingerprinting\n");
180 exit(2);
181 }
182 SCOPE_EXIT(chromaprint_dealloc(raw_fp_data));
183 for (int i = 0; i < raw_fp_size; i++) {
184 if (i > 0) {
185 ss << ',';
186 }
187 if (g_signed) {
188 ss << static_cast<int32_t>(raw_fp_data[i]);
189 } else {
190 ss << raw_fp_data[i];
191 }
192 }
193 tmp_fp = ss.str();
194 fp = tmp_fp.c_str();
195 } else {
196 char *tmp_fp2;
197 if (!chromaprint_get_fingerprint(ctx, &tmp_fp2)) {
198 fprintf(stderr, "ERROR: Could not get the fingerprinting\n");
199 exit(2);
200 }
201 fp = tmp_fp2;
202 dealloc_fp = true;
203 }
204 SCOPE_EXIT(if (dealloc_fp) { chromaprint_dealloc((void *) fp); });
205
206 if (g_max_chunk_duration == 0) {
207 duration = reader.GetDuration();
208 if (duration < 0.0) {
209 duration = 0.0;
210 } else {
211 duration /= 1000.0;
212 }
213 }
214
215 switch (g_format) {
216 case TEXT:
217 if (!first) {
218 printf("\n");
219 }
220 if (g_abs_ts) {
221 printf("TIMESTAMP=%.2f\n", timestamp);
222 }
223 printf("DURATION=%d\nFINGERPRINT=%s\n", int(duration), fp);
224 break;
225 case JSON:
226 if (g_max_chunk_duration != 0) {
227 if (g_raw) {
228 printf("{\"timestamp\": %.2f, \"duration\": %.2f, \"fingerprint\": [%s]}\n", timestamp, duration, fp);
229 } else {
230 printf("{\"timestamp\": %.2f, \"duration\": %.2f, \"fingerprint\": \"%s\"}\n", timestamp, duration, fp);
231 }
232 } else {
233 if (g_raw) {
234 printf("{\"duration\": %.2f, \"fingerprint\": [%s]}\n", duration, fp);
235 } else {
236 printf("{\"duration\": %.2f, \"fingerprint\": \"%s\"}\n", duration, fp);
237 }
238 }
239 break;
240 case PLAIN:
241 printf("%s\n", fp);
242 break;
243 }
244
245 fflush(stdout);
246 }
247
// Returns the current system-clock time as seconds since the clock's epoch,
// with microsecond resolution.
double GetCurrentTimestamp() {
	using std::chrono::duration_cast;
	using std::chrono::microseconds;
	using std::chrono::system_clock;

	const auto since_epoch = system_clock::now().time_since_epoch();
	const auto micros = duration_cast<microseconds>(since_epoch).count();
	return micros / 1000000.0;
}
253
ProcessFile(ChromaprintContext * ctx,FFmpegAudioReader & reader,const char * file_name)254 void ProcessFile(ChromaprintContext *ctx, FFmpegAudioReader &reader, const char *file_name) {
255 double ts = 0.0;
256 if (g_abs_ts) {
257 ts = GetCurrentTimestamp();
258 }
259
260 if (!strcmp(file_name, "-")) {
261 file_name = "pipe:0";
262 }
263
264 if (!reader.Open(file_name)) {
265 fprintf(stderr, "ERROR: %s\n", reader.GetError().c_str());
266 exit(2);
267 }
268
269 if (!chromaprint_start(ctx, reader.GetSampleRate(), reader.GetChannels())) {
270 fprintf(stderr, "ERROR: Could not initialize the fingerprinting process\n");
271 exit(2);
272 }
273
274 size_t stream_size = 0;
275 const size_t stream_limit = g_max_duration * reader.GetSampleRate();
276
277 size_t chunk_size = 0;
278 const size_t chunk_limit = g_max_chunk_duration * reader.GetSampleRate();
279
280 size_t extra_chunk_limit = 0;
281 double overlap = 0.0;
282 if (chunk_limit > 0 && g_overlap) {
283 extra_chunk_limit = chromaprint_get_delay(ctx);
284 overlap = chromaprint_get_delay_ms(ctx) / 1000.0;
285 }
286
287 bool first_chunk = true;
288 bool read_failed = false;
289 bool got_results = false;
290
291 while (!reader.IsFinished()) {
292 const int16_t *frame_data = nullptr;
293 size_t frame_size = 0;
294 if (!reader.Read(&frame_data, &frame_size)) {
295 fprintf(stderr, "ERROR: %s\n", reader.GetError().c_str());
296 read_failed = true;
297 break;
298 }
299
300 bool stream_done = false;
301 if (stream_limit > 0) {
302 const auto remaining = stream_limit - stream_size;
303 if (frame_size > remaining) {
304 frame_size = remaining;
305 stream_done = true;
306 }
307 }
308 stream_size += frame_size;
309
310 if (frame_size == 0) {
311 if (stream_done) {
312 break;
313 } else {
314 continue;
315 }
316 }
317
318 bool chunk_done = false;
319 size_t first_part_size = frame_size;
320 if (chunk_limit > 0) {
321 const auto remaining = chunk_limit + extra_chunk_limit - chunk_size;
322 if (first_part_size > remaining) {
323 first_part_size = remaining;
324 chunk_done = true;
325 }
326 }
327
328 if (!chromaprint_feed(ctx, frame_data, first_part_size * reader.GetChannels())) {
329 fprintf(stderr, "ERROR: Could not process audio data\n");
330 exit(2);
331 }
332
333 chunk_size += first_part_size;
334
335 if (chunk_done) {
336 if (!chromaprint_finish(ctx)) {
337 fprintf(stderr, "ERROR: Could not finish the fingerprinting process\n");
338 exit(2);
339 }
340
341 const auto chunk_duration = (chunk_size - extra_chunk_limit) * 1.0 / reader.GetSampleRate() + overlap;
342 PrintResult(ctx, reader, first_chunk, ts, chunk_duration);
343 got_results = true;
344
345 if (g_abs_ts) {
346 ts = GetCurrentTimestamp();
347 } else {
348 ts += chunk_duration;
349 }
350
351 if (g_overlap) {
352 if (!chromaprint_clear_fingerprint(ctx)) {
353 fprintf(stderr, "ERROR: Could not initialize the fingerprinting process\n");
354 exit(2);
355 }
356 ts -= overlap;
357 } else {
358 if (!chromaprint_start(ctx, reader.GetSampleRate(), reader.GetChannels())) {
359 fprintf(stderr, "ERROR: Could not initialize the fingerprinting process\n");
360 exit(2);
361 }
362 }
363
364 if (first_chunk) {
365 extra_chunk_limit = 0;
366 first_chunk = false;
367 }
368
369 chunk_size = 0;
370 }
371
372 frame_data += first_part_size * reader.GetChannels();
373 frame_size -= first_part_size;
374
375 if (frame_size > 0) {
376 if (!chromaprint_feed(ctx, frame_data, frame_size * reader.GetChannels())) {
377 fprintf(stderr, "ERROR: Could not process audio data\n");
378 exit(2);
379 }
380 }
381
382 chunk_size += frame_size;
383
384 if (stream_done) {
385 break;
386 }
387 }
388
389 if (!chromaprint_finish(ctx)) {
390 fprintf(stderr, "ERROR: Could not finish the fingerprinting process\n");
391 exit(2);
392 }
393
394 if (chunk_size > 0) {
395 const auto chunk_duration = (chunk_size - extra_chunk_limit) * 1.0 / reader.GetSampleRate() + overlap;
396 PrintResult(ctx, reader, first_chunk, ts, chunk_duration);
397 got_results = true;
398 } else if (first_chunk) {
399 fprintf(stderr, "ERROR: Not enough audio data\n");
400 exit(2);
401 }
402
403 if (!g_ignore_errors) {
404 if (read_failed) {
405 exit(got_results ? 3 : 2);
406 }
407 }
408 }
409
fpcalc_main(int argc,char ** argv)410 int fpcalc_main(int argc, char **argv) {
411 ParseOptions(argc, argv);
412
413 FFmpegAudioReader reader;
414 if (g_input_format) {
415 if (!reader.SetInputFormat(g_input_format)) {
416 fprintf(stderr, "ERROR: Invalid format\n");
417 return 2;
418 }
419 }
420 if (g_input_channels) {
421 if (!reader.SetInputChannels(g_input_channels)) {
422 fprintf(stderr, "ERROR: Invalid number of channels\n");
423 return 2;
424 }
425 }
426 if (g_input_sample_rate) {
427 if (!reader.SetInputSampleRate(g_input_sample_rate)) {
428 fprintf(stderr, "ERROR: Invalid sample rate\n");
429 return 2;
430 }
431 }
432
433 ChromaprintContext *chromaprint_ctx = chromaprint_new(g_algorithm);
434 SCOPE_EXIT(chromaprint_free(chromaprint_ctx));
435
436 reader.SetOutputChannels(chromaprint_get_num_channels(chromaprint_ctx));
437 reader.SetOutputSampleRate(chromaprint_get_sample_rate(chromaprint_ctx));
438
439 for (int i = 1; i < argc; i++) {
440 ProcessFile(chromaprint_ctx, reader, argv[i]);
441 }
442
443 return 0;
444 }
445
446 #ifdef _WIN32
main(int win32_argc,char ** win32_argv)447 int main(int win32_argc, char **win32_argv)
448 {
449 int i, argc = 0, buffsize = 0, offset = 0;
450 char **utf8_argv, *utf8_argv_ptr;
451 wchar_t **argv;
452 argv = CommandLineToArgvW(GetCommandLineW(), &argc);
453 buffsize = 0;
454 for (i = 0; i < argc; i++) {
455 buffsize += WideCharToMultiByte(CP_UTF8, 0, argv[i], -1, NULL, 0, NULL, NULL);
456 }
457 utf8_argv = (char **) av_mallocz(sizeof(char *) * (argc + 1) + buffsize);
458 utf8_argv_ptr = (char *) utf8_argv + sizeof(char *) * (argc + 1);
459 for (i = 0; i < argc; i++) {
460 utf8_argv[i] = &utf8_argv_ptr[offset];
461 offset += WideCharToMultiByte(CP_UTF8, 0, argv[i], -1, &utf8_argv_ptr[offset], buffsize - offset, NULL, NULL);
462 }
463 LocalFree(argv);
464 return fpcalc_main(argc, utf8_argv);
465 }
466 #else
main(int argc,char ** argv)467 int main(int argc, char **argv)
468 {
469 return fpcalc_main(argc, argv);
470 }
471 #endif
472