1 /*
2 * Copyright 2008-2013 Various Authors
3 * Copyright 2005 Timo Hirvonen
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation; either version 2 of the
8 * License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include "id3.h"
20 #include "xmalloc.h"
21 #include "convert.h"
22 #include "uchar.h"
23 #include "options.h"
24 #include "debug.h"
25 #include "utils.h"
26 #include "file.h"
27
28 #include <unistd.h>
29 #include <stdint.h>
30 #include <errno.h>
31 #include <stdio.h>
32 #include <string.h>
33 #include <strings.h>
34 #include <limits.h>
35
/*
 * Text encoding identifiers. v2_add_frame() reads one of these from the
 * first byte of a text frame and decode_str() dispatches on it.
 */
enum {
	ID3_ENCODING_ISO_8859_1 = 0x00,
	ID3_ENCODING_UTF_16 = 0x01,
	ID3_ENCODING_UTF_16_BE = 0x02,
	ID3_ENCODING_UTF_8 = 0x03,

	/* largest valid identifier; v2_add_frame() ignores frames above it */
	ID3_ENCODING_MAX = 0x03
};
44
45 /*
46 * position:
47 *
48 * 0 "ID3"
49 * -10 "3DI"
50 * -128 "TAG"
51 * -138 "3DI"
52 *
53 * if v2 is at beginning _and_ at end then there must be a seek tag at beginning
54 */
55
/*
 * Parsed 10-byte ID3v2 header or footer, filled in by
 * v2_header_footer_parse().
 */
struct v2_header {
	unsigned char ver_major;	/* byte 3 of the header */
	unsigned char ver_minor;	/* byte 4 of the header */
	unsigned char flags;		/* byte 5, tested against V2_HEADER_* */
	uint32_t size;			/* bytes 6..9 decoded by u32_unsync() */
};
62
/*
 * Extended header as far as this file cares: only its size, which
 * v2_read() uses as the offset of the first frame.
 */
struct v2_extended_header {
	uint32_t size;
};
66
/* Parsed frame header, filled in by the v2_*_frame_header_parse() functions. */
struct v2_frame_header {
	/* 3 or 4 characters; NOT NUL-terminated when all 4 are used */
	char id[4];
	/* frame data size in bytes, excluding the frame header */
	uint32_t size;
	/* tested against V2_FRAME_*; always 0 for v2.2 frames */
	uint16_t flags;
};
72
/* bits of struct v2_header.flags */
#define V2_HEADER_UNSYNC		(1 << 7)
#define V2_HEADER_EXTENDED		(1 << 6)
#define V2_HEADER_EXPERIMENTAL		(1 << 5)
#define V2_HEADER_FOOTER		(1 << 4)

/*
 * bits of struct v2_frame_header.flags
 *
 * COMPRESSED and ENCRYPTHED (sic) are defined but never tested by v2_read().
 */
#define V2_FRAME_COMPRESSED		(1 << 3) /* great idea!!1 */
#define V2_FRAME_ENCRYPTHED		(1 << 2) /* wow, this is very neat! */
#define V2_FRAME_UNSYNC			(1 << 1)
#define V2_FRAME_LEN_INDICATOR		(1 << 0)
82
#define NR_GENRES 148
/* genres {{{ */
/*
 * Genre names indexed by numeric genre id (the last byte of an ID3v1 tag,
 * or a number found in a v2 genre frame). Four entries per row; the comment
 * at the start of each row is the index of its first entry.
 */
static const char *genres[NR_GENRES] = {
	/*   0 */ "Blues", "Classic Rock", "Country", "Dance",
	/*   4 */ "Disco", "Funk", "Grunge", "Hip-Hop",
	/*   8 */ "Jazz", "Metal", "New Age", "Oldies",
	/*  12 */ "Other", "Pop", "R&B", "Rap",
	/*  16 */ "Reggae", "Rock", "Techno", "Industrial",
	/*  20 */ "Alternative", "Ska", "Death Metal", "Pranks",
	/*  24 */ "Soundtrack", "Euro-Techno", "Ambient", "Trip-Hop",
	/*  28 */ "Vocal", "Jazz+Funk", "Fusion", "Trance",
	/*  32 */ "Classical", "Instrumental", "Acid", "House",
	/*  36 */ "Game", "Sound Clip", "Gospel", "Noise",
	/*  40 */ "Alt", "Bass", "Soul", "Punk",
	/*  44 */ "Space", "Meditative", "Instrumental Pop", "Instrumental Rock",
	/*  48 */ "Ethnic", "Gothic", "Darkwave", "Techno-Industrial",
	/*  52 */ "Electronic", "Pop-Folk", "Eurodance", "Dream",
	/*  56 */ "Southern Rock", "Comedy", "Cult", "Gangsta Rap",
	/*  60 */ "Top 40", "Christian Rap", "Pop/Funk", "Jungle",
	/*  64 */ "Native American", "Cabaret", "New Wave", "Psychedelic",
	/*  68 */ "Rave", "Showtunes", "Trailer", "Lo-Fi",
	/*  72 */ "Tribal", "Acid Punk", "Acid Jazz", "Polka",
	/*  76 */ "Retro", "Musical", "Rock & Roll", "Hard Rock",
	/*  80 */ "Folk", "Folk/Rock", "National Folk", "Swing",
	/*  84 */ "Fast-Fusion", "Bebob", "Latin", "Revival",
	/*  88 */ "Celtic", "Bluegrass", "Avantgarde", "Gothic Rock",
	/*  92 */ "Progressive Rock", "Psychedelic Rock", "Symphonic Rock", "Slow Rock",
	/*  96 */ "Big Band", "Chorus", "Easy Listening", "Acoustic",
	/* 100 */ "Humour", "Speech", "Chanson", "Opera",
	/* 104 */ "Chamber Music", "Sonata", "Symphony", "Booty Bass",
	/* 108 */ "Primus", "Porn Groove", "Satire", "Slow Jam",
	/* 112 */ "Club", "Tango", "Samba", "Folklore",
	/* 116 */ "Ballad", "Power Ballad", "Rhythmic Soul", "Freestyle",
	/* 120 */ "Duet", "Punk Rock", "Drum Solo", "A Cappella",
	/* 124 */ "Euro-House", "Dance Hall", "Goa", "Drum & Bass",
	/* 128 */ "Club-House", "Hardcore", "Terror", "Indie",
	/* 132 */ "BritPop", "Negerpunk", "Polsk Punk", "Beat",
	/* 136 */ "Christian Gangsta Rap", "Heavy Metal", "Black Metal", "Crossover",
	/* 140 */ "Contemporary Christian", "Christian Rock", "Merengue", "Salsa",
	/* 144 */ "Thrash Metal", "Anime", "JPop", "Synthpop"
};
/* }}} */
236
/* Debug output for this file; forwards all arguments unchanged to d_print(). */
#define id3_debug(...) d_print(__VA_ARGS__)
238
/*
 * Public table of comment key names, one per enum id3_key value.
 * NOTE(review): the order presumably has to match the enum id3_key
 * declaration in id3.h, which is not visible here - confirm before
 * reordering or inserting entries.
 */
const char * const id3_key_names[NUM_ID3_KEYS] = {
	"artist",
	"album",
	"title",
	"date",
	"originaldate",
	"genre",
	"discnumber",
	"tracknumber",
	"albumartist",
	"artistsort",
	"albumartistsort",
	"albumsort",
	"compilation",
	"replaygain_track_gain",
	"replaygain_track_peak",
	"replaygain_album_gain",
	"replaygain_album_peak",
	"composer",
	"conductor",
	"lyricist",
	"remixer",
	"label",
	"publisher",
	"subtitle",
	"comment",
	"musicbrainz_trackid",
	"media",
	"bpm",
};
269
/* Non-zero when uch lies in the range 0xDC00..0xDFFF (UTF-16 low surrogates). */
static int utf16_is_lsurrogate(uchar uch)
{
	return uch >= 0xdc00 && uch <= 0xdfff;
}
274
/* Non-zero when uch lies in the range 0xD800..0xDBFF (UTF-16 high surrogates). */
static int utf16_is_hsurrogate(uchar uch)
{
	return uch >= 0xd800 && uch <= 0xdbff;
}
279
/* Non-zero when uch is the byte order mark 0xFEFF. */
static int utf16_is_bom(uchar uch)
{
	return 0xfeff == uch;
}
284
/*
 * Non-zero for code units that utf16_to_utf8() must not emit:
 * either kind of surrogate, or a byte order mark.
 */
static int utf16_is_special(uchar uch)
{
	if (utf16_is_hsurrogate(uch))
		return 1;
	if (utf16_is_lsurrogate(uch))
		return 1;
	return utf16_is_bom(uch) != 0;
}
289
utf16_to_utf8(const unsigned char * buf,size_t buf_size)290 static char *utf16_to_utf8(const unsigned char *buf, size_t buf_size)
291 {
292 char *out;
293 size_t i, idx;
294 int little_endian = 0;
295
296 if (buf_size < 2)
297 return NULL;
298
299 if (buf[0] == 0xff && buf[1] == 0xfe)
300 little_endian = 1;
301
302 out = xnew(char, (buf_size / 2) * 4 + 1);
303 i = idx = 0;
304
305 while (buf_size - i >= 2) {
306 uchar u;
307
308 if (little_endian)
309 u = buf[i] + (buf[i + 1] << 8);
310 else
311 u = buf[i + 1] + (buf[i] << 8);
312
313 if (u_is_unicode(u)) {
314 if (!utf16_is_special(u))
315 u_set_char(out, &idx, u);
316 } else {
317 free(out);
318 return NULL;
319 }
320
321 if (u == 0)
322 return out;
323
324 i += 2;
325 }
326
327 u_set_char(out, &idx, 0);
328 return out;
329 }
330
/* Return 1 when buf starts with the ID3v1 marker "TAG", otherwise 0. */
static int is_v1(const char *buf)
{
	static const char magic[3] = { 'T', 'A', 'G' };
	int k;

	for (k = 0; k < 3; k++) {
		if (buf[k] != magic[k])
			return 0;
	}
	return 1;
}
335
/*
 * Decode a 4-byte sync-safe integer (7 significant bits per byte, most
 * significant byte first) into *up.
 *
 * Returns 1 on success. Returns 0, leaving *up untouched, as soon as a
 * byte with its top bit set is met.
 */
static int u32_unsync(const unsigned char *buf, uint32_t *up)
{
	const unsigned char *p;
	uint32_t value = 0;

	for (p = buf; p != buf + 4; p++) {
		if (*p & 0x80)
			return 0;
		value = (value << 7) | *p;
	}
	*up = value;
	return 1;
}
351
/* Store the big-endian 32-bit value found in buf[0..3] into *up. */
static void get_u32(const unsigned char *buf, uint32_t *up)
{
	*up = ((uint32_t)buf[0] << 24) |
	      ((uint32_t)buf[1] << 16) |
	      ((uint32_t)buf[2] << 8) |
	       (uint32_t)buf[3];
}
364
/* Store the big-endian 24-bit value found in buf[0..2] into *up. */
static void get_u24(const unsigned char *buf, uint32_t *up)
{
	*up = ((uint32_t)buf[0] << 16) |
	      ((uint32_t)buf[1] << 8) |
	       (uint32_t)buf[2];
}
377
/*
 * Store the big-endian 16-bit value found in buf[0..1] into *ip. The raw
 * value is assembled unsigned and then converted to int16_t on assignment.
 */
static void get_i16(const unsigned char *buf, int16_t *ip)
{
	uint16_t raw = (uint16_t)((buf[0] << 8) | buf[1]);

	*ip = raw;
}
390
v2_header_footer_parse(struct v2_header * header,const char * buf)391 static int v2_header_footer_parse(struct v2_header *header, const char *buf)
392 {
393 const unsigned char *b = (const unsigned char *)buf;
394
395 header->ver_major = b[3];
396 header->ver_minor = b[4];
397 header->flags = b[5];
398 if (header->ver_major == 0xff || header->ver_minor == 0xff)
399 return 0;
400 return u32_unsync(b + 6, &header->size);
401 }
402
/*
 * Parse a 10-byte ID3v2 header, which must start with "ID3".
 * Returns 1 on success, 0 otherwise.
 */
static int v2_header_parse(struct v2_header *header, const char *buf)
{
	if (!(buf[0] == 'I' && buf[1] == 'D' && buf[2] == '3'))
		return 0;
	return v2_header_footer_parse(header, buf);
}
409
/*
 * Parse a 10-byte ID3v2 footer, which must start with "3DI".
 * Returns 1 on success, 0 otherwise.
 */
static int v2_footer_parse(struct v2_header *header, const char *buf)
{
	if (!(buf[0] == '3' && buf[1] == 'D' && buf[2] == 'I'))
		return 0;
	return v2_header_footer_parse(header, buf);
}
416
v2_extended_header_parse(struct v2_extended_header * header,const char * buf)417 static int v2_extended_header_parse(struct v2_extended_header *header, const char *buf)
418 {
419 return u32_unsync((const unsigned char *)buf, &header->size);
420 }
421
/* Return 1 when ch is allowed in a frame id ([A-Z0-9]), otherwise 0. */
static int is_frame_id_char(char ch)
{
	if (ch >= '0' && ch <= '9')
		return 1;
	return ch >= 'A' && ch <= 'Z';
}
426
427 /* XXXYYY
428 *
429 * X = [A-Z0-9]
430 * Y = byte
431 *
432 * XXX is frame
433 * YYY is frame size excluding this 6 byte header
434 */
v2_2_0_frame_header_parse(struct v2_frame_header * header,const char * buf)435 static int v2_2_0_frame_header_parse(struct v2_frame_header *header, const char *buf)
436 {
437 int i;
438
439 for (i = 0; i < 3; i++) {
440 if (!is_frame_id_char(buf[i]))
441 return 0;
442 header->id[i] = buf[i];
443 }
444 header->id[3] = 0;
445 get_u24((const unsigned char *)(buf + 3), &header->size);
446 header->flags = 0;
447 if (header->size == 0)
448 return 0;
449 id3_debug("%c%c%c %d\n", header->id[0], header->id[1], header->id[2], header->size);
450 return 1;
451 }
452
453 /* XXXXYYYYZZ
454 *
455 * X = [A-Z0-9]
456 * Y = byte
457 * Z = byte
458 *
459 * XXXX is frame
460 * YYYY is frame size excluding this 10 byte header
461 * ZZ is flags
462 */
v2_3_0_frame_header_parse(struct v2_frame_header * header,const char * buf)463 static int v2_3_0_frame_header_parse(struct v2_frame_header *header, const char *buf)
464 {
465 int i;
466
467 for (i = 0; i < 4; i++) {
468 if (!(is_frame_id_char(buf[i]) || (i == 3 && buf[i] == '\0')))
469 return 0;
470 header->id[i] = buf[i];
471 }
472 get_u32((const unsigned char *)(buf + 4), &header->size);
473 header->flags = (buf[8] << 8) | buf[9];
474 if (header->size == 0)
475 return 0;
476 id3_debug("%c%c%c%c %d\n", header->id[0], header->id[1], header->id[2],
477 header->id[3], header->size);
478 return 1;
479 }
480
481 /* same as 2.3 but header size is sync safe */
v2_4_0_frame_header_parse(struct v2_frame_header * header,const char * buf)482 static int v2_4_0_frame_header_parse(struct v2_frame_header *header, const char *buf)
483 {
484 int i;
485
486 for (i = 0; i < 4; i++) {
487 if (!(is_frame_id_char(buf[i]) || (i == 3 && buf[i] == '\0')))
488 return 0;
489 header->id[i] = buf[i];
490 }
491 if (!u32_unsync((const unsigned char *)(buf + 4), &header->size))
492 return 0;
493 header->flags = (buf[8] << 8) | buf[9];
494 if (header->size == 0)
495 return 0;
496 id3_debug("%c%c%c%c %d\n", header->id[0], header->id[1], header->id[2],
497 header->id[3], header->size);
498 return 1;
499 }
500
parse_genre(const char * str)501 static char *parse_genre(const char *str)
502 {
503 int parenthesis = 0;
504 long int idx;
505 char *end;
506
507 if (strncasecmp(str, "(RX", 3) == 0)
508 return xstrdup("Remix");
509
510 if (strncasecmp(str, "(CR", 3) == 0)
511 return xstrdup("Cover");
512
513 if (*str == '(') {
514 parenthesis = 1;
515 str++;
516 }
517
518 idx = strtol(str, &end, 10);
519 if (str != end) {
520 /* Number parsed but there may be some crap after the number.
521 * I don't care, ID3v2 by definition contains crap.
522 */
523 if (idx >= 0 && idx < NR_GENRES)
524 return xstrdup(genres[idx]);
525 }
526
527 if (parenthesis) {
528 const char *ptr = strchr(str, ')');
529
530 if (ptr && ptr[1]) {
531 /* genre name after random crap in parenthesis,
532 * return the genre name */
533 return xstrdup(ptr + 1);
534 }
535 str--;
536 }
537
538 /* random crap, just return it and wait for a bug report */
539 return xstrdup(str);
540 }
541
/* http://www.id3.org/id3v2.4.0-structure.txt */
/*
 * Maps text frame ids to comment keys; searched linearly by
 * frame_tab_index(). Several frame ids share one key; in that case the
 * frame read last wins, because add_v2() replaces any earlier value
 * (decode_normal() makes an exception for ID3_ALBUMARTIST).
 */
static struct {
	const char name[8];
	enum id3_key key;
} frame_tab[] = {
	/* 2.4.0 */
	{ "TDRC", ID3_DATE }, // recording date
	{ "TDRL", ID3_DATE }, // release date
	{ "TDOR", ID3_ORIGINALDATE }, // original release date
	{ "TSOP", ID3_ARTISTSORT },
	{ "TSOA", ID3_ALBUMSORT },

	/* >= 2.3.0 */
	{ "TPE1", ID3_ARTIST },
	{ "TALB", ID3_ALBUM },
	{ "TIT2", ID3_TITLE },
	{ "TYER", ID3_DATE },
	{ "TCON", ID3_GENRE },
	{ "TPOS", ID3_DISC },
	{ "TRCK", ID3_TRACK },
	{ "TPE2", ID3_ALBUMARTIST },
	{ "TSO2", ID3_ALBUMARTISTSORT },
	{ "XSOP", ID3_ARTISTSORT }, // obsolete
	{ "XSOA", ID3_ALBUMSORT }, // obsolete
	{ "TCMP", ID3_COMPILATION },
	{ "TORY", ID3_ORIGINALDATE },
	{ "TCOM", ID3_COMPOSER },
	{ "TPE3", ID3_CONDUCTOR },
	{ "TEXT", ID3_LYRICIST },
	{ "TPE4", ID3_REMIXER },
	{ "TPUB", ID3_PUBLISHER }, // TPUB can be both publisher or label
	{ "TIT3", ID3_SUBTITLE },
	{ "TMED", ID3_MEDIA },
	{ "TBPM", ID3_BPM},

	/* obsolete frames (2.2.0) */
	{ "TP1", ID3_ARTIST },
	{ "TP2", ID3_ALBUMARTIST },
	{ "TAL", ID3_ALBUM },
	{ "TT2", ID3_TITLE },
	{ "TYE", ID3_DATE },
	{ "TCO", ID3_GENRE },
	{ "TPA", ID3_DISC },
	{ "TRK", ID3_TRACK },
	{ "TSP", ID3_ARTISTSORT },
	{ "TS2", ID3_ALBUMARTISTSORT },
	{ "TSA", ID3_ALBUMSORT },
	{ "TCP", ID3_COMPILATION },
	{ "TBP", ID3_BPM },
};
592
frame_tab_index(const char * id)593 static int frame_tab_index(const char *id)
594 {
595 int i = 0;
596
597 while (i < N_ELEMENTS(frame_tab)) {
598 if (!strncmp(id, frame_tab[i].name, 4))
599 return i;
600 i++;
601 }
602 return -1;
603 }
604
/*
 * Check whether buf starts with a date in one of the forms
 *
 *   YYYY        (must end there)
 *   YYYY-MM     (must end there)
 *   YYYY-MM-DD  (must end there or be followed by a non-digit, e.g. the
 *                'T' of a "YYYY-MM-DDTHH:MM:SS" timestamp)
 *
 * Returns the length of the date part (4, 7 or 10) so the caller can cut
 * the string there, or 0 if buf does not match.
 */
static int check_date_format(const char *buf)
{
	int i, ch;

	/* year */
	for (i = 0; i < 4; i++) {
		ch = *buf++;
		if (ch < '0' || ch > '9')
			return 0;
	}
	ch = *buf++;
	if (!ch)
		return 4;
	if (ch != '-')
		return 0;

	/* month */
	for (i = 0; i < 2; i++) {
		ch = *buf++;
		if (ch < '0' || ch > '9')
			return 0;
	}
	ch = *buf++;
	if (!ch)
		return 7;
	if (ch != '-')
		return 0;

	/* day */
	for (i = 0; i < 2; i++) {
		ch = *buf++;
		if (ch < '0' || ch > '9')
			return 0;
	}

	/*
	 * A third digit means this was not a two-digit day. Anything else
	 * (end of string, 'T', space, ...) ends the date part.
	 */
	ch = *buf;
	if (ch >= '0' && ch <= '9')
		return 0;
	return 10;
}
644
/*
 * Fallback date extraction, done in place: find the first run of exactly
 * four consecutive digits in buf and keep only that (taken to be the
 * year). If there is no such run, buf becomes the empty string.
 */
static void fix_date(char *buf)
{
	size_t pos = 0;
	int run = 0;

	for (;;) {
		char c = buf[pos++];

		if (c >= '0' && c <= '9') {
			run++;
			continue;
		}
		if (run == 4) {
			/* pos is one past the non-digit; the run starts 5 back */
			memmove(buf, buf + pos - 5, 4);
			buf[4] = 0;
			return;
		}
		if (c == 0)
			break;
		run = 0;
	}
	*buf = 0;
}
666
decode_str(const char * buf,int len,int encoding)667 static char *decode_str(const char *buf, int len, int encoding)
668 {
669 char *in, *out = NULL;
670
671 switch (encoding) {
672 case ID3_ENCODING_ISO_8859_1:
673 in = xstrndup(buf, len);
674 utf8_encode(in, id3_default_charset, &out);
675 free(in);
676 break;
677 case ID3_ENCODING_UTF_8:
678 in = xstrndup(buf, len);
679 if (u_is_valid(in)) {
680 out = in;
681 } else {
682 utf8_encode(in, id3_default_charset, &out);
683 free(in);
684 }
685 break;
686 case ID3_ENCODING_UTF_16:
687 case ID3_ENCODING_UTF_16_BE:
688 out = utf16_to_utf8((const unsigned char *)buf, len);
689 break;
690 }
691 return out;
692 }
693
add_v2(struct id3tag * id3,enum id3_key key,char * value)694 static void add_v2(struct id3tag *id3, enum id3_key key, char *value)
695 {
696 free(id3->v2[key]);
697 id3->v2[key] = value;
698 id3->has_v2 = 1;
699 }
700
decode_normal(struct id3tag * id3,const char * buf,int len,int encoding,enum id3_key key)701 static void decode_normal(struct id3tag *id3, const char *buf, int len, int encoding, enum id3_key key)
702 {
703 char *out = decode_str(buf, len, encoding);
704
705 if (!out)
706 return;
707
708 if (key == ID3_GENRE) {
709 char *tmp;
710
711 id3_debug("genre before: '%s'\n", out);
712 tmp = parse_genre(out);
713 free(out);
714 out = tmp;
715 } else if (key == ID3_DATE || key == ID3_ORIGINALDATE) {
716 int date_len = check_date_format(out);
717 id3_debug("date before: '%s'\n", out);
718 if (date_len)
719 out[date_len] = '\0';
720 else
721 fix_date(out);
722 if (!*out) {
723 id3_debug("date parsing failed\n");
724 free(out);
725 return;
726 }
727 } else if (key == ID3_ALBUMARTIST) {
728 /*
729 * This must be TPE2 frame; ignore it if ID3_ALBUMARTIST is
730 * already present
731 */
732 if (id3->v2[key]) {
733 free(out);
734 return;
735 }
736 } else if (key == ID3_PUBLISHER) {
737 add_v2(id3, ID3_LABEL, strdup(out));
738 }
739
740 add_v2(id3, key, out);
741 }
742
id3_skiplen(const char * buf,size_t len,int encoding)743 static size_t id3_skiplen(const char *buf, size_t len, int encoding)
744 {
745 if (encoding == ID3_ENCODING_ISO_8859_1 || encoding == ID3_ENCODING_UTF_8) {
746 return strlen(buf) + 1;
747 } else {
748 int i = 0;
749 while (i + 1 < len) {
750 if (buf[i] == '\0' && buf[i + 1] == '\0')
751 return i + 2;
752
753 /* Assume every character is exactly 2 bytes */
754 i += 2;
755 }
756
757 return len;
758 }
759 }
760
decode_txxx(struct id3tag * id3,const char * buf,int len,int encoding)761 static void decode_txxx(struct id3tag *id3, const char *buf, int len, int encoding)
762 {
763 const char ql_prefix[] = "QuodLibet::";
764 enum id3_key key = NUM_ID3_KEYS;
765 int size;
766 char *out, *out_mem;
767
768 out = decode_str(buf, len, encoding);
769 if (!out)
770 return;
771
772 id3_debug("TXXX, key = '%s'\n", out);
773
774 out_mem = out;
775
776 /* skip braindead QuodLibet TXXX frame prefix */
777 if (!strncmp(out, ql_prefix, sizeof(ql_prefix) - 1))
778 out += sizeof(ql_prefix) - 1;
779
780 if (!strcasecmp(out, "replaygain_track_gain"))
781 key = ID3_RG_TRACK_GAIN;
782 else if (!strcasecmp(out, "replaygain_track_peak"))
783 key = ID3_RG_TRACK_PEAK;
784 else if (!strcasecmp(out, "replaygain_album_gain"))
785 key = ID3_RG_ALBUM_GAIN;
786 else if (!strcasecmp(out, "replaygain_album_peak"))
787 key = ID3_RG_ALBUM_PEAK;
788 else if (!strcasecmp(out, "album artist"))
789 key = ID3_ALBUMARTIST;
790 else if (!strcasecmp(out, "albumartist"))
791 key = ID3_ALBUMARTIST;
792 else if (!strcasecmp(out, "albumartistsort"))
793 key = ID3_ALBUMARTISTSORT;
794 else if (!strcasecmp(out, "albumsort"))
795 key = ID3_ALBUMSORT;
796 else if (!strcasecmp(out, "compilation"))
797 key = ID3_COMPILATION;
798
799 size = id3_skiplen(buf, len, encoding);
800 free(out_mem);
801
802 if (key == NUM_ID3_KEYS)
803 return;
804
805 buf += size;
806 len -= size;
807 if (len <= 0)
808 return;
809
810 out = decode_str(buf, len, encoding);
811 if (!out)
812 return;
813
814 add_v2(id3, key, out);
815 }
816
decode_comment(struct id3tag * id3,const char * buf,int len,int encoding)817 static void decode_comment(struct id3tag *id3, const char *buf, int len, int encoding)
818 {
819 int slen;
820 char *out;
821 int valid_description;
822
823 if (len <= 3)
824 return;
825
826 /* skip language */
827 buf += 3;
828 len -= 3;
829
830 /* "Short content description" part of COMM frame */
831 out = decode_str(buf, len, encoding);
832 if (!out)
833 return;
834
835 valid_description = strcmp(out, "") == 0 || strcmp(out, "description") == 0;
836 free(out);
837
838 if (!valid_description)
839 return;
840
841 slen = id3_skiplen(buf, len, encoding);
842 if (slen >= len)
843 return;
844
845 buf += slen;
846 len -= slen;
847
848 out = decode_str(buf, len, encoding);
849 if (!out)
850 return;
851
852 add_v2(id3, ID3_COMMENT, out);
853 }
854
855 /*
856 * From http://id3.org/id3v2.4.0-frames:
857 *
858 * The volume adjustment is encoded as a fixed point decibel value, 16 bit signed
859 * integer representing (adjustment*512), giving +/- 64 dB with a precision of
860 * 0.001953125 dB. E.g. +2 dB is stored as $04 00 and -2 dB is $FC 00. There may
861 * be more than one "RVA2" frame in each tag, but only one with the same
862 * identification string.
863 *
864 * <Header for 'Relative volume adjustment (2)', ID: "RVA2">
865 * Identification <text string> $00
866 *
867 * The 'identification' string is used to identify the situation and/or device
868 * where this adjustment should apply. The following is then repeated for every
869 * channel
870 *
871 * Type of channel $xx
872 * Volume adjustment $xx xx
873 * Bits representing peak $xx
874 * Peak volume $xx (xx ...)
875 *
876 * Type of channel: $00 Other
877 * $01 Master volume
878 * $02 Front right
879 * $03 Front left
880 * $04 Back right
881 * $05 Back left
882 * $06 Front centre
883 * $07 Back centre
884 * $08 Subwoofer
885 *
886 * Bits representing peak can be any number between 0 and 255. 0 means that there
887 * is no peak volume field. The peak volume field is always padded to whole
888 * bytes, setting the most significant bits to zero.
889 */
decode_rva2(struct id3tag * id3,const char * buf,int len)890 static void decode_rva2(struct id3tag *id3, const char *buf, int len)
891 {
892 const int rva2_min_len = 6 + 1 + 2 + 1;
893
894 int audiophile_rg = 0;
895 int channel = 0;
896 int16_t volume_adj = 0;
897 int peak_bits = 0;
898 int peak_bytes = 0;
899 int peak_shift = 0;
900 uint32_t peak = 0;
901
902 char *gain_str = NULL;
903 char *peak_str = NULL;
904
905 int i;
906
907 if (len < rva2_min_len) {
908 id3_debug("frame length %d too small\n", len);
909 return;
910 }
911
912 if (!strcasecmp(buf, "album")) {
913 audiophile_rg = 1;
914 } else if (strcasecmp(buf, "track")) {
915 id3_debug("unsupported identifier: %s\n", buf);
916 return;
917 }
918
919 buf += 6;
920
921 channel = *buf++;
922 if (channel != 0x1) {
923 id3_debug("unsupported channel: %d\n", channel);
924 return;
925 }
926
927 get_i16((unsigned char *)buf, &volume_adj);
928 buf += 2;
929
930 peak_bits = *buf++;
931
932 if (peak_bits == 0)
933 id3_debug("no peak data\n");
934
935 /*
936 * This crazy code comes from Mutagen
937 */
938 peak_bytes = min_i(4, (peak_bits + 7) >> 3);
939 peak_shift = ((8 - (peak_bits & 7)) & 7) + (4 - peak_bytes) * 8;
940
941 if (len < rva2_min_len + peak_bytes) {
942 id3_debug("peak data %d does not fit frame with length %d\n", peak_bytes, len);
943 return;
944 }
945
946 for (i = 0; i < peak_bytes; ++i) {
947 peak <<= 8;
948 peak |= (unsigned char)*buf++;
949 }
950
951 gain_str = xnew(char, 32);
952 snprintf(gain_str, 32, "%lf dB", volume_adj / 512.0);
953
954 add_v2(id3, audiophile_rg ? ID3_RG_ALBUM_GAIN : ID3_RG_TRACK_GAIN, gain_str);
955
956 if (peak_bytes) {
957 peak_str = xnew(char, 32);
958 snprintf(peak_str, 32, "%lf", ((double)peak * (1 << peak_shift)) / INT_MAX);
959
960 add_v2(id3, audiophile_rg ? ID3_RG_ALBUM_PEAK : ID3_RG_TRACK_PEAK, peak_str);
961 }
962
963 id3_debug("gain %s, peak %s\n", gain_str, peak_str ? peak_str : "none");
964 }
965
decode_ufid(struct id3tag * id3,const char * buf,int len)966 static void decode_ufid(struct id3tag *id3, const char *buf, int len)
967 {
968 char *ufid;
969 int ufid_len = len - 22 - 1;
970
971 if (ufid_len < 0 || strcmp(buf, "http://musicbrainz.org") != 0)
972 return;
973
974 ufid = xnew(char, ufid_len + 1);
975 memcpy(ufid, buf + len - ufid_len, ufid_len);
976 ufid[ufid_len] = '\0';
977
978 id3_debug("%s: %s\n", buf, ufid);
979 add_v2(id3, ID3_MUSICBRAINZ_TRACKID, ufid);
980 }
981
982
v2_add_frame(struct id3tag * id3,struct v2_frame_header * fh,const char * buf)983 static void v2_add_frame(struct id3tag *id3, struct v2_frame_header *fh, const char *buf)
984 {
985 int encoding;
986 int len;
987 int idx;
988
989 if (!strncmp(fh->id, "RVA2", 4)) {
990 decode_rva2(id3, buf, fh->size);
991 return;
992 } else if (!strncmp(fh->id, "UFID", 4)) {
993 decode_ufid(id3, buf, fh->size);
994 return;
995 }
996
997 encoding = *buf++;
998 len = fh->size - 1;
999
1000 if (encoding > ID3_ENCODING_MAX)
1001 return;
1002
1003 idx = frame_tab_index(fh->id);
1004 if (idx >= 0) {
1005 decode_normal(id3, buf, len, encoding, frame_tab[idx].key);
1006 } else if (!strncmp(fh->id, "TXXX", 4)) {
1007 decode_txxx(id3, buf, len, encoding);
1008 } else if (!strncmp(fh->id, "COMM", 4)) {
1009 decode_comment(id3, buf, len, encoding);
1010 } else if (!strncmp(fh->id, "COM", 3)) {
1011 decode_comment(id3, buf, len, encoding);
1012 }
1013 }
1014
/*
 * Undo unsynchronisation in place: every 0xff 0x00 byte pair in the first
 * *lenp bytes of buf is replaced by a single 0xff. On return *lenp holds
 * the new, possibly shorter, length.
 */
static void unsync(unsigned char *buf, int *lenp)
{
	const int total = *lenp;
	int rd = 0, wr = 0;

	while (rd < total - 1) {
		if (buf[rd] != 0xff || buf[rd + 1] != 0x00) {
			buf[wr++] = buf[rd++];
			continue;
		}

		/* 0xff 0x00 -> 0xff */
		buf[wr++] = 0xff;
		rd += 2;

		if (rd < total - 2 && buf[rd] == 0x00) {
			/* 0xff 0x00 0x00 -> 0xff 0x00 */
			buf[wr++] = 0x00;
			rd++;
		}
	}

	/* last byte, if it was not consumed as the second half of a pair */
	if (rd < total)
		buf[wr++] = buf[rd++];

	d_print("de-synchronization removed %d bytes\n", rd - wr);
	*lenp = wr;
}
1042
v2_read(struct id3tag * id3,int fd,const struct v2_header * header)1043 static int v2_read(struct id3tag *id3, int fd, const struct v2_header *header)
1044 {
1045 char *buf;
1046 int rc, buf_size;
1047 int frame_start, i;
1048 int frame_header_size;
1049
1050 buf_size = header->size;
1051 buf = xnew(char, buf_size);
1052 rc = read_all(fd, buf, buf_size);
1053 if (rc == -1) {
1054 free(buf);
1055 return rc;
1056 }
1057
1058 frame_start = 0;
1059 if (header->flags & V2_HEADER_EXTENDED) {
1060 struct v2_extended_header ext;
1061
1062 if (!v2_extended_header_parse(&ext, buf) || ext.size > buf_size) {
1063 id3_debug("extended header corrupted\n");
1064 free(buf);
1065 return -2;
1066 }
1067 frame_start = ext.size;
1068 /* should check if update flag is set */
1069 }
1070
1071 frame_header_size = 10;
1072 if (header->ver_major == 2)
1073 frame_header_size = 6;
1074
1075 i = frame_start;
1076 while (i < buf_size - frame_header_size) {
1077 struct v2_frame_header fh;
1078 int len_unsync;
1079
1080 if (header->ver_major == 2) {
1081 if (!v2_2_0_frame_header_parse(&fh, buf + i))
1082 break;
1083 } else if (header->ver_major == 3) {
1084 if (!v2_3_0_frame_header_parse(&fh, buf + i))
1085 break;
1086 } else {
1087 /* assume v2.4 */
1088 if (!v2_4_0_frame_header_parse(&fh, buf + i))
1089 break;
1090 }
1091
1092 i += frame_header_size;
1093
1094 if (fh.size > buf_size - i) {
1095 id3_debug("frame too big\n");
1096 break;
1097 }
1098
1099 if (fh.flags & V2_FRAME_LEN_INDICATOR) {
1100 /*
1101 * Ignore the frame length 4-byte field
1102 */
1103 i += 4;
1104 fh.size -= 4;
1105 }
1106
1107 len_unsync = fh.size;
1108
1109 if ((fh.flags & V2_FRAME_UNSYNC) || (header->flags & V2_HEADER_UNSYNC))
1110 unsync((unsigned char *)(buf + i), (int *)&fh.size);
1111
1112 v2_add_frame(id3, &fh, buf + i);
1113
1114 i += len_unsync;
1115 }
1116
1117 free(buf);
1118 return 0;
1119 }
1120
id3_tag_size(const char * buf,int buf_size)1121 int id3_tag_size(const char *buf, int buf_size)
1122 {
1123 struct v2_header header;
1124
1125 if (buf_size < 10)
1126 return 0;
1127 if (v2_header_parse(&header, buf)) {
1128 if (header.flags & V2_HEADER_FOOTER) {
1129 /* header + data + footer */
1130 id3_debug("v2.%d.%d with footer\n", header.ver_major, header.ver_minor);
1131 return 10 + header.size + 10;
1132 }
1133 /* header */
1134 id3_debug("v2.%d.%d\n", header.ver_major, header.ver_minor);
1135 return 10 + header.size;
1136 }
1137 if (buf_size >= 3 && is_v1(buf)) {
1138 id3_debug("v1\n");
1139 return 128;
1140 }
1141 return 0;
1142 }
1143
id3_init(struct id3tag * id3)1144 void id3_init(struct id3tag *id3)
1145 {
1146 const struct id3tag t = { .has_v1 = 0, .has_v2 = 0 };
1147 *id3 = t;
1148 }
1149
id3_free(struct id3tag * id3)1150 void id3_free(struct id3tag *id3)
1151 {
1152 int i;
1153
1154 for (i = 0; i < NUM_ID3_KEYS; i++)
1155 free(id3->v2[i]);
1156 }
1157
id3_read_tags(struct id3tag * id3,int fd,unsigned int flags)1158 int id3_read_tags(struct id3tag *id3, int fd, unsigned int flags)
1159 {
1160 off_t off;
1161 int rc;
1162
1163 if (flags & ID3_V2) {
1164 struct v2_header header;
1165 char buf[138];
1166
1167 rc = read_all(fd, buf, 10);
1168 if (rc == -1)
1169 goto rc_error;
1170 if (v2_header_parse(&header, buf)) {
1171 rc = v2_read(id3, fd, &header);
1172 if (rc)
1173 goto rc_error;
1174 /* get v1 if needed */
1175 } else {
1176 /* get v2 from end and optionally v1 */
1177
1178 off = lseek(fd, -138, SEEK_END);
1179 if (off == -1)
1180 goto error;
1181 rc = read_all(fd, buf, 138);
1182 if (rc == -1)
1183 goto rc_error;
1184
1185 if (is_v1(buf + 10)) {
1186 if (flags & ID3_V1) {
1187 memcpy(id3->v1, buf + 10, 128);
1188 id3->has_v1 = 1;
1189 }
1190 if (v2_footer_parse(&header, buf)) {
1191 /* footer at end of file - 128 */
1192 off = lseek(fd, -((off_t) header.size + 138), SEEK_END);
1193 if (off == -1)
1194 goto error;
1195 rc = v2_read(id3, fd, &header);
1196 if (rc)
1197 goto rc_error;
1198 }
1199 } else if (v2_footer_parse(&header, buf + 128)) {
1200 /* footer at end of file */
1201 off = lseek(fd, -((off_t) header.size + 10), SEEK_END);
1202 if (off == -1)
1203 goto error;
1204 rc = v2_read(id3, fd, &header);
1205 if (rc)
1206 goto rc_error;
1207 }
1208 return 0;
1209 }
1210 }
1211 if (flags & ID3_V1) {
1212 off = lseek(fd, -128, SEEK_END);
1213 if (off == -1)
1214 goto error;
1215 rc = read_all(fd, id3->v1, 128);
1216 if (rc == -1)
1217 goto rc_error;
1218 id3->has_v1 = is_v1(id3->v1);
1219 }
1220 return 0;
1221 error:
1222 rc = -1;
1223 rc_error:
1224 return rc;
1225 }
1226
v1_get_str(const char * buf,int len)1227 static char *v1_get_str(const char *buf, int len)
1228 {
1229 char in[32];
1230 char *out;
1231 int i;
1232
1233 for (i = len - 1; i >= 0; i--) {
1234 if (buf[i] != 0 && buf[i] != ' ')
1235 break;
1236 }
1237 if (i == -1)
1238 return NULL;
1239 memcpy(in, buf, i + 1);
1240 in[i + 1] = 0;
1241 if (u_is_valid(in))
1242 return xstrdup(in);
1243 if (utf8_encode(in, id3_default_charset, &out))
1244 return NULL;
1245 return out;
1246 }
1247
id3_get_comment(struct id3tag * id3,enum id3_key key)1248 char *id3_get_comment(struct id3tag *id3, enum id3_key key)
1249 {
1250 if (id3->has_v2) {
1251 if (id3->v2[key])
1252 return xstrdup(id3->v2[key]);
1253 }
1254 if (id3->has_v1) {
1255 switch (key) {
1256 case ID3_ARTIST:
1257 return v1_get_str(id3->v1 + 33, 30);
1258 case ID3_ALBUM:
1259 return v1_get_str(id3->v1 + 63, 30);
1260 case ID3_TITLE:
1261 return v1_get_str(id3->v1 + 3, 30);
1262 case ID3_DATE:
1263 return v1_get_str(id3->v1 + 93, 4);
1264 case ID3_GENRE:
1265 {
1266 unsigned char idx = id3->v1[127];
1267
1268 if (idx >= NR_GENRES)
1269 return NULL;
1270 return xstrdup(genres[idx]);
1271 }
1272 case ID3_TRACK:
1273 {
1274 char *t;
1275
1276 if (id3->v1[125] != 0)
1277 return NULL;
1278 t = xnew(char, 4);
1279 snprintf(t, 4, "%d", ((unsigned char *)id3->v1)[126]);
1280 return t;
1281 }
1282 default:
1283 return NULL;
1284 }
1285 }
1286 return NULL;
1287 }
1288
id3_get_genre(uint16_t id)1289 char const *id3_get_genre(uint16_t id)
1290 {
1291 if (id >= NR_GENRES)
1292 return NULL;
1293 return genres[id];
1294 }
1295