1 /*
2  * Copyright 2008-2013 Various Authors
3  * Copyright 2005 Timo Hirvonen
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation; either version 2 of the
8  * License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "id3.h"
20 #include "xmalloc.h"
21 #include "convert.h"
22 #include "uchar.h"
23 #include "options.h"
24 #include "debug.h"
25 #include "utils.h"
26 #include "file.h"
27 
28 #include <unistd.h>
29 #include <stdint.h>
30 #include <errno.h>
31 #include <stdio.h>
32 #include <string.h>
33 #include <strings.h>
34 #include <limits.h>
35 
/*
 * Text encoding identifiers. v2_add_frame() reads one of these from the
 * first byte of a frame's payload and decode_str() dispatches on it.
 */
enum {
	ID3_ENCODING_ISO_8859_1 = 0x00,
	ID3_ENCODING_UTF_16     = 0x01,
	ID3_ENCODING_UTF_16_BE  = 0x02,
	ID3_ENCODING_UTF_8      = 0x03,

	/* largest accepted encoding byte; v2_add_frame() ignores frames above it */
	ID3_ENCODING_MAX        = 0x03
};
44 
45 /*
46  * position:
47  *
48  *    0 "ID3"
49  *  -10 "3DI"
50  * -128 "TAG"
51  * -138 "3DI"
52  *
53  * if v2 is at beginning _and_ at end then there must be a seek tag at beginning
54  */
55 
/* Fields extracted from a 10-byte ID3v2 header or footer (see v2_header_footer_parse). */
struct v2_header {
	unsigned char ver_major;	/* byte 3 of the header */
	unsigned char ver_minor;	/* byte 4 of the header */
	unsigned char flags;		/* byte 5 of the header, V2_HEADER_* bits */
	uint32_t size;			/* bytes 6-9, decoded by u32_unsync() (7 bits per byte) */
};
62 
/* Extended header; only its size is parsed (v2_read uses it as the first frame offset). */
struct v2_extended_header {
	uint32_t size;	/* first four bytes of the tag body, decoded by u32_unsync() */
};
66 
/* Frame header as filled in by the v2_*_frame_header_parse() functions. */
struct v2_frame_header {
	/*
	 * Frame identifier. The 2.2 parser stores three characters followed
	 * by a NUL; the 2.3/2.4 parsers store four bytes with no terminator,
	 * so compare with strncmp(..., 4).
	 */
	char id[4];
	uint32_t size;	/* payload size, excluding the frame header itself */
	uint16_t flags;	/* header bytes 8-9 for 2.3/2.4, always 0 for 2.2; V2_FRAME_* bits */
};
72 
/* Bits of struct v2_header.flags */
#define V2_HEADER_UNSYNC	(1 << 7)
#define V2_HEADER_EXTENDED	(1 << 6)
#define V2_HEADER_EXPERIMENTAL	(1 << 5)
#define V2_HEADER_FOOTER	(1 << 4)

/*
 * Bits of struct v2_frame_header.flags. Only V2_FRAME_UNSYNC and
 * V2_FRAME_LEN_INDICATOR are tested in this file (by v2_read).
 */
#define V2_FRAME_COMPRESSED	(1 << 3) /* not referenced elsewhere in this file */
#define V2_FRAME_ENCRYPTHED	(1 << 2) /* not referenced elsewhere in this file */
#define V2_FRAME_UNSYNC		(1 << 1)
#define V2_FRAME_LEN_INDICATOR	(1 << 0)
82 
/*
 * Genre names indexed by numeric genre code. Used for the ID3v1 genre byte
 * (id3_get_comment), for numeric genre references in ID3v2 genre frames
 * (parse_genre) and by id3_get_genre(). Indices >= NR_GENRES are treated as
 * unknown by all three.
 */
#define NR_GENRES 148
/* genres {{{ */
static const char *genres[NR_GENRES] = {
	"Blues",
	"Classic Rock",
	"Country",
	"Dance",
	"Disco",
	"Funk",
	"Grunge",
	"Hip-Hop",
	"Jazz",
	"Metal",
	"New Age",
	"Oldies",
	"Other",
	"Pop",
	"R&B",
	"Rap",
	"Reggae",
	"Rock",
	"Techno",
	"Industrial",
	"Alternative",
	"Ska",
	"Death Metal",
	"Pranks",
	"Soundtrack",
	"Euro-Techno",
	"Ambient",
	"Trip-Hop",
	"Vocal",
	"Jazz+Funk",
	"Fusion",
	"Trance",
	"Classical",
	"Instrumental",
	"Acid",
	"House",
	"Game",
	"Sound Clip",
	"Gospel",
	"Noise",
	"Alt",
	"Bass",
	"Soul",
	"Punk",
	"Space",
	"Meditative",
	"Instrumental Pop",
	"Instrumental Rock",
	"Ethnic",
	"Gothic",
	"Darkwave",
	"Techno-Industrial",
	"Electronic",
	"Pop-Folk",
	"Eurodance",
	"Dream",
	"Southern Rock",
	"Comedy",
	"Cult",
	"Gangsta Rap",
	"Top 40",
	"Christian Rap",
	"Pop/Funk",
	"Jungle",
	"Native American",
	"Cabaret",
	"New Wave",
	"Psychedelic",
	"Rave",
	"Showtunes",
	"Trailer",
	"Lo-Fi",
	"Tribal",
	"Acid Punk",
	"Acid Jazz",
	"Polka",
	"Retro",
	"Musical",
	"Rock & Roll",
	"Hard Rock",
	"Folk",
	"Folk/Rock",
	"National Folk",
	"Swing",
	"Fast-Fusion",
	"Bebob",
	"Latin",
	"Revival",
	"Celtic",
	"Bluegrass",
	"Avantgarde",
	"Gothic Rock",
	"Progressive Rock",
	"Psychedelic Rock",
	"Symphonic Rock",
	"Slow Rock",
	"Big Band",
	"Chorus",
	"Easy Listening",
	"Acoustic",
	"Humour",
	"Speech",
	"Chanson",
	"Opera",
	"Chamber Music",
	"Sonata",
	"Symphony",
	"Booty Bass",
	"Primus",
	"Porn Groove",
	"Satire",
	"Slow Jam",
	"Club",
	"Tango",
	"Samba",
	"Folklore",
	"Ballad",
	"Power Ballad",
	"Rhythmic Soul",
	"Freestyle",
	"Duet",
	"Punk Rock",
	"Drum Solo",
	"A Cappella",
	"Euro-House",
	"Dance Hall",
	"Goa",
	"Drum & Bass",
	"Club-House",
	"Hardcore",
	"Terror",
	"Indie",
	"BritPop",
	"Negerpunk",
	"Polsk Punk",
	"Beat",
	"Christian Gangsta Rap",
	"Heavy Metal",
	"Black Metal",
	"Crossover",
	"Contemporary Christian",
	"Christian Rock",
	"Merengue",
	"Salsa",
	"Thrash Metal",
	"Anime",
	"JPop",
	"Synthpop"
};
/* }}} */
236 
/* Debug output helper for this file; forwards its arguments unchanged to d_print(). */
#define id3_debug(...) d_print(__VA_ARGS__)
238 
/*
 * Comment key names, one per enum id3_key value (NUM_ID3_KEYS entries).
 * NOTE(review): the order presumably has to match enum id3_key in id3.h --
 * confirm when adding or reordering keys.
 */
const char * const id3_key_names[NUM_ID3_KEYS] = {
	"artist",
	"album",
	"title",
	"date",
	"originaldate",
	"genre",
	"discnumber",
	"tracknumber",
	"albumartist",
	"artistsort",
	"albumartistsort",
	"albumsort",
	"compilation",
	"replaygain_track_gain",
	"replaygain_track_peak",
	"replaygain_album_gain",
	"replaygain_album_peak",
	"composer",
	"conductor",
	"lyricist",
	"remixer",
	"label",
	"publisher",
	"subtitle",
	"comment",
	"musicbrainz_trackid",
	"media",
	"bpm",
};
269 
/* Non-zero when uch lies in the range 0xdc00..0xdfff (UTF-16 low surrogates). */
static int utf16_is_lsurrogate(uchar uch)
{
	return uch >= 0xdc00 && uch <= 0xdfff;
}
274 
/* Non-zero when uch lies in the range 0xd800..0xdbff (UTF-16 high surrogates). */
static int utf16_is_hsurrogate(uchar uch)
{
	return uch >= 0xd800 && uch <= 0xdbff;
}
279 
/* Non-zero when uch is the byte order mark 0xfeff. */
static int utf16_is_bom(uchar uch)
{
	return 0xfeff == uch;
}
284 
/*
 * Non-zero for code units that utf16_to_utf8() does not emit: either
 * surrogate half or the byte order mark.
 */
static int utf16_is_special(uchar uch)
{
	if (utf16_is_hsurrogate(uch))
		return 1;
	if (utf16_is_lsurrogate(uch))
		return 1;
	return utf16_is_bom(uch);
}
289 
utf16_to_utf8(const unsigned char * buf,size_t buf_size)290 static char *utf16_to_utf8(const unsigned char *buf, size_t buf_size)
291 {
292 	char *out;
293 	size_t i, idx;
294 	int little_endian = 0;
295 
296 	if (buf_size < 2)
297 		return NULL;
298 
299 	if (buf[0] == 0xff && buf[1] == 0xfe)
300 		little_endian = 1;
301 
302 	out = xnew(char, (buf_size / 2) * 4 + 1);
303 	i = idx = 0;
304 
305 	while (buf_size - i >= 2) {
306 		uchar u;
307 
308 		if (little_endian)
309 			u = buf[i] + (buf[i + 1] << 8);
310 		else
311 			u = buf[i + 1] + (buf[i] << 8);
312 
313 		if (u_is_unicode(u)) {
314 			if (!utf16_is_special(u))
315 				u_set_char(out, &idx, u);
316 		} else {
317 			free(out);
318 			return NULL;
319 		}
320 
321 		if (u == 0)
322 			return out;
323 
324 		i += 2;
325 	}
326 
327 	u_set_char(out, &idx, 0);
328 	return out;
329 }
330 
/* Non-zero when buf starts with the ID3v1 marker "TAG". */
static int is_v1(const char *buf)
{
	return strncmp(buf, "TAG", 3) == 0;
}
335 
/*
 * Decode a four-byte big-endian value that carries 7 bits per byte.
 *
 * Returns 1 and stores the 28-bit result in *up, or returns 0 (leaving *up
 * untouched) when any of the four bytes has its top bit set.
 */
static int u32_unsync(const unsigned char *buf, uint32_t *up)
{
	if ((buf[0] | buf[1] | buf[2] | buf[3]) & 0x80)
		return 0;

	*up = ((uint32_t)buf[0] << 21) |
	      ((uint32_t)buf[1] << 14) |
	      ((uint32_t)buf[2] << 7) |
	       (uint32_t)buf[3];
	return 1;
}
351 
/* Store the big-endian 32-bit value at buf[0..3] in *up. */
static void get_u32(const unsigned char *buf, uint32_t *up)
{
	*up = ((uint32_t)buf[0] << 24) |
	      ((uint32_t)buf[1] << 16) |
	      ((uint32_t)buf[2] << 8) |
	       (uint32_t)buf[3];
}
364 
/* Store the big-endian 24-bit value at buf[0..2] in *up. */
static void get_u24(const unsigned char *buf, uint32_t *up)
{
	*up = ((uint32_t)buf[0] << 16) |
	      ((uint32_t)buf[1] << 8) |
	       (uint32_t)buf[2];
}
377 
/*
 * Store the big-endian 16-bit value at buf[0..1] in *ip. The bytes are
 * combined as an unsigned 16-bit quantity and then converted to int16_t.
 */
static void get_i16(const unsigned char *buf, int16_t *ip)
{
	const uint16_t raw = (uint16_t)((buf[0] << 8) | buf[1]);

	*ip = raw;
}
390 
v2_header_footer_parse(struct v2_header * header,const char * buf)391 static int v2_header_footer_parse(struct v2_header *header, const char *buf)
392 {
393 	const unsigned char *b = (const unsigned char *)buf;
394 
395 	header->ver_major = b[3];
396 	header->ver_minor = b[4];
397 	header->flags = b[5];
398 	if (header->ver_major == 0xff || header->ver_minor == 0xff)
399 		return 0;
400 	return u32_unsync(b + 6, &header->size);
401 }
402 
/* Parse a tag header: buf must start with "ID3". Returns non-zero on success. */
static int v2_header_parse(struct v2_header *header, const char *buf)
{
	if (memcmp(buf, "ID3", 3) != 0)
		return 0;
	return v2_header_footer_parse(header, buf);
}
409 
/* Parse a tag footer: buf must start with "3DI". Returns non-zero on success. */
static int v2_footer_parse(struct v2_header *header, const char *buf)
{
	if (memcmp(buf, "3DI", 3) != 0)
		return 0;
	return v2_header_footer_parse(header, buf);
}
416 
v2_extended_header_parse(struct v2_extended_header * header,const char * buf)417 static int v2_extended_header_parse(struct v2_extended_header *header, const char *buf)
418 {
419 	return u32_unsync((const unsigned char *)buf, &header->size);
420 }
421 
/* Non-zero when ch may appear in a frame identifier: 'A'..'Z' or '0'..'9'. */
static int is_frame_id_char(char ch)
{
	if (ch >= '0' && ch <= '9')
		return 1;
	return ch >= 'A' && ch <= 'Z';
}
426 
427 /* XXXYYY
428  *
429  * X = [A-Z0-9]
430  * Y = byte
431  *
432  * XXX is frame
433  * YYY is frame size excluding this 6 byte header
434  */
v2_2_0_frame_header_parse(struct v2_frame_header * header,const char * buf)435 static int v2_2_0_frame_header_parse(struct v2_frame_header *header, const char *buf)
436 {
437 	int i;
438 
439 	for (i = 0; i < 3; i++) {
440 		if (!is_frame_id_char(buf[i]))
441 			return 0;
442 		header->id[i] = buf[i];
443 	}
444 	header->id[3] = 0;
445 	get_u24((const unsigned char *)(buf + 3), &header->size);
446 	header->flags = 0;
447 	if (header->size == 0)
448 		return 0;
449 	id3_debug("%c%c%c %d\n", header->id[0], header->id[1], header->id[2], header->size);
450 	return 1;
451 }
452 
453 /* XXXXYYYYZZ
454  *
455  * X = [A-Z0-9]
456  * Y = byte
457  * Z = byte
458  *
459  * XXXX is frame
460  * YYYY is frame size excluding this 10 byte header
461  * ZZ   is flags
462  */
v2_3_0_frame_header_parse(struct v2_frame_header * header,const char * buf)463 static int v2_3_0_frame_header_parse(struct v2_frame_header *header, const char *buf)
464 {
465 	int i;
466 
467 	for (i = 0; i < 4; i++) {
468 		if (!(is_frame_id_char(buf[i]) || (i == 3 && buf[i] == '\0')))
469 			return 0;
470 		header->id[i] = buf[i];
471 	}
472 	get_u32((const unsigned char *)(buf + 4), &header->size);
473 	header->flags = (buf[8] << 8) | buf[9];
474 	if (header->size == 0)
475 		return 0;
476 	id3_debug("%c%c%c%c %d\n", header->id[0], header->id[1], header->id[2],
477 			header->id[3], header->size);
478 	return 1;
479 }
480 
481 /* same as 2.3 but header size is sync safe */
v2_4_0_frame_header_parse(struct v2_frame_header * header,const char * buf)482 static int v2_4_0_frame_header_parse(struct v2_frame_header *header, const char *buf)
483 {
484 	int i;
485 
486 	for (i = 0; i < 4; i++) {
487 		if (!(is_frame_id_char(buf[i]) || (i == 3 && buf[i] == '\0')))
488 			return 0;
489 		header->id[i] = buf[i];
490 	}
491 	if (!u32_unsync((const unsigned char *)(buf + 4), &header->size))
492 		return 0;
493 	header->flags = (buf[8] << 8) | buf[9];
494 	if (header->size == 0)
495 		return 0;
496 	id3_debug("%c%c%c%c %d\n", header->id[0], header->id[1], header->id[2],
497 			header->id[3], header->size);
498 	return 1;
499 }
500 
parse_genre(const char * str)501 static char *parse_genre(const char *str)
502 {
503 	int parenthesis = 0;
504 	long int idx;
505 	char *end;
506 
507 	if (strncasecmp(str, "(RX", 3) == 0)
508 		return xstrdup("Remix");
509 
510 	if (strncasecmp(str, "(CR", 3) == 0)
511 		return xstrdup("Cover");
512 
513 	if (*str == '(') {
514 		parenthesis = 1;
515 		str++;
516 	}
517 
518 	idx = strtol(str, &end, 10);
519 	if (str != end) {
520 		/* Number parsed but there may be some crap after the number.
521 		 * I don't care, ID3v2 by definition contains crap.
522 		 */
523 		if (idx >= 0 && idx < NR_GENRES)
524 			return xstrdup(genres[idx]);
525 	}
526 
527 	if (parenthesis) {
528 		const char *ptr = strchr(str, ')');
529 
530 		if (ptr && ptr[1]) {
531 			/* genre name after random crap in parenthesis,
532 			 * return the genre name */
533 			return xstrdup(ptr + 1);
534 		}
535 		str--;
536 	}
537 
538 	/* random crap, just return it and wait for a bug report */
539 	return xstrdup(str);
540 }
541 
/*
 * Frame identifier -> comment key table for plain text frames, searched
 * linearly by frame_tab_index(). Several identifiers may map to the same
 * key. Three-character (2.2.0) identifiers rely on the zero padding of
 * name[] to match the NUL-terminated id produced by the 2.2 parser.
 *
 * http://www.id3.org/id3v2.4.0-structure.txt
 */
static struct {
	const char name[8];
	enum id3_key key;
} frame_tab[] = {
	/* 2.4.0 */
	{ "TDRC", ID3_DATE }, // recording date
	{ "TDRL", ID3_DATE }, // release date
	{ "TDOR", ID3_ORIGINALDATE }, // original release date
	{ "TSOP", ID3_ARTISTSORT },
	{ "TSOA", ID3_ALBUMSORT },

	/* >= 2.3.0 */
	{ "TPE1", ID3_ARTIST },
	{ "TALB", ID3_ALBUM },
	{ "TIT2", ID3_TITLE },
	{ "TYER", ID3_DATE },
	{ "TCON", ID3_GENRE },
	{ "TPOS", ID3_DISC },
	{ "TRCK", ID3_TRACK },
	{ "TPE2", ID3_ALBUMARTIST },
	{ "TSO2", ID3_ALBUMARTISTSORT },
	{ "XSOP", ID3_ARTISTSORT }, // obsolete
	{ "XSOA", ID3_ALBUMSORT }, // obsolete
	{ "TCMP", ID3_COMPILATION },
	{ "TORY", ID3_ORIGINALDATE },
	{ "TCOM", ID3_COMPOSER },
	{ "TPE3", ID3_CONDUCTOR },
	{ "TEXT", ID3_LYRICIST },
	{ "TPE4", ID3_REMIXER },
	{ "TPUB", ID3_PUBLISHER }, // TPUB can be both publisher or label; decode_normal() also stores it as ID3_LABEL
	{ "TIT3", ID3_SUBTITLE },
	{ "TMED", ID3_MEDIA },
	{ "TBPM", ID3_BPM},

	/* obsolete frames (2.2.0) */
	{ "TP1",  ID3_ARTIST },
	{ "TP2",  ID3_ALBUMARTIST },
	{ "TAL",  ID3_ALBUM },
	{ "TT2",  ID3_TITLE },
	{ "TYE",  ID3_DATE },
	{ "TCO",  ID3_GENRE },
	{ "TPA",  ID3_DISC },
	{ "TRK",  ID3_TRACK },
	{ "TSP",  ID3_ARTISTSORT },
	{ "TS2",  ID3_ALBUMARTISTSORT },
	{ "TSA",  ID3_ALBUMSORT },
	{ "TCP",  ID3_COMPILATION },
	{ "TBP",  ID3_BPM },
};
592 
frame_tab_index(const char * id)593 static int frame_tab_index(const char *id)
594 {
595 	int i = 0;
596 
597 	while (i < N_ELEMENTS(frame_tab)) {
598 		if (!strncmp(id, frame_tab[i].name, 4))
599 			return i;
600 		i++;
601 	}
602 	return -1;
603 }
604 
/*
 * Check whether buf starts with a date of the form "YYYY", "YYYY-MM" or
 * "YYYY-MM-DD".
 *
 * Returns the length of the date prefix that is valid (4, 7 or 10) so the
 * caller can truncate the string there, or 0 when the text does not match.
 *
 * "YYYY" and "YYYY-MM" must be followed by the end of the string. A full
 * "YYYY-MM-DD" may be followed by anything except another digit, so
 * timestamps such as "2005-01-02T12:30:00" yield 10; a further digit would
 * mean the day field is malformed. (The previous check was inverted: it
 * accepted a trailing digit and rejected the 'T' separator.)
 */
static int check_date_format(const char *buf)
{
	int i, ch;

	/* year */
	for (i = 0; i < 4; i++) {
		ch = *buf++;
		if (ch < '0' || ch > '9')
			return 0;
	}
	ch = *buf++;
	if (!ch)
		return 4;
	if (ch != '-')
		return 0;

	/* month */
	for (i = 0; i < 2; i++) {
		ch = *buf++;
		if (ch < '0' || ch > '9')
			return 0;
	}
	ch = *buf++;
	if (!ch)
		return 7;
	if (ch != '-')
		return 0;

	/* day */
	for (i = 0; i < 2; i++) {
		ch = *buf++;
		if (ch < '0' || ch > '9')
			return 0;
	}

	/* a third digit means this is not a two-digit day */
	ch = *buf;
	if (ch >= '0' && ch <= '9')
		return 0;
	return 10;
}
644 
/*
 * Reduce buf, in place, to the first run of exactly four consecutive digits
 * it contains (taken to be the year). When there is no such run, buf
 * becomes the empty string.
 */
static void fix_date(char *buf)
{
	size_t run = 0;
	size_t pos;

	for (pos = 0; ; pos++) {
		const char c = buf[pos];

		if (c >= '0' && c <= '9') {
			run++;
			continue;
		}

		/* a digit run just ended at pos (the terminator also ends one) */
		if (run == 4) {
			memmove(buf, buf + pos - 4, 4);
			buf[4] = '\0';
			return;
		}
		if (c == '\0')
			break;
		run = 0;
	}
	*buf = '\0';
}
666 
decode_str(const char * buf,int len,int encoding)667 static char *decode_str(const char *buf, int len, int encoding)
668 {
669 	char *in, *out = NULL;
670 
671 	switch (encoding) {
672 	case ID3_ENCODING_ISO_8859_1:
673 		in = xstrndup(buf, len);
674 		utf8_encode(in, id3_default_charset, &out);
675 		free(in);
676 		break;
677 	case ID3_ENCODING_UTF_8:
678 		in = xstrndup(buf, len);
679 		if (u_is_valid(in)) {
680 			out = in;
681 		} else {
682 			utf8_encode(in, id3_default_charset, &out);
683 			free(in);
684 		}
685 		break;
686 	case ID3_ENCODING_UTF_16:
687 	case ID3_ENCODING_UTF_16_BE:
688 		out = utf16_to_utf8((const unsigned char *)buf, len);
689 		break;
690 	}
691 	return out;
692 }
693 
add_v2(struct id3tag * id3,enum id3_key key,char * value)694 static void add_v2(struct id3tag *id3, enum id3_key key, char *value)
695 {
696 	free(id3->v2[key]);
697 	id3->v2[key] = value;
698 	id3->has_v2 = 1;
699 }
700 
decode_normal(struct id3tag * id3,const char * buf,int len,int encoding,enum id3_key key)701 static void decode_normal(struct id3tag *id3, const char *buf, int len, int encoding, enum id3_key key)
702 {
703 	char *out = decode_str(buf, len, encoding);
704 
705 	if (!out)
706 		return;
707 
708 	if (key == ID3_GENRE) {
709 		char *tmp;
710 
711 		id3_debug("genre before: '%s'\n", out);
712 		tmp = parse_genre(out);
713 		free(out);
714 		out = tmp;
715 	} else if (key == ID3_DATE || key == ID3_ORIGINALDATE) {
716 		int date_len = check_date_format(out);
717 		id3_debug("date before: '%s'\n", out);
718 		if (date_len)
719 			out[date_len] = '\0';
720 		else
721 			fix_date(out);
722 		if (!*out) {
723 			id3_debug("date parsing failed\n");
724 			free(out);
725 			return;
726 		}
727 	} else if (key == ID3_ALBUMARTIST) {
728 		/*
729 		 * This must be TPE2 frame; ignore it if ID3_ALBUMARTIST is
730 		 * already present
731 		 */
732 		if (id3->v2[key]) {
733 			free(out);
734 			return;
735 		}
736 	} else if (key == ID3_PUBLISHER) {
737 		 add_v2(id3, ID3_LABEL, strdup(out));
738 	}
739 
740 	add_v2(id3, key, out);
741 }
742 
id3_skiplen(const char * buf,size_t len,int encoding)743 static size_t id3_skiplen(const char *buf, size_t len, int encoding)
744 {
745 	if (encoding == ID3_ENCODING_ISO_8859_1 || encoding == ID3_ENCODING_UTF_8) {
746 		return strlen(buf) + 1;
747 	} else {
748 		int i = 0;
749 		while (i + 1 < len) {
750 			if (buf[i] == '\0' && buf[i + 1] == '\0')
751 				return i + 2;
752 
753 			/* Assume every character is exactly 2 bytes */
754 			i += 2;
755 		}
756 
757 		return len;
758 	}
759 }
760 
decode_txxx(struct id3tag * id3,const char * buf,int len,int encoding)761 static void decode_txxx(struct id3tag *id3, const char *buf, int len, int encoding)
762 {
763 	const char ql_prefix[] = "QuodLibet::";
764 	enum id3_key key = NUM_ID3_KEYS;
765 	int size;
766 	char *out, *out_mem;
767 
768 	out = decode_str(buf, len, encoding);
769 	if (!out)
770 		return;
771 
772 	id3_debug("TXXX, key = '%s'\n", out);
773 
774 	out_mem = out;
775 
776 	/* skip braindead QuodLibet TXXX frame prefix */
777 	if (!strncmp(out, ql_prefix, sizeof(ql_prefix) - 1))
778 		out += sizeof(ql_prefix) - 1;
779 
780 	if (!strcasecmp(out, "replaygain_track_gain"))
781 		key = ID3_RG_TRACK_GAIN;
782 	else if (!strcasecmp(out, "replaygain_track_peak"))
783 		key = ID3_RG_TRACK_PEAK;
784 	else if (!strcasecmp(out, "replaygain_album_gain"))
785 		key = ID3_RG_ALBUM_GAIN;
786 	else if (!strcasecmp(out, "replaygain_album_peak"))
787 		key = ID3_RG_ALBUM_PEAK;
788 	else if (!strcasecmp(out, "album artist"))
789 		key = ID3_ALBUMARTIST;
790 	else if (!strcasecmp(out, "albumartist"))
791 		key = ID3_ALBUMARTIST;
792 	else if (!strcasecmp(out, "albumartistsort"))
793 		key = ID3_ALBUMARTISTSORT;
794 	else if (!strcasecmp(out, "albumsort"))
795 		key = ID3_ALBUMSORT;
796 	else if (!strcasecmp(out, "compilation"))
797 		key = ID3_COMPILATION;
798 
799 	size = id3_skiplen(buf, len, encoding);
800 	free(out_mem);
801 
802 	if (key == NUM_ID3_KEYS)
803 		return;
804 
805 	buf += size;
806 	len -= size;
807 	if (len <= 0)
808 		return;
809 
810 	out = decode_str(buf, len, encoding);
811 	if (!out)
812 		return;
813 
814 	add_v2(id3, key, out);
815 }
816 
decode_comment(struct id3tag * id3,const char * buf,int len,int encoding)817 static void decode_comment(struct id3tag *id3, const char *buf, int len, int encoding)
818 {
819 	int slen;
820 	char *out;
821 	int valid_description;
822 
823 	if (len <= 3)
824 		return;
825 
826 	/* skip language */
827 	buf += 3;
828 	len -= 3;
829 
830 	/* "Short content description" part of COMM frame */
831 	out = decode_str(buf, len, encoding);
832 	if (!out)
833 		return;
834 
835 	valid_description = strcmp(out, "") == 0 || strcmp(out, "description") == 0;
836 	free(out);
837 
838 	if (!valid_description)
839 		return;
840 
841 	slen = id3_skiplen(buf, len, encoding);
842 	if (slen >= len)
843 		return;
844 
845 	buf += slen;
846 	len -= slen;
847 
848 	out = decode_str(buf, len, encoding);
849 	if (!out)
850 		return;
851 
852 	add_v2(id3, ID3_COMMENT, out);
853 }
854 
855 /*
856  * From http://id3.org/id3v2.4.0-frames:
857  *
858  * The volume adjustment is encoded as a fixed point decibel value, 16 bit signed
859  * integer representing (adjustment*512), giving +/- 64 dB with a precision of
860  * 0.001953125 dB. E.g. +2 dB is stored as $04 00 and -2 dB is $FC 00. There may
861  * be more than one "RVA2" frame in each tag, but only one with the same
862  * identification string.
863  *
864  * 	<Header for 'Relative volume adjustment (2)', ID: "RVA2">
865  * 	Identification          <text string> $00
866  *
867  * The 'identification' string is used to identify the situation and/or device
868  * where this adjustment should apply. The following is then repeated for every
869  * channel
870  *
871  * 	Type of channel         $xx
872  * 	Volume adjustment       $xx xx
873  * 	Bits representing peak  $xx
874  * 	Peak volume             $xx (xx ...)
875  *
876  * Type of channel:	$00 Other
877  * 			$01 Master volume
878  * 			$02 Front right
879  * 			$03 Front left
880  * 			$04 Back right
881  * 			$05 Back left
882  * 			$06 Front centre
883  * 			$07 Back centre
884  * 			$08 Subwoofer
885  *
886  * Bits representing peak can be any number between 0 and 255. 0 means that there
887  * is no peak volume field. The peak volume field is always padded to whole
888  * bytes, setting the most significant bits to zero.
889  */
decode_rva2(struct id3tag * id3,const char * buf,int len)890 static void decode_rva2(struct id3tag *id3, const char *buf, int len)
891 {
892 	const int rva2_min_len	= 6 + 1 + 2 + 1;
893 
894 	int audiophile_rg	= 0;
895 	int channel		= 0;
896 	int16_t volume_adj	= 0;
897 	int peak_bits		= 0;
898 	int peak_bytes		= 0;
899 	int peak_shift		= 0;
900 	uint32_t peak		= 0;
901 
902 	char *gain_str		= NULL;
903 	char *peak_str		= NULL;
904 
905 	int i;
906 
907 	if (len < rva2_min_len) {
908 		id3_debug("frame length %d too small\n", len);
909 		return;
910 	}
911 
912 	if (!strcasecmp(buf, "album")) {
913 		audiophile_rg = 1;
914 	} else if (strcasecmp(buf, "track")) {
915 		id3_debug("unsupported identifier: %s\n", buf);
916 		return;
917 	}
918 
919 	buf += 6;
920 
921 	channel = *buf++;
922 	if (channel != 0x1) {
923 		id3_debug("unsupported channel: %d\n", channel);
924 		return;
925 	}
926 
927 	get_i16((unsigned char *)buf, &volume_adj);
928 	buf += 2;
929 
930 	peak_bits = *buf++;
931 
932 	if (peak_bits == 0)
933 		id3_debug("no peak data\n");
934 
935 	/*
936 	 * This crazy code comes from Mutagen
937 	 */
938 	peak_bytes = min_i(4, (peak_bits + 7) >> 3);
939 	peak_shift = ((8 - (peak_bits & 7)) & 7) + (4 - peak_bytes) * 8;
940 
941 	if (len < rva2_min_len + peak_bytes) {
942 		id3_debug("peak data %d does not fit frame with length %d\n", peak_bytes, len);
943 		return;
944 	}
945 
946 	for (i = 0; i < peak_bytes; ++i) {
947 		peak <<= 8;
948 		peak |= (unsigned char)*buf++;
949 	}
950 
951 	gain_str = xnew(char, 32);
952 	snprintf(gain_str, 32, "%lf dB", volume_adj / 512.0);
953 
954 	add_v2(id3, audiophile_rg ? ID3_RG_ALBUM_GAIN : ID3_RG_TRACK_GAIN, gain_str);
955 
956 	if (peak_bytes) {
957 		peak_str = xnew(char, 32);
958 		snprintf(peak_str, 32, "%lf", ((double)peak * (1 << peak_shift)) / INT_MAX);
959 
960 		add_v2(id3, audiophile_rg ? ID3_RG_ALBUM_PEAK : ID3_RG_TRACK_PEAK, peak_str);
961 	}
962 
963 	id3_debug("gain %s, peak %s\n", gain_str, peak_str ? peak_str : "none");
964 }
965 
decode_ufid(struct id3tag * id3,const char * buf,int len)966 static void decode_ufid(struct id3tag *id3, const char *buf, int len)
967 {
968 	char *ufid;
969 	int ufid_len = len - 22 - 1;
970 
971 	if (ufid_len < 0 || strcmp(buf, "http://musicbrainz.org") != 0)
972 		return;
973 
974 	ufid = xnew(char, ufid_len + 1);
975 	memcpy(ufid, buf + len - ufid_len, ufid_len);
976 	ufid[ufid_len] = '\0';
977 
978 	id3_debug("%s: %s\n", buf, ufid);
979 	add_v2(id3, ID3_MUSICBRAINZ_TRACKID, ufid);
980 }
981 
982 
v2_add_frame(struct id3tag * id3,struct v2_frame_header * fh,const char * buf)983 static void v2_add_frame(struct id3tag *id3, struct v2_frame_header *fh, const char *buf)
984 {
985 	int encoding;
986 	int len;
987 	int idx;
988 
989 	if (!strncmp(fh->id, "RVA2", 4)) {
990 		decode_rva2(id3, buf, fh->size);
991 		return;
992 	} else if (!strncmp(fh->id, "UFID", 4)) {
993 		decode_ufid(id3, buf, fh->size);
994 		return;
995 	}
996 
997 	encoding = *buf++;
998 	len = fh->size - 1;
999 
1000 	if (encoding > ID3_ENCODING_MAX)
1001 		return;
1002 
1003 	idx = frame_tab_index(fh->id);
1004 	if (idx >= 0) {
1005 		decode_normal(id3, buf, len, encoding, frame_tab[idx].key);
1006 	} else if (!strncmp(fh->id, "TXXX", 4)) {
1007 		decode_txxx(id3, buf, len, encoding);
1008 	} else if (!strncmp(fh->id, "COMM", 4)) {
1009 		decode_comment(id3, buf, len, encoding);
1010 	} else if (!strncmp(fh->id, "COM", 3)) {
1011 		decode_comment(id3, buf, len, encoding);
1012 	}
1013 }
1014 
/*
 * Undo unsynchronisation in place: every FF 00 pair in buf[0..*lenp) is
 * replaced by a single FF. On return *lenp holds the new, possibly shorter,
 * length.
 *
 * A 00 that follows an FF 00 pair is an ordinary byte and is copied by the
 * next loop iteration, so FF 00 00 becomes FF 00.
 */
static void unsync(unsigned char *buf, int *lenp)
{
	const int len = *lenp;
	int rd = 0, wr = 0;

	while (rd < len - 1) {
		const int stuffed = buf[rd] == 0xff && buf[rd + 1] == 0x00;

		buf[wr++] = buf[rd];
		rd += stuffed ? 2 : 1;
	}

	/* last byte, unless it was consumed as the 00 of a pair */
	if (rd < len)
		buf[wr++] = buf[rd++];

	d_print("de-synchronization removed %d bytes\n", rd - wr);
	*lenp = wr;
}
1042 
v2_read(struct id3tag * id3,int fd,const struct v2_header * header)1043 static int v2_read(struct id3tag *id3, int fd, const struct v2_header *header)
1044 {
1045 	char *buf;
1046 	int rc, buf_size;
1047 	int frame_start, i;
1048 	int frame_header_size;
1049 
1050 	buf_size = header->size;
1051 	buf = xnew(char, buf_size);
1052 	rc = read_all(fd, buf, buf_size);
1053 	if (rc == -1) {
1054 		free(buf);
1055 		return rc;
1056 	}
1057 
1058 	frame_start = 0;
1059 	if (header->flags & V2_HEADER_EXTENDED) {
1060 		struct v2_extended_header ext;
1061 
1062 		if (!v2_extended_header_parse(&ext, buf) || ext.size > buf_size) {
1063 			id3_debug("extended header corrupted\n");
1064 			free(buf);
1065 			return -2;
1066 		}
1067 		frame_start = ext.size;
1068 		/* should check if update flag is set */
1069 	}
1070 
1071 	frame_header_size = 10;
1072 	if (header->ver_major == 2)
1073 		frame_header_size = 6;
1074 
1075 	i = frame_start;
1076 	while (i < buf_size - frame_header_size) {
1077 		struct v2_frame_header fh;
1078 		int len_unsync;
1079 
1080 		if (header->ver_major == 2) {
1081 			if (!v2_2_0_frame_header_parse(&fh, buf + i))
1082 				break;
1083 		} else if (header->ver_major == 3) {
1084 			if (!v2_3_0_frame_header_parse(&fh, buf + i))
1085 				break;
1086 		} else {
1087 			/* assume v2.4 */
1088 			if (!v2_4_0_frame_header_parse(&fh, buf + i))
1089 				break;
1090 		}
1091 
1092 		i += frame_header_size;
1093 
1094 		if (fh.size > buf_size - i) {
1095 			id3_debug("frame too big\n");
1096 			break;
1097 		}
1098 
1099 		if (fh.flags & V2_FRAME_LEN_INDICATOR) {
1100 			/*
1101 			 * Ignore the frame length 4-byte field
1102 			 */
1103 			i	+= 4;
1104 			fh.size	-= 4;
1105 		}
1106 
1107 		len_unsync = fh.size;
1108 
1109 		if ((fh.flags & V2_FRAME_UNSYNC) || (header->flags & V2_HEADER_UNSYNC))
1110 			unsync((unsigned char *)(buf + i), (int *)&fh.size);
1111 
1112 		v2_add_frame(id3, &fh, buf + i);
1113 
1114 		i += len_unsync;
1115 	}
1116 
1117 	free(buf);
1118 	return 0;
1119 }
1120 
id3_tag_size(const char * buf,int buf_size)1121 int id3_tag_size(const char *buf, int buf_size)
1122 {
1123 	struct v2_header header;
1124 
1125 	if (buf_size < 10)
1126 		return 0;
1127 	if (v2_header_parse(&header, buf)) {
1128 		if (header.flags & V2_HEADER_FOOTER) {
1129 			/* header + data + footer */
1130 			id3_debug("v2.%d.%d with footer\n", header.ver_major, header.ver_minor);
1131 			return 10 + header.size + 10;
1132 		}
1133 		/* header */
1134 		id3_debug("v2.%d.%d\n", header.ver_major, header.ver_minor);
1135 		return 10 + header.size;
1136 	}
1137 	if (buf_size >= 3 && is_v1(buf)) {
1138 		id3_debug("v1\n");
1139 		return 128;
1140 	}
1141 	return 0;
1142 }
1143 
id3_init(struct id3tag * id3)1144 void id3_init(struct id3tag *id3)
1145 {
1146 	const struct id3tag t = { .has_v1 = 0, .has_v2 = 0 };
1147 	*id3 = t;
1148 }
1149 
id3_free(struct id3tag * id3)1150 void id3_free(struct id3tag *id3)
1151 {
1152 	int i;
1153 
1154 	for (i = 0; i < NUM_ID3_KEYS; i++)
1155 		free(id3->v2[i]);
1156 }
1157 
id3_read_tags(struct id3tag * id3,int fd,unsigned int flags)1158 int id3_read_tags(struct id3tag *id3, int fd, unsigned int flags)
1159 {
1160 	off_t off;
1161 	int rc;
1162 
1163 	if (flags & ID3_V2) {
1164 		struct v2_header header;
1165 		char buf[138];
1166 
1167 		rc = read_all(fd, buf, 10);
1168 		if (rc == -1)
1169 			goto rc_error;
1170 		if (v2_header_parse(&header, buf)) {
1171 			rc = v2_read(id3, fd, &header);
1172 			if (rc)
1173 				goto rc_error;
1174 			/* get v1 if needed */
1175 		} else {
1176 			/* get v2 from end and optionally v1 */
1177 
1178 			off = lseek(fd, -138, SEEK_END);
1179 			if (off == -1)
1180 				goto error;
1181 			rc = read_all(fd, buf, 138);
1182 			if (rc == -1)
1183 				goto rc_error;
1184 
1185 			if (is_v1(buf + 10)) {
1186 				if (flags & ID3_V1) {
1187 					memcpy(id3->v1, buf + 10, 128);
1188 					id3->has_v1 = 1;
1189 				}
1190 				if (v2_footer_parse(&header, buf)) {
1191 					/* footer at end of file - 128 */
1192 					off = lseek(fd, -((off_t) header.size + 138), SEEK_END);
1193 					if (off == -1)
1194 						goto error;
1195 					rc = v2_read(id3, fd, &header);
1196 					if (rc)
1197 						goto rc_error;
1198 				}
1199 			} else if (v2_footer_parse(&header, buf + 128)) {
1200 				/* footer at end of file */
1201 				off = lseek(fd, -((off_t) header.size + 10), SEEK_END);
1202 				if (off == -1)
1203 					goto error;
1204 				rc = v2_read(id3, fd, &header);
1205 				if (rc)
1206 					goto rc_error;
1207 			}
1208 			return 0;
1209 		}
1210 	}
1211 	if (flags & ID3_V1) {
1212 		off = lseek(fd, -128, SEEK_END);
1213 		if (off == -1)
1214 			goto error;
1215 		rc = read_all(fd, id3->v1, 128);
1216 		if (rc == -1)
1217 			goto rc_error;
1218 		id3->has_v1 = is_v1(id3->v1);
1219 	}
1220 	return 0;
1221 error:
1222 	rc = -1;
1223 rc_error:
1224 	return rc;
1225 }
1226 
v1_get_str(const char * buf,int len)1227 static char *v1_get_str(const char *buf, int len)
1228 {
1229 	char in[32];
1230 	char *out;
1231 	int i;
1232 
1233 	for (i = len - 1; i >= 0; i--) {
1234 		if (buf[i] != 0 && buf[i] != ' ')
1235 			break;
1236 	}
1237 	if (i == -1)
1238 		return NULL;
1239 	memcpy(in, buf, i + 1);
1240 	in[i + 1] = 0;
1241 	if (u_is_valid(in))
1242 		return xstrdup(in);
1243 	if (utf8_encode(in, id3_default_charset, &out))
1244 		return NULL;
1245 	return out;
1246 }
1247 
id3_get_comment(struct id3tag * id3,enum id3_key key)1248 char *id3_get_comment(struct id3tag *id3, enum id3_key key)
1249 {
1250 	if (id3->has_v2) {
1251 		if (id3->v2[key])
1252 			return xstrdup(id3->v2[key]);
1253 	}
1254 	if (id3->has_v1) {
1255 		switch (key) {
1256 		case ID3_ARTIST:
1257 			return v1_get_str(id3->v1 + 33, 30);
1258 		case ID3_ALBUM:
1259 			return v1_get_str(id3->v1 + 63, 30);
1260 		case ID3_TITLE:
1261 			return v1_get_str(id3->v1 + 3, 30);
1262 		case ID3_DATE:
1263 			return v1_get_str(id3->v1 + 93, 4);
1264 		case ID3_GENRE:
1265 			{
1266 				unsigned char idx = id3->v1[127];
1267 
1268 				if (idx >= NR_GENRES)
1269 					return NULL;
1270 				return xstrdup(genres[idx]);
1271 			}
1272 		case ID3_TRACK:
1273 			{
1274 				char *t;
1275 
1276 				if (id3->v1[125] != 0)
1277 					return NULL;
1278 				t = xnew(char, 4);
1279 				snprintf(t, 4, "%d", ((unsigned char *)id3->v1)[126]);
1280 				return t;
1281 			}
1282 		default:
1283 			return NULL;
1284 		}
1285 	}
1286 	return NULL;
1287 }
1288 
id3_get_genre(uint16_t id)1289 char const *id3_get_genre(uint16_t id)
1290 {
1291 	if (id >= NR_GENRES)
1292 		return NULL;
1293 	return genres[id];
1294 }
1295