xref: /reactos/sdk/lib/3rdparty/libmpg123/id3.c (revision 40462c92)
1 /*
2 	id3: ID3v2.3 and ID3v2.4 parsing (a relevant subset)
3 
4 	copyright 2006-2020 by the mpg123 project - free software under the terms of the LGPL 2.1
5 	see COPYING and AUTHORS files in distribution or http://mpg123.org
6 	initially written by Thomas Orgis
7 
8 	WIP: Handling of multiple ID3 tags in a stream.
9 
10 	1. With update flag: Add non-unique data, replace unique.
11 	- Only one TALB, TPE1, etc.
12 	- Only one TXXX with a certain description.
13 	- Only one COMM with certain language and description.
14 	- Only one APIC with certain type and description, generally only one
15 	  of type 1 and 2 each.
16 	2. Without update flag: wipe whole data and only store new stuff.
17 
18 	BIG BAD BUT: How to properly handle seeks in a stream that make
19 	the parser encounter the same tags again in random order? Is
20 	there even a correct way to handle that without storing an
21 	ordered list of all tags? I could simplify the code here and just
22 	communicate that a frame should be an update to previous, and
23 	at which stream position the frame was encountered. But since
24 	libmpg123 is driven by MPEG frames, there could be multiple
25 	ID3v2 tags in direct succession treated by the parser without
26 	the library user being able to interfere.
27 
28 	This is severely fucked. All that complexity also doesn't matter
29 	in practice, as streams use ICY and individual files have just one
30 	ID3v2 tag (relevant for libmpg123).  It's an academic problem. But
31 	for seekable files, I could implement some jumping logic to find
32 	and parse all ID3v2 for once and then set a flag that only jumps
33 	the frames on seeks. That covers all local disk playback. For
34 	streams, seeking is no issue (seeking back, at least), so the
35 	update/replace logic works.
36 
37 	Look at the standard:
38 
39 ------
40 5.   Tag location
41 
42    The default location of an ID3v2 tag is prepended to the audio so
43    that players can benefit from the information when the data is
44    streamed. It is however possible to append the tag, or make a
45    prepend/append combination. When deciding upon where an unembedded
46    tag should be located, the following order of preference SHOULD be
47    considered.
48 
49      1. Prepend the tag.
50 
51      2. Prepend a tag with all vital information and add a second tag at
52         the end of the file, before tags from other tagging systems. The
53         first tag is required to have a SEEK frame.
54 
55      3. Add a tag at the end of the file, before tags from other tagging
56         systems.
57 
58    In case 2 and 3 the tag can simply be appended if no other known tags
59    are present. The suggested method to find ID3v2 tags are:
60 
61      1. Look for a prepended tag using the pattern found in section 3.1.
62 
63      2. If a SEEK frame was found, use its values to guide further
64         searching.
65 
66      3. Look for a tag footer, scanning from the back of the file.
67 
68    For every new tag that is found, the old tag should be discarded
69    unless the update flag in the extended header (section 3.2) is set.
70 ------
71 
72 	For seekable streams, I simply need to implement explicit ID3v2 search along
73 	that recommendation and keep the complete information. Streams that continue
74 	growing during playback will not recognize added ID3v2 tags. So be it.
75 	For non-seekable streams, a tag is always parsed when encountered, assuming
76 	the order of update tags always matches.
77 
78 	First step for the 1.26 release shall be the implementation of the update
79 	logic and glossing over the theoretical problem of re-parsing update
80 	frames in the wrong order by ignoring it. They are not that relevant.
81 
82 	TODO: Cave in and add the missing frames from the spec. Not that far to go.
83 	But need another data structure to communicate those ...
84 */
85 
86 #include "mpg123lib_intern.h"
87 #include "id3.h"
88 #include "debug.h"
89 
90 #ifndef NO_ID3V2 /* Only the main parsing routine will always be there. */
91 
/* We know the usual text frames plus some specifics. */
#define KNOWN_FRAMES 5

/* IDs of the frames with special handling, listed in the same order as
   the non-negative members of enum frame_types. */
static const char frame_type[KNOWN_FRAMES][5] =
{
	"COMM",
	"TXXX",
	"RVA2",
	"USLT",
	"APIC"
};

/* Frame classification; negative values are the ones without an entry in frame_type[]. */
enum frame_types
{
	unknown = -2,
	text    = -1,
	comment =  0,
	extra   =  1,
	rva2    =  2,
	uslt    =  3,
	picture =  4
};
96 
97 /* UTF support definitions */
98 
99 typedef void (*text_converter)(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet);
100 
101 static void convert_latin1  (mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet);
102 static void convert_utf16bom(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet);
103 static void convert_utf8    (mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet);
104 
105 static const text_converter text_converters[4] =
106 {
107 	convert_latin1,
108 	/* We always check for (multiple) BOM in 16bit unicode. Without BOM, UTF16 BE is the default.
109 	   Errors in encoding are detected anyway. */
110 	convert_utf16bom,
111 	convert_utf16bom,
112 	convert_utf8
113 };
114 
/* Bytes per code unit (and so per terminating null) for each text encoding byte. */
static const unsigned int encoding_widths[4] =
{
	1, /* 0: Latin 1 */
	2, /* 1: UTF-16 with BOM */
	2, /* 2: UTF-16 BE */
	1  /* 3: UTF-8 */
};
116 
117 /* the code starts here... */
118 
119 static void null_id3_links(mpg123_handle *fr)
120 {
121 	fr->id3v2.title  = NULL;
122 	fr->id3v2.artist = NULL;
123 	fr->id3v2.album  = NULL;
124 	fr->id3v2.year   = NULL;
125 	fr->id3v2.genre  = NULL;
126 	fr->id3v2.comment = NULL;
127 }
128 
129 void init_id3(mpg123_handle *fr)
130 {
131 	fr->id3v2.version = 0; /* nothing there */
132 	null_id3_links(fr);
133 	fr->id3v2.comments     = 0;
134 	fr->id3v2.comment_list = NULL;
135 	fr->id3v2.texts    = 0;
136 	fr->id3v2.text     = NULL;
137 	fr->id3v2.extras   = 0;
138 	fr->id3v2.extra    = NULL;
139 	fr->id3v2.pictures   = 0;
140 	fr->id3v2.picture    = NULL;
141 }
142 
143 /* Managing of the text, comment and extra lists. */
144 
145 /* Initialize one element. */
146 static void init_mpg123_text(mpg123_text *txt)
147 {
148 	mpg123_init_string(&txt->text);
149 	mpg123_init_string(&txt->description);
150 	txt->id[0] = 0;
151 	txt->id[1] = 0;
152 	txt->id[2] = 0;
153 	txt->id[3] = 0;
154 	txt->lang[0] = 0;
155 	txt->lang[1] = 0;
156 	txt->lang[2] = 0;
157 }
158 
159 static void init_mpg123_picture(mpg123_picture *pic)
160 {
161 	mpg123_init_string(&pic->mime_type);
162 	mpg123_init_string(&pic->description);
163 	pic->type = 0;
164 	pic->size = 0;
165 	pic->data = NULL;
166 }
167 
168 /* Free memory of one element. */
169 static void free_mpg123_text(mpg123_text *txt)
170 {
171 	mpg123_free_string(&txt->text);
172 	mpg123_free_string(&txt->description);
173 }
174 
175 static void free_mpg123_picture(mpg123_picture * pic)
176 {
177 	mpg123_free_string(&pic->mime_type);
178 	mpg123_free_string(&pic->description);
179 	if (pic->data != NULL)
180 		free(pic->data);
181 }
182 
/* Free memory of whole list.
   Each macro pairs one list of the handle's ID3v2 data with its counter. */
#define free_comment(mh) \
	free_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments))
#define free_text(mh) \
	free_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts))
#define free_extra(mh) \
	free_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras))
#define free_picture(mh) \
	free_id3_picture(&((mh)->id3v2.picture), &((mh)->id3v2.pictures))
188 static void free_id3_text(mpg123_text **list, size_t *size)
189 {
190 	size_t i;
191 	for(i=0; i<*size; ++i) free_mpg123_text(&((*list)[i]));
192 
193 	free(*list);
194 	*list = NULL;
195 	*size = 0;
196 }
197 static void free_id3_picture(mpg123_picture **list, size_t *size)
198 {
199 	size_t i;
200 	for(i=0; i<*size; ++i) free_mpg123_picture(&((*list)[i]));
201 
202 	free(*list);
203 	*list = NULL;
204 	*size = 0;
205 }
206 
/* Add items to the list.
   Shorthands that select the list/counter pair of the handle and the set of
   matching keys (frame ID, language, description) handed to the workers.
   Beware: add_uslt() has no id parameter of its own, it picks up a variable
   named id from the scope of the caller. */

#define add_comment(mh, l, d) \
	add_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments), NULL,    l, d)
#define add_text(mh, id) \
	add_id3_text(&((mh)->id3v2.text),         &((mh)->id3v2.texts),      id, NULL, NULL)
#define add_uslt(mh, l, d) \
	add_id3_text(&((mh)->id3v2.text),         &((mh)->id3v2.texts),      id, l, d)
#define add_extra(mh, d) \
	add_id3_text(&((mh)->id3v2.extra),        &((mh)->id3v2.extras),   NULL, NULL, d)
#define add_picture(mh, t, d) \
	add_id3_picture(&((mh)->id3v2.picture), &((mh)->id3v2.pictures), t, d)
214 static mpg123_text *add_id3_text( mpg123_text **list, size_t *size
215 ,	char id[4], char lang[3], mpg123_string *description )
216 {
217 	mdebug( "add_id3_text id=%s lang=%s, desc=%s"
218 	,	id ? (char[5]) { id[0], id[1], id[2], id[3], 0 } : "(nil)"
219 	,	lang ? (char[4]) { lang[0], lang[1], lang[2], 0 }  : "(nil)"
220 	,	description ? (description->fill ? description->p : "(empty)") : "(nil)" );
221 	if(lang && !description)
222 		return NULL; // no lone language intended
223 	if(id || description)
224 	{
225 		// Look through list of existing texts and return an existing entry
226 		// if it should be overwritten.
227 		for(size_t i=0; i<*size; ++i)
228 		{
229 			mpg123_text *entry = *list+i;
230 			if(description)
231 			{ // Overwrite entry with same description and same ID and language.
232 				if( (!id || !memcmp(id, entry->id, 4))
233 					&& (!lang || !memcmp(entry->lang, lang, 3))
234 					&& mpg123_same_string(&(entry->description), description)
235 				)
236 				return entry;
237 			} else if(id && !memcmp(id, entry->id, 4))
238 				return entry; // Just ovewrite because of same ID.
239 			mdebug("add_id3_text: entry %zu was no match", i);
240 		}
241 	}
242 	mdebug("add_id3_text: append to list of %zu", *size);
243 	// Nothing found, add new one.
244 	mpg123_text *x = safe_realloc(*list, sizeof(mpg123_text)*(*size+1));
245 	if(x == NULL) return NULL; /* bad */
246 
247 	*list  = x;
248 	*size += 1;
249 	init_mpg123_text(&((*list)[*size-1]));
250 
251 	return &((*list)[*size-1]); /* Return pointer to the added text. */
252 }
253 
254 
255 static mpg123_picture *add_id3_picture(mpg123_picture **list, size_t *size, char type, mpg123_string *description)
256 {
257 	if(!description)
258 		return NULL;
259 
260 	// Return entry to overwrite, if appropriate.
261 	for(size_t i=0; i<*size; ++i)
262 	{
263 		mpg123_picture *entry = *list+i;
264 		if(  type == entry->type
265 			&& ( type == 1 || type == 2 ||
266 				mpg123_same_string(&entry->description, description)
267 			)
268 		)
269 			return entry;
270 	}
271 	// Append a new one.
272 	mpg123_picture *x = safe_realloc(*list, sizeof(mpg123_picture)*(*size+1));
273 	if(x == NULL) return NULL; /* bad */
274 
275 	*list  = x;
276 	*size += 1;
277 	init_mpg123_picture(&((*list)[*size-1]));
278 
279 	return &((*list)[*size-1]); /* Return pointer to the added picture. */
280 }
281 
282 /* OK, back to the higher level functions. */
283 
284 void exit_id3(mpg123_handle *fr)
285 {
286 	free_picture(fr);
287 	free_comment(fr);
288 	free_extra(fr);
289 	free_text(fr);
290 }
291 
292 void reset_id3(mpg123_handle *fr)
293 {
294 	exit_id3(fr);
295 	init_id3(fr);
296 }
297 
298 /* Set the id3v2.artist id3v2.title ... links to elements of the array. */
299 void id3_link(mpg123_handle *fr)
300 {
301 	size_t i;
302 	mpg123_id3v2 *v2 = &fr->id3v2;
303 	debug("linking ID3v2");
304 	null_id3_links(fr);
305 	for(i=0; i<v2->texts; ++i)
306 	{
307 		mpg123_text *entry = &v2->text[i];
308 		if     (!strncmp("TIT2", entry->id, 4)) v2->title  = &entry->text;
309 		else if(!strncmp("TALB", entry->id, 4)) v2->album  = &entry->text;
310 		else if(!strncmp("TPE1", entry->id, 4)) v2->artist = &entry->text;
311 		else if(!strncmp("TYER", entry->id, 4)) v2->year   = &entry->text;
312 		else if(!strncmp("TCON", entry->id, 4)) v2->genre  = &entry->text;
313 	}
314 	for(i=0; i<v2->comments; ++i)
315 	{
316 		mpg123_text *entry = &v2->comment_list[i];
317 		if(entry->description.fill == 0 || entry->description.p[0] == 0)
318 		v2->comment = &entry->text;
319 	}
320 	/* When no generic comment found, use the last non-generic one. */
321 	if(v2->comment == NULL && v2->comments > 0)
322 	v2->comment = &v2->comment_list[v2->comments-1].text;
323 }
324 
325 /*
326 	Store ID3 text data in an mpg123_string; either verbatim copy or
327 	everything translated to UTF-8 encoding.
328 	Preserve the zero string separator (I don't need strlen for the total size).
329 
330 	Since we can overwrite strings with ID3 update frames, don't free
331 	memory, just grow strings.
332 */
333 static void store_id3_text(mpg123_string *sb, unsigned char *source, size_t source_size, const int noquiet, const int notranslate)
334 {
335 	unsigned char encoding;
336 	if(sb) // Always overwrite, even with nothing.
337 		sb->fill = 0;
338 	if(!source_size)
339 	{
340 		debug("Empty id3 data!");
341 		return;
342 	}
343 
344 	/* We shall just copy the data. Client wants to decode itself. */
345 	if(notranslate)
346 	{
347 		/* Future: Add a path for ID3 errors. */
348 		if(!mpg123_grow_string(sb, source_size))
349 		{
350 			if(noquiet) error("Cannot resize target string, out of memory?");
351 			return;
352 		}
353 		memcpy(sb->p, source, source_size);
354 		sb->fill = source_size;
355 		debug1("stored undecoded ID3 text of size %"SIZE_P, (size_p)source_size);
356 		return;
357 	}
358 
359 	encoding = source[0];
360 	if(encoding > mpg123_id3_enc_max)
361 	{
362 		if(noquiet)
363 			error1("Unknown text encoding %u, I take no chances, sorry!", encoding);
364 		return;
365 	}
366 	id3_to_utf8(sb, encoding, source+1, source_size-1, noquiet);
367 
368 	if(sb->fill) debug1("UTF-8 string (the first one): %s", sb->p);
369 	else if(noquiet) error("unable to convert string to UTF-8 (out of memory, junk input?)!");
370 }
371 
372 /* On error, sb->size is 0. */
373 /* Also, encoding has been checked already! */
374 void id3_to_utf8(mpg123_string *sb, unsigned char encoding, const unsigned char *source, size_t source_size, int noquiet)
375 {
376 	unsigned int bwidth;
377 	if(sb)
378 		sb->fill = 0;
379 	debug1("encoding: %u", encoding);
380 	/* A note: ID3v2.3 uses UCS-2 non-variable 16bit encoding, v2.4 uses UTF16.
381 	   UTF-16 uses a reserved/private range in UCS-2 to add the magic, so we just always treat it as UTF. */
382 	bwidth = encoding_widths[encoding];
383 	/* Hack! I've seen a stray zero byte before BOM. Is that supposed to happen? */
384 	if(encoding != mpg123_id3_utf16be) /* UTF16be _can_ beging with a null byte! */
385 	while(source_size > bwidth && source[0] == 0)
386 	{
387 		--source_size;
388 		++source;
389 		debug("skipped leading zero");
390 	}
391 	if(source_size % bwidth)
392 	{
393 		/* When we need two bytes for a character, it's strange to have an uneven bytestream length. */
394 		if(noquiet) warning2("Weird tag size %d for encoding %u - I will probably trim too early or something but I think the MP3 is broken.", (int)source_size, encoding);
395 		source_size -= source_size % bwidth;
396 	}
397 	text_converters[encoding](sb, source, source_size, noquiet);
398 }
399 
400 /* You have checked encoding to be in the range already. */
401 static unsigned char *next_text(unsigned char* prev, unsigned char encoding, size_t limit)
402 {
403 	unsigned char *text = prev;
404 	size_t width = encoding_widths[encoding];
405 
406 	/* So I go lengths to find zero or double zero...
407 	   Remember bug 2834636: Only check for aligned NULLs! */
408 	while(text-prev < (ssize_t)limit)
409 	{
410 		if(text[0] == 0)
411 		{
412 			if(width <= limit-(text-prev))
413 			{
414 				size_t i = 1;
415 				for(; i<width; ++i) if(text[i] != 0) break;
416 
417 				if(i == width) /* found a null wide enough! */
418 				{
419 					text += width;
420 					break;
421 				}
422 			}
423 			else return NULL; /* No full character left? This text is broken */
424 		}
425 
426 		text += width;
427 	}
428 	if((size_t)(text-prev) >= limit) text = NULL;
429 
430 	return text;
431 }
432 
/* Human-readable name for an ID3v2 text encoding byte, for diagnostics. */
static const char *enc_name(unsigned char enc)
{
	static const char *const names[] =
	{
		"Latin 1",
		"UTF-16 BOM",
		"UTF-16 BE",
		"UTF-8"
	};

	if(enc < sizeof(names)/sizeof(names[0]))
		return names[enc];

	return "unknown!";
}
444 
445 static void process_text(mpg123_handle *fr, unsigned char *realdata, size_t realsize, char *id)
446 {
447 	/* Text encoding          $xx */
448 	/* The text (encoded) ... */
449 	mpg123_text *t = add_text(fr, id);
450 	if(VERBOSE4) fprintf(stderr, "Note: Storing text from %s encoding\n", enc_name(realdata[0]));
451 	if(t == NULL)
452 	{
453 		if(NOQUIET) error("Unable to attach new text!");
454 		return;
455 	}
456 	mdebug("process_text: (over)writing entry with ID %s", t->id
457 	?	(char[5]) { t->id[0], t->id[1], t->id[2], t->id[3], 0 }
458 	:	"(nil)" );
459 	memcpy(t->id, id, 4);
460 	store_id3_text(&t->text, realdata, realsize, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT);
461 	if(VERBOSE4) // Do not print unsanitized text to terminals!
462 		fprintf(stderr, "Note: ID3v2 %c%c%c%c text frame stored\n", id[0], id[1], id[2], id[3]);
463 }
464 
465 static void process_picture(mpg123_handle *fr, unsigned char *realdata, size_t realsize)
466 {
467 	unsigned char encoding;
468 	mpg123_picture *i = NULL;
469 	unsigned char* workpoint = NULL;
470 	mpg123_string mime; mpg123_init_string(&mime);
471 	unsigned char image_type = 0;
472 	mpg123_string description;	mpg123_init_string(&description);
473 	unsigned char *image_data = NULL;
474 	if(realsize < 1)
475 	{
476 		debug("Empty id3 data!");
477 		return;
478 	}
479 	encoding = realdata[0];
480 	realdata++; realsize--;
481 	if(encoding > mpg123_id3_enc_max)
482 	{
483 		if(NOQUIET)
484 			error1("Unknown text encoding %u, I take no chances, sorry!", encoding);
485 		return;
486 	}
487 	if(VERBOSE4) fprintf(stderr, "Note: Storing picture from APIC frame.\n");
488 
489 	/* get mime type (encoding is always latin-1) */
490 	workpoint = next_text(realdata, 0, realsize);
491 	if(!workpoint)
492 	{
493 		if(NOQUIET)
494 			error("Unable to get mime type for picture; skipping picture.");
495 		return;
496 	}
497 	id3_to_utf8(&mime, 0, realdata, workpoint - realdata, NOQUIET);
498 	realsize -= workpoint - realdata;
499 	realdata = workpoint;
500 	/* get picture type */
501 	image_type = realdata[0];
502 	realdata++; realsize--;
503 	/* get description (encoding is encoding) */
504 	workpoint = next_text(realdata, encoding, realsize);
505 	if(!workpoint)
506 	{
507 		if(NOQUIET)
508 			error("Unable to get description for picture; skipping picture.");
509 		mpg123_free_string(&mime);
510 		return;
511 	}
512 	id3_to_utf8(&description, encoding, realdata, workpoint - realdata, NOQUIET);
513 	realsize -= workpoint - realdata;
514 	if(realsize)
515 		image_data = (unsigned char*)malloc(realsize);
516 	if(!realsize || !image_data) {
517 		if(NOQUIET)
518 			error("No picture data or malloc failure; skipping picture.");
519 		mpg123_free_string(&description);
520 		mpg123_free_string(&mime);
521 		return;
522 	}
523 	memcpy(image_data, workpoint, realsize);
524 
525 	// All data ready now, append to/replace in list.
526 	i = add_picture(fr, image_type, &description);
527 	if(!i)
528 	{
529 		if(NOQUIET)
530 			error("Unable to attach new picture!");
531 		free(image_data);
532 		mpg123_free_string(&description);
533 		mpg123_free_string(&mime);
534 		return;
535 	}
536 
537 	// Either this is a fresh image, or one to be replaced.
538 	// We hand over memory, so old storage needs to be freed.
539 	free_mpg123_picture(i);
540 	i->type = image_type;
541 	i->size = realsize;
542 	i->data = image_data;
543 	mpg123_move_string(&mime, &i->mime_type);
544 	mpg123_move_string(&description, &i->description);
545 	if(VERBOSE4)
546 		fprintf(stderr, "Note: ID3v2 APIC picture frame of type: %d\n", i->type);
547 }
548 
549 /* Store a new comment that perhaps is a RVA / RVA_ALBUM/AUDIOPHILE / RVA_MIX/RADIO one
550    Special gimmik: It also stores USLT to the texts. Stucture is the same as for comments. */
551 static void process_comment(mpg123_handle *fr, enum frame_types tt, unsigned char *realdata, size_t realsize, int rva_level, char *id)
552 {
553 	/* Text encoding          $xx */
554 	/* Language               $xx xx xx */
555 	/* Short description (encoded!)      <text> $00 (00) */
556 	/* Then the comment text (encoded) ... */
557 	unsigned char  encoding = realdata[0];
558 	char lang[3]; // realdata + 1
559 	unsigned char *descr    = realdata+4;
560 	unsigned char *text     = NULL;
561 	mpg123_text *xcom = NULL;
562 	mpg123_text localcom; // UTF-8 variant for local processing, remember to clean up!
563 	init_mpg123_text(&localcom);
564 
565 	if(realsize < (size_t)(descr-realdata))
566 	{
567 		if(NOQUIET) error1("Invalid frame size of %"SIZE_P" (too small for anything).", (size_p)realsize);
568 		return;
569 	}
570 	if(encoding > mpg123_id3_enc_max)
571 	{
572 		if(NOQUIET)
573 			error1("Unknown text encoding %u, I take no chances, sorry!", encoding);
574 		return;
575 	}
576 	memcpy(lang, realdata+1, 3);
577 	/* Now I can abuse a byte from lang for the encoding. */
578 	descr[-1] = encoding;
579 	/* Be careful with finding the end of description, I have to honor encoding here. */
580 	text = next_text(descr, encoding, realsize-(descr-realdata));
581 	if(text == NULL)
582 	{
583 		if(NOQUIET)
584 			error("No comment text / valid description?");
585 		return;
586 	}
587 	{ // just vor variable scope
588 		mpg123_string description;
589 		mpg123_init_string(&description);
590 		// Store the text, with desired encoding, but for comments always a local copy in UTF-8.
591 		store_id3_text( &description, descr-1, text-descr+1
592 		,	NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT );
593 		if(tt == comment)
594 			store_id3_text( &localcom.description, descr-1, text-descr+1
595 			,	NOQUIET, 0 );
596 		if(VERBOSE4)
597 			fprintf( stderr, "Note: Storing comment from %s encoding\n"
598 			,	enc_name(realdata[0]) );
599 		xcom = tt == uslt
600 		?	add_uslt(fr, lang, &description)
601 		:	add_comment(fr, lang, &description);
602 		if(xcom == NULL)
603 		{
604 			if(NOQUIET)
605 				error("Unable to attach new comment!");
606 			mpg123_free_string(&description);
607 			free_mpg123_text(&localcom);
608 			return;
609 		}
610 		memcpy(xcom->id, id, 4);
611 		memcpy(xcom->lang, lang, 3);
612 		// That takes over the description allocation.
613 		mpg123_move_string(&description, &xcom->description);
614 	}
615 
616 	text[-1] = encoding; /* Byte abusal for encoding... */
617 	store_id3_text(&xcom->text, text-1, realsize+1-(text-realdata), NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT);
618 	/* Remember: I will probably decode the above (again) for rva comment checking. So no messing around, please. */
619 
620 	if(VERBOSE4) /* Do _not_ print the verbatim text: The encoding might be funny! */
621 	{
622 		fprintf(stderr, "Note: ID3 comm/uslt desc of length %"SIZE_P".\n", (size_p)xcom->description.fill);
623 		fprintf(stderr, "Note: ID3 comm/uslt text of length %"SIZE_P".\n", (size_p)xcom->text.fill);
624 	}
625 	/* Look out for RVA info only when we really deal with a straight comment. */
626 	if(tt == comment && localcom.description.fill > 0)
627 	{
628 		int rva_mode = -1; /* mix / album */
629 		if(    !strcasecmp(localcom.description.p, "rva")
630 			 || !strcasecmp(localcom.description.p, "rva_mix")
631 			 || !strcasecmp(localcom.description.p, "rva_track")
632 			 || !strcasecmp(localcom.description.p, "rva_radio") )
633 		rva_mode = 0;
634 		else if(    !strcasecmp(localcom.description.p, "rva_album")
635 		         || !strcasecmp(localcom.description.p, "rva_audiophile")
636 		         || !strcasecmp(localcom.description.p, "rva_user") )
637 		rva_mode = 1;
638 		if((rva_mode > -1) && (fr->rva.level[rva_mode] <= rva_level))
639 		{
640 			/* Only translate the contents in here where we really need them. */
641 			store_id3_text(&localcom.text, text-1, realsize+1-(text-realdata), NOQUIET, 0);
642 			if(localcom.text.fill > 0)
643 			{
644 				fr->rva.gain[rva_mode] = (float) atof(localcom.text.p);
645 				if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]);
646 				fr->rva.peak[rva_mode] = 0;
647 				fr->rva.level[rva_mode] = rva_level;
648 			}
649 		}
650 	}
651 	/* Make sure to free the local memory... */
652 	free_mpg123_text(&localcom);
653 }
654 
655 static void process_extra(mpg123_handle *fr, unsigned char* realdata, size_t realsize, int rva_level, char *id)
656 {
657 	/* Text encoding          $xx */
658 	/* Description        ... $00 (00) */
659 	/* Text ... */
660 	unsigned char encoding = realdata[0];
661 	unsigned char *descr   = realdata+1; /* remember, the encoding is descr[-1] */
662 	unsigned char *text;
663 	mpg123_text *xex;
664 	mpg123_text localex;
665 
666 	if((int)realsize < descr-realdata)
667 	{
668 		if(NOQUIET) error1("Invalid frame size of %lu (too small for anything).", (unsigned long)realsize);
669 		return;
670 	}
671 	if(encoding > mpg123_id3_enc_max)
672 	{
673 		if(NOQUIET)
674 			error1("Unknown text encoding %u, I take no chances, sorry!", encoding);
675 		return;
676 	}
677 	text = next_text(descr, encoding, realsize-(descr-realdata));
678 	if(VERBOSE4) fprintf(stderr, "Note: Storing extra from %s encoding\n", enc_name(realdata[0]));
679 	if(text == NULL)
680 	{
681 		if(NOQUIET) error("No extra frame text / valid description?");
682 		return;
683 	}
684 	{ // just vor variable scope
685 		mpg123_string description;
686 		mpg123_init_string(&description);
687 		/* The outside storage gets reencoded to UTF-8 only if not requested otherwise.
688 		   Remember that we really need the -1 here to hand in the encoding byte!*/
689 		store_id3_text( &description, descr-1, text-descr+1
690 		,	NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT );
691 		xex = add_extra(fr, &description);
692 		if(xex)
693 			mpg123_move_string(&description, &xex->description);
694 		else
695 			mpg123_free_string(&description);
696 	}
697 	if(xex == NULL)
698 	{
699 		if(NOQUIET) error("Unable to attach new extra text!");
700 		return;
701 	}
702 	memcpy(xex->id, id, 4);
703 	init_mpg123_text(&localex); /* For our local copy. */
704 
705 	/* Our local copy is always stored in UTF-8! */
706 	store_id3_text(&localex.description, descr-1, text-descr+1, NOQUIET, 0);
707 	/* At first, only store the outside copy of the payload. We may not need the local copy. */
708 	text[-1] = encoding;
709 	store_id3_text(&xex->text, text-1, realsize-(text-realdata)+1, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT);
710 
711 	/* Now check if we would like to interpret this extra info for RVA. */
712 	if(localex.description.fill > 0)
713 	{
714 		int is_peak = 0;
715 		int rva_mode = -1; /* mix / album */
716 
717 		if(!strncasecmp(localex.description.p, "replaygain_track_",17))
718 		{
719 			if(VERBOSE3) fprintf(stderr, "Note: RVA ReplayGain track gain/peak\n");
720 
721 			rva_mode = 0;
722 			if(!strcasecmp(localex.description.p, "replaygain_track_peak")) is_peak = 1;
723 			else if(strcasecmp(localex.description.p, "replaygain_track_gain")) rva_mode = -1;
724 		}
725 		else
726 		if(!strncasecmp(localex.description.p, "replaygain_album_",17))
727 		{
728 			if(VERBOSE3) fprintf(stderr, "Note: RVA ReplayGain album gain/peak\n");
729 
730 			rva_mode = 1;
731 			if(!strcasecmp(localex.description.p, "replaygain_album_peak")) is_peak = 1;
732 			else if(strcasecmp(localex.description.p, "replaygain_album_gain")) rva_mode = -1;
733 		}
734 		if((rva_mode > -1) && (fr->rva.level[rva_mode] <= rva_level))
735 		{
736 			/* Now we need the translated copy of the data. */
737 			store_id3_text(&localex.text, text-1, realsize-(text-realdata)+1, NOQUIET, 0);
738 			if(localex.text.fill > 0)
739 			{
740 				if(is_peak)
741 				{
742 					fr->rva.peak[rva_mode] = (float) atof(localex.text.p);
743 					if(VERBOSE3) fprintf(stderr, "Note: RVA peak %f\n", fr->rva.peak[rva_mode]);
744 				}
745 				else
746 				{
747 					fr->rva.gain[rva_mode] = (float) atof(localex.text.p);
748 					if(VERBOSE3) fprintf(stderr, "Note: RVA gain %fdB\n", fr->rva.gain[rva_mode]);
749 				}
750 				fr->rva.level[rva_mode] = rva_level;
751 			}
752 		}
753 	}
754 
755 	free_mpg123_text(&localex);
756 }
757 
758 /* Make a ID3v2.3+ 4-byte ID from a ID3v2.2 3-byte ID
759    Note that not all frames survived to 2.4; the mapping goes to 2.3 .
760    A notable miss is the old RVA frame, which is very unspecific anyway.
761    This function returns -1 when a not known 3 char ID was encountered, 0 otherwise. */
762 static int promote_framename(mpg123_handle *fr, char *id) /* fr because of VERBOSE macros */
763 {
764 	size_t i;
765 	char *old[] =
766 	{
767 		"COM",  "TAL",  "TBP",  "TCM",  "TCO",  "TCR",  "TDA",  "TDY",  "TEN",  "TFT",
768 		"TIM",  "TKE",  "TLA",  "TLE",  "TMT",  "TOA",  "TOF",  "TOL",  "TOR",  "TOT",
769 		"TP1",  "TP2",  "TP3",  "TP4",  "TPA",  "TPB",  "TRC",  "TDA",  "TRK",  "TSI",
770 		"TSS",  "TT1",  "TT2",  "TT3",  "TXT",  "TXX",  "TYE"
771 	};
772 	char *new[] =
773 	{
774 		"COMM", "TALB", "TBPM", "TCOM", "TCON", "TCOP", "TDAT", "TDLY", "TENC", "TFLT",
775 		"TIME", "TKEY", "TLAN", "TLEN", "TMED", "TOPE", "TOFN", "TOLY", "TORY", "TOAL",
776 		"TPE1", "TPE2", "TPE3", "TPE4", "TPOS", "TPUB", "TSRC", "TRDA", "TRCK", "TSIZ",
777 		"TSSE", "TIT1", "TIT2", "TIT3", "TEXT", "TXXX", "TYER"
778 	};
779 	for(i=0; i<sizeof(old)/sizeof(char*); ++i)
780 	{
781 		if(!strncmp(id, old[i], 3))
782 		{
783 			memcpy(id, new[i], 4);
784 			if(VERBOSE3) fprintf(stderr, "Translated ID3v2.2 frame %s to %s\n", old[i], new[i]);
785 			return 0;
786 		}
787 	}
788 	if(VERBOSE3) fprintf(stderr, "Ignoring untranslated ID3v2.2 frame %c%c%c\n", id[0], id[1], id[2]);
789 	return -1;
790 }
791 
792 #endif /* NO_ID3V2 */
793 
794 int store_id3v2( mpg123_handle *fr
795 ,	unsigned long first4bytes, unsigned char buf[6], unsigned long length )
796 {
797 	int ret = 1;
798 	off_t ret2;
799 	unsigned long fullen = 10+length;
800 	if(fr->id3v2_raw)
801 		free(fr->id3v2_raw);
802 	fr->id3v2_size = 0;
803 	/* Allocate one byte more for a closing zero as safety catch for strlen(). */
804 	fr->id3v2_raw = malloc(fullen+1);
805 	if(!fr->id3v2_raw)
806 	{
807 		fr->err = MPG123_OUT_OF_MEM;
808 		if(NOQUIET)
809 			error1("ID3v2: Arrg! Unable to allocate %lu bytes"
810 				" for ID3v2 data - trying to skip instead.", length+1);
811 		if((ret2=fr->rd->skip_bytes(fr,length)) < 0)
812 			ret = ret2;
813 		else
814 			ret = 0;
815 	}
816 	else
817 	{
818 		fr->id3v2_raw[0] = (first4bytes>>24) & 0xff;
819 		fr->id3v2_raw[1] = (first4bytes>>16) & 0xff;
820 		fr->id3v2_raw[2] = (first4bytes>>8)  & 0xff;
821 		fr->id3v2_raw[3] =  first4bytes      & 0xff;
822 		memcpy(fr->id3v2_raw+4, buf, 6);
823 		if((ret2=fr->rd->read_frame_body(fr, fr->id3v2_raw+10, length)) < 0)
824 		{
825 			ret=ret2;
826 			free(fr->id3v2_raw);
827 			fr->id3v2_raw = NULL;
828 		}
829 		else
830 		{ /* Closing with a zero for paranoia. */
831 			fr->id3v2_raw[fullen] = 0;
832 			fr->id3v2_size = fullen;
833 		}
834 	}
835 	return ret;
836 }
837 
838 /*
839 	trying to parse ID3v2.3 and ID3v2.4 tags...
840 
841 	returns:  0: bad or just unparseable tag
842 	          1: good, (possibly) new tag info
843 	         <0: reader error (may need more data feed, try again)
844 */
845 int parse_new_id3(mpg123_handle *fr, unsigned long first4bytes)
846 {
847 	#define UNSYNC_FLAG 128
848 	#define EXTHEAD_FLAG 64  /* ID3v2.3+ */
849 	#define COMPRESS_FLAG 64 /* ID3v2.2 */
850 	#define EXP_FLAG 32
851 	#define FOOTER_FLAG 16
852 	#define EXT_UPDATE_FLAG 64 /* ID3v2.4 only: extended header update flag */
853 	#define UNKNOWN_FLAGS 15 /* 00001111*/
854 	unsigned char buf[6];
855 	unsigned long length=0;
856 	unsigned char flags = 0;
857 	int ret = 1;
858 	off_t ret2;
859 	int storetag = 0;
860 	unsigned int footlen = 0;
861 #ifndef NO_ID3V2
862 	int skiptag = 0;
863 #endif
864 	unsigned char major = first4bytes & 0xff;
865 	debug1("ID3v2: major tag version: %i", major);
866 
867 	if(major == 0xff) return 0; /* Invalid... */
868 	if((ret2 = fr->rd->read_frame_body(fr, buf, 6)) < 0) /* read more header information */
869 	return ret2;
870 
871 	if(buf[0] == 0xff) return 0; /* Revision, will never be 0xff. */
872 
873 	if(fr->p.flags & MPG123_STORE_RAW_ID3)
874 		storetag = 1;
875 	/* second new byte are some nice flags, if these are invalid skip the whole thing */
876 	flags = buf[1];
877 	debug1("ID3v2: flags 0x%08x", flags);
878 	/* use 4 bytes from buf to construct 28bit uint value and return 1; return 0 if bytes are not synchsafe */
879 	#define synchsafe_to_long(buf,res) \
880 	( \
881 		(((buf)[0]|(buf)[1]|(buf)[2]|(buf)[3]) & 0x80) ? 0 : \
882 		(res =  (((unsigned long) (buf)[0]) << 21) \
883 		     | (((unsigned long) (buf)[1]) << 14) \
884 		     | (((unsigned long) (buf)[2]) << 7) \
885 		     |  ((unsigned long) (buf)[3]) \
886 		,1) \
887 	)
888 	/* id3v2.3 does not store synchsafe frame sizes, but synchsafe tag size - doh! */
889 	/* Remember: bytes_to_long() can yield ULONG_MAX on 32 bit platforms! */
890 	#define bytes_to_long(buf,res) \
891 	( \
892 		major == 3 ? \
893 		(res =  (((unsigned long) (buf)[0]) << 24) \
894 		     | (((unsigned long) (buf)[1]) << 16) \
895 		     | (((unsigned long) (buf)[2]) << 8) \
896 		     |  ((unsigned long) (buf)[3]) \
897 		,1) : synchsafe_to_long(buf,res) \
898 	)
899 	/* for id3v2.2 only */
900 	#define threebytes_to_long(buf,res) \
901 	( \
902 		res =  (((unsigned long) (buf)[0]) << 16) \
903 		     | (((unsigned long) (buf)[1]) << 8) \
904 		     |  ((unsigned long) (buf)[2]) \
905 	)
906 
907 	/* length-10 or length-20 (footer present); 4 synchsafe integers == 28 bit number  */
908 	/* we have already read 10 bytes, so left are length or length+10 bytes belonging to tag */
909 	/* Note: This is an 28 bit value in 32 bit storage, plenty of space for */
910 	/* length+x for reasonable x. */
911 	if(!synchsafe_to_long(buf+2,length))
912 	{
913 		if(NOQUIET) error4("Bad tag length (not synchsafe): 0x%02x%02x%02x%02x; You got a bad ID3 tag here.", buf[2],buf[3],buf[4],buf[5]);
914 		return 0;
915 	}
916 	if(flags & FOOTER_FLAG)
917 		footlen = 10;
918 	debug1("ID3v2: tag data length %lu", length);
919 #ifndef NO_ID3V2
920 	if(VERBOSE2) fprintf(stderr,"Note: ID3v2.%i rev %i tag of %lu bytes\n", major, buf[0], length);
921 	/* skip if unknown version/scary flags, parse otherwise */
922 	if(fr->p.flags & MPG123_SKIP_ID3V2)
923 	{
924 		if(VERBOSE3)
925 			fprintf(stderr, "Note: Skipping ID3v2 tag per user request.\n");
926 		skiptag = 1;
927 	}
928 	if((flags & UNKNOWN_FLAGS) || (major > 4) || (major < 2))
929 	{
930 		if(NOQUIET)
931 			warning2( "ID3v2: Won't parse the ID3v2 tag with major version"
932 				" %u and flags 0x%xu - some extra code may be needed"
933 			,	major, flags );
934 		skiptag = 1;
935 	}
936 	// Standard says that compressed tags should be ignored as there isn't an agreed
937 	// compressoion scheme.
938 	if(major == 2 && flags & COMPRESS_FLAG)
939 	{
940 		if(NOQUIET)
941 			warning("ID3v2: ignoring compressed ID3v2.2 tag");
942 		skiptag = 1;
943 	}
944 	if(length < 10)
945 	{
946 		if(NOQUIET)
947 			warning1("ID3v2: unrealistic small tag lengh %lu, skipping", length);
948 		skiptag = 1;
949 	}
950 	if(!skiptag)
951 		storetag = 1;
952 #endif
953 	if(storetag)
954 	{
955 		/* Stores whole tag with footer and an additonal trailing zero. */
956 		if((ret2 = store_id3v2(fr, first4bytes, buf, length+footlen)) <= 0)
957 			return ret2;
958 	}
959 #ifndef NO_ID3V2
960 	if(skiptag)
961 	{
962 		if(VERBOSE3)
963 			fprintf(stderr, "Note: skipped tag clearing possibly existing ID3v2 data");
964 		reset_id3(fr); // Old data is invalid.
965 #endif
966 		if(!storetag && (ret2=fr->rd->skip_bytes(fr,length+footlen))<0)
967 			ret=ret2;
968 #ifndef NO_ID3V2
969 	}
970 	else
971 	{
972 		unsigned char* tagdata = fr->id3v2_raw+10;
973 		/* try to interpret that beast */
974 		debug("ID3v2: analysing frames...");
975 		if(length > 0)
976 		{
977 			unsigned char extflags = 0;
978 			unsigned long tagpos = 0;
979 			/* bytes of frame title and of framesize value */
980 			unsigned int head_part = major > 2 ? 4 : 3;
981 			unsigned int flag_part = major > 2 ? 2 : 0;
982 			/* The amount of bytes that are unconditionally read for each frame: */
983 			/* ID, size, flags. */
984 			unsigned int framebegin = head_part+head_part+flag_part;
985 			debug1("ID3v2: have read at all %lu bytes for the tag now", (unsigned long)length+6);
986 			if(flags & EXTHEAD_FLAG)
987 			{
988 				debug("ID3v2: extended header");
989 				if(!bytes_to_long(tagdata, tagpos) || tagpos >= length)
990 				{
991 					ret = 0;
992 					if(NOQUIET)
993 						error4( "Bad (non-synchsafe/too large) tag offset from extended header:"
994 							"0x%02x%02x%02x%02x"
995 						,	tagdata[0], tagdata[1], tagdata[2], tagdata[3] );
996 				} else if(tagpos < 6)
997 				{
998 					ret = 0;
999 					if(NOQUIET)
1000 						merror("Extended header too small (%lu).", tagpos);
1001 				}
1002 				if(major == 3)
1003 				{
1004 					tagpos += 4; // The size itself is not included.
1005 					if(tagpos >= length)
1006 					{
1007 						ret = 0;
1008 						if(NOQUIET)
1009 							error("Too much extended v2.3 header.");
1010 					}
1011 				} else if(ret) // v2.4 and at least got my 6 bytes of ext header
1012 				{
1013 					// Only v4 knows update frames, check for that.
1014 					// Need to step back. Header is 4 bytes length, one byte flag size,
1015 					// one byte flags. Flag size has to be 1!
1016 					if(tagdata[4] == 1 && tagdata[5] & EXT_UPDATE_FLAG)
1017 					{
1018 						if(VERBOSE3)
1019 							fprintf(stderr, "Note: ID3v2.4 update tag\n");
1020 						extflags |= EXT_UPDATE_FLAG;
1021 					}
1022 				}
1023 			}
1024 			if(!(extflags & EXT_UPDATE_FLAG))
1025 			{
1026 				if(VERBOSE3)
1027 					fprintf(stderr, "Note: non-update tag replacing existing ID3v2 data\n");
1028 				reset_id3(fr);
1029 			}
1030 			if(ret > 0)
1031 			{
1032 				char id[5];
1033 				unsigned long framesize;
1034 				unsigned long fflags; /* need 16 bits, actually */
1035 				id[4] = 0;
1036 				fr->id3v2.version = major;
1037 				/* Pos now advanced after ext head, now a frame has to follow. */
1038 				/* Note: tagpos <= length, which is 28 bit integer, so both */
1039 				/* far away from overflow for adding known small values. */
1040 				/* I want to read at least one full header now. */
1041 				while(length >= tagpos+framebegin)
1042 				{
1043 					int i = 0;
1044 					unsigned long pos = tagpos;
1045 					/* level 1,2,3 - 0 is info from lame/info tag! */
1046 					/* rva tags with ascending significance, then general frames */
1047 					enum frame_types tt = unknown;
1048 					/* we may have entered the padding zone or any other strangeness: check if we have valid frame id characters */
1049 					for(i=0; i< head_part; ++i)
1050 					if( !( ((tagdata[tagpos+i] > 47) && (tagdata[tagpos+i] < 58))
1051 						 || ((tagdata[tagpos+i] > 64) && (tagdata[tagpos+i] < 91)) ) )
1052 					{
1053 						debug5("ID3v2: real tag data apparently ended after %lu bytes with 0x%02x%02x%02x%02x", tagpos, tagdata[tagpos], tagdata[tagpos+1], tagdata[tagpos+2], tagdata[tagpos+3]);
1054 						/* This is no hard error... let's just hope that we got something meaningful already (ret==1 in that case). */
1055 						goto tagparse_cleanup; /* Need to escape two loops here. */
1056 					}
1057 					if(ret > 0)
1058 					{
1059 						/* 4 or 3 bytes id */
1060 						strncpy(id, (char*) tagdata+pos, head_part);
1061 						id[head_part] = 0; /* terminate for 3 or 4 bytes */
1062 						pos += head_part;
1063 						tagpos += head_part;
1064 						/* size as 32 bits or 28 bits */
1065 						if(fr->id3v2.version == 2) threebytes_to_long(tagdata+pos, framesize);
1066 						else
1067 						if(!bytes_to_long(tagdata+pos, framesize))
1068 						{
1069 							/* Just assume that up to now there was some good data. */
1070 							if(NOQUIET) error1("ID3v2: non-syncsafe size of %s frame, skipping the remainder of tag", id);
1071 							break;
1072 						}
1073 						if(VERBOSE3) fprintf(stderr, "Note: ID3v2 %s frame of size %lu\n", id, framesize);
1074 						tagpos += head_part;
1075 						pos += head_part;
1076 						if(fr->id3v2.version > 2)
1077 						{
1078 							fflags  = (((unsigned long) tagdata[pos]) << 8) | ((unsigned long) tagdata[pos+1]);
1079 							pos    += 2;
1080 							tagpos += 2;
1081 						}
1082 						else fflags = 0;
1083 
1084 						if(length - tagpos < framesize)
1085 						{
1086 							if(NOQUIET) error("Whoa! ID3v2 frame claims to be larger than the whole rest of the tag.");
1087 							break;
1088 						}
1089 						tagpos += framesize; /* the important advancement in whole tag */
1090 						/* for sanity, after full parsing tagpos should be == pos */
1091 						/* debug4("ID3v2: found %s frame, size %lu (as bytes: 0x%08lx), flags 0x%016lx", id, framesize, framesize, fflags); */
1092 						/* %0abc0000 %0h00kmnp */
1093 						#define BAD_FFLAGS (unsigned long) 36784
1094 						#define PRES_TAG_FFLAG 16384
1095 						#define PRES_FILE_FFLAG 8192
1096 						#define READ_ONLY_FFLAG 4096
1097 						#define GROUP_FFLAG 64
1098 						#define COMPR_FFLAG 8
1099 						#define ENCR_FFLAG 4
1100 						#define UNSYNC_FFLAG 2
1101 						#define DATLEN_FFLAG 1
1102 						if(head_part < 4 && promote_framename(fr, id) != 0) continue;
1103 
1104 						/* shall not or want not handle these */
1105 						if(fflags & (BAD_FFLAGS | COMPR_FFLAG | ENCR_FFLAG))
1106 						{
1107 							if(NOQUIET) warning("ID3v2: skipping invalid/unsupported frame");
1108 							continue;
1109 						}
1110 
1111 						for(i = 0; i < KNOWN_FRAMES; ++i)
1112 						if(!strncmp(frame_type[i], id, 4)){ tt = i; break; }
1113 
1114 						if(id[0] == 'T' && tt != extra) tt = text;
1115 
1116 						if(tt != unknown)
1117 						{
1118 							int rva_mode = -1; /* mix / album */
1119 							unsigned long realsize = framesize;
1120 							unsigned char* realdata = tagdata+pos;
1121 							unsigned char* unsyncbuffer = NULL;
1122 							if(((flags & UNSYNC_FLAG) || (fflags & UNSYNC_FFLAG)) && framesize > 0)
1123 							{
1124 								unsigned long ipos = 0;
1125 								unsigned long opos = 0;
1126 								debug("Id3v2: going to de-unsync the frame data");
1127 								/* de-unsync: FF00 -> FF; real FF00 is simply represented as FF0000 ... */
1128 								/* damn, that means I have to delete bytes from withing the data block... thus need temporal storage */
1129 								/* standard mandates that de-unsync should always be safe if flag is set */
1130 								realdata = unsyncbuffer = malloc(framesize+1); /* will need <= bytes, plus a safety zero */
1131 								if(realdata == NULL)
1132 								{
1133 									if(NOQUIET) error("ID3v2: unable to allocate working buffer for de-unsync");
1134 									continue;
1135 								}
1136 								/* now going byte per byte through the data... */
1137 								realdata[0] = tagdata[pos];
1138 								opos = 1;
1139 								for(ipos = pos+1; ipos < pos+framesize; ++ipos)
1140 								{
1141 									if(!((tagdata[ipos] == 0) && (tagdata[ipos-1] == 0xff)))
1142 									{
1143 										realdata[opos++] = tagdata[ipos];
1144 									}
1145 								}
1146 								realsize = opos;
1147 								/* Append a zero to keep strlen() safe. */
1148 								realdata[realsize] = 0;
1149 								debug2("ID3v2: de-unsync made %lu out of %lu bytes", realsize, framesize);
1150 							}
1151 							pos = 0; /* now at the beginning again... */
1152 							/* Avoid reading over boundary, even if there is a */
1153 							/* zero byte of padding for safety. */
1154 							if(realsize) switch(tt)
1155 							{
1156 								case comment:
1157 								case uslt:
1158 									process_comment(fr, tt, realdata, realsize, comment+1, id);
1159 								break;
1160 								case extra: /* perhaps foobar2000's work */
1161 									process_extra(fr, realdata, realsize, extra+1, id);
1162 								break;
1163 								case rva2: /* "the" RVA tag */
1164 								{
1165 									/* starts with null-terminated identification */
1166 									if(VERBOSE3) fprintf(stderr, "Note: RVA2 identification \"%s\"\n", realdata);
1167 									/* default: some individual value, mix mode */
1168 									rva_mode = 0;
1169 									if( !strncasecmp((char*)realdata, "album", 5)
1170 										 || !strncasecmp((char*)realdata, "audiophile", 10)
1171 										 || !strncasecmp((char*)realdata, "user", 4))
1172 									rva_mode = 1;
1173 									if(fr->rva.level[rva_mode] <= rva2+1)
1174 									{
1175 										pos += strlen((char*) realdata) + 1;
1176 										debug2("got my pos: %zu - %zu", realsize, pos);
1177 										// channel and two bytes for RVA value
1178 										// pos possibly just past the safety zero, so one more than realsize
1179 										if(pos > realsize || realsize-pos < 3)
1180 										{
1181 											if(NOQUIET)
1182 												error("bad RVA2 tag (truncated?)");
1183 										}
1184 										else if(realdata[pos] == 1)
1185 										{
1186 											++pos;
1187 											/* only handle master channel */
1188 											debug("ID3v2: it is for the master channel");
1189 											/* two bytes adjustment, one byte for bits representing peak - n bytes, eh bits, for peak */
1190 											/* 16 bit signed integer = dB * 512. Do not shift signed integers! Multiply instead.
1191 											   Also no implementation-defined casting. Reinterpret the pointer to signed char, then do
1192 											   proper casting. */
1193 											fr->rva.gain[rva_mode] = (float) (
1194 												((short)((signed char*)realdata)[pos]) * 256 + (short)realdata[pos+1] ) / 512;
1195 											pos += 2;
1196 											if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]);
1197 											/* heh, the peak value is represented by a number of bits - but in what manner? Skipping that part */
1198 											fr->rva.peak[rva_mode] = 0;
1199 											fr->rva.level[rva_mode] = rva2+1;
1200 										}
1201 									}
1202 								}
1203 								break;
1204 								/* non-rva metainfo, simply store... */
1205 								case text:
1206 									process_text(fr, realdata, realsize, id);
1207 								break;
1208 								case picture:
1209 									if (fr->p.flags & MPG123_PICTURE)
1210 									process_picture(fr, realdata, realsize);
1211 
1212 									break;
1213 								default: if(NOQUIET) error1("ID3v2: unknown frame type %i", tt);
1214 							}
1215 							if(unsyncbuffer)
1216 								free(unsyncbuffer);
1217 						}
1218 						#undef BAD_FFLAGS
1219 						#undef PRES_TAG_FFLAG
1220 						#undef PRES_FILE_FFLAG
1221 						#undef READ_ONLY_FFLAG
1222 						#undef GROUP_FFLAG
1223 						#undef COMPR_FFLAG
1224 						#undef ENCR_FFLAG
1225 						#undef UNSYNC_FFLAG
1226 						#undef DATLEN_FFLAG
1227 					}
1228 					else break;
1229 					#undef KNOWN_FRAMES
1230 				}
1231 			} else
1232 			{
1233 				if(VERBOSE3)
1234 					fprintf(stderr, "Note: faulty ID3v2 tag still clearing old data\n");
1235 				reset_id3(fr);
1236 			}
1237 		} else // No new data, but still there was a tag that invalidates old data.
1238 		{
1239 			if(VERBOSE3)
1240 				fprintf(stderr, "Note: empty ID3v2 clearing old data\n");
1241 			reset_id3(fr);
1242 		}
1243 tagparse_cleanup:
1244 		/* Get rid of stored raw data that should not be kept. */
1245 		if(!(fr->p.flags & MPG123_STORE_RAW_ID3))
1246 		{
1247 			free(fr->id3v2_raw);
1248 			fr->id3v2_raw = NULL;
1249 			fr->id3v2_size = 0;
1250 		}
1251 	}
1252 #endif /* NO_ID3V2 */
1253 	return ret;
1254 	#undef UNSYNC_FLAG
1255 	#undef EXTHEAD_FLAG
1256 	#undef COMPRESS_FLAG
1257 	#undef EXP_FLAG
1258 	#undef FOOTER_FLAG
1259 	#undef EXT_UPDATE_FLAG
1260 	#undef UNKOWN_FLAGS
1261 }
1262 
1263 #ifndef NO_ID3V2 /* Disabling all the rest... */
1264 
1265 static void convert_latin1(mpg123_string *sb, const unsigned char* s, size_t l, const int noquiet)
1266 {
1267 	size_t length = l;
1268 	size_t i;
1269 	unsigned char *p;
1270 	/* determine real length, a latin1 character can at most take 2  in UTF8 */
1271 	for(i=0; i<l; ++i)
1272 	if(s[i] >= 0x80) ++length;
1273 
1274 	debug1("UTF-8 length: %lu", (unsigned long)length);
1275 	/* one extra zero byte for paranoia */
1276 	if(!mpg123_grow_string(sb, length+1))
1277 		return;
1278 
1279 	p = (unsigned char*) sb->p; /* Signedness doesn't matter but it shows I thought about the non-issue */
1280 	for(i=0; i<l; ++i)
1281 	if(s[i] < 0x80){ *p = s[i]; ++p; }
1282 	else /* two-byte encoding */
1283 	{
1284 		*p     = 0xc0 | (s[i]>>6);
1285 		*(p+1) = 0x80 | (s[i] & 0x3f);
1286 		p+=2;
1287 	}
1288 
1289 	sb->p[length] = 0;
1290 	sb->fill = length+1;
1291 }
1292 
/*
	Check if we have byte order mark(s) at the start of the data, return:
	-1: little endian (FF FE)
	 0: no BOM
	 1: big endian (FE FF)

	This modifies source and len to indicate the data _after_ the BOM(s).
	Note on nasty data: The last encountered BOM determines the endianness.
	I have seen data with multiple BOMs, namely from "the" id3v2 program.

	The leading BOMs are consumed in a loop instead of by recursion, so the
	stack usage does not depend on how many BOMs the input contains.
*/
static int check_bom(const unsigned char** source, size_t *len)
{
	int last_bom = 0;

	/* A BOM needs two bytes; stop at the first pair that is not one. */
	while(*len >= 2)
	{
		const unsigned char *s = *source;
		int this_bom = 0;

		if(s[0] == 0xff && s[1] == 0xfe)
			this_bom = -1;
		else if(s[0] == 0xfe && s[1] == 0xff)
			this_bom = 1;

		if(this_bom == 0)
			break;

		/* Skip the detected BOM. The last one wins! */
		last_bom = this_bom;
		*source += 2;
		*len    -= 2;
	}
	return last_bom;
}
1329 
1330 #define FULLPOINT(f,s) ( (((f)&0x3ff)<<10) + ((s)&0x3ff) + 0x10000 )
1331 /* Remember: There's a limit at 0x1ffff. */
1332 #define UTF8LEN(x) ( (x)<0x80 ? 1 : ((x)<0x800 ? 2 : ((x)<0x10000 ? 3 : 4)))
1333 static void convert_utf16bom(mpg123_string *sb, const unsigned char* s, size_t l, const int noquiet)
1334 {
1335 	size_t i;
1336 	size_t n; /* number bytes that make up full pairs */
1337 	unsigned char *p;
1338 	size_t length = 0; /* the resulting UTF-8 length */
1339 	/* Determine real length... extreme case can be more than utf-16 length. */
1340 	size_t high = 0;
1341 	size_t low  = 1;
1342 	int bom_endian;
1343 
1344 	debug1("convert_utf16 with length %lu", (unsigned long)l);
1345 
1346 	bom_endian = check_bom(&s, &l);
1347 	debug1("UTF16 endianness check: %i", bom_endian);
1348 
1349 	if(bom_endian == -1) /* little-endian */
1350 	{
1351 		high = 1; /* The second byte is the high byte. */
1352 		low  = 0; /* The first byte is the low byte. */
1353 	}
1354 
1355 	n = (l/2)*2; /* number bytes that make up full pairs */
1356 
1357 	/* first: get length, check for errors -- stop at first one */
1358 	for(i=0; i < n; i+=2)
1359 	{
1360 		unsigned long point = ((unsigned long) s[i+high]<<8) + s[i+low];
1361 		if((point & 0xfc00) == 0xd800) /* lead surrogate */
1362 		{
1363 			unsigned short second = (i+3 < l) ? (s[i+2+high]<<8) + s[i+2+low] : 0;
1364 			if((second & 0xfc00) == 0xdc00) /* good... */
1365 			{
1366 				point = FULLPOINT(point,second);
1367 				length += UTF8LEN(point); /* possibly 4 bytes */
1368 				i+=2; /* We overstepped one word. */
1369 			}
1370 			else /* if no valid pair, break here */
1371 			{
1372 				if(noquiet) error2("Invalid UTF16 surrogate pair at %li (0x%04lx).", (unsigned long)i, point);
1373 				n = i; /* Forget the half pair, END! */
1374 				break;
1375 			}
1376 		}
1377 		else length += UTF8LEN(point); /* 1,2 or 3 bytes */
1378 	}
1379 
1380 	if(!mpg123_grow_string(sb, length+1))
1381 		return;
1382 
1383 	/* Now really convert, skip checks as these have been done just before. */
1384 	p = (unsigned char*) sb->p; /* Signedness doesn't matter but it shows I thought about the non-issue */
1385 	for(i=0; i < n; i+=2)
1386 	{
1387 		unsigned long codepoint = ((unsigned long) s[i+high]<<8) + s[i+low];
1388 		if((codepoint & 0xfc00) == 0xd800) /* lead surrogate */
1389 		{
1390 			unsigned short second = (s[i+2+high]<<8) + s[i+2+low];
1391 			codepoint = FULLPOINT(codepoint,second);
1392 			i+=2; /* We overstepped one word. */
1393 		}
1394 		if(codepoint < 0x80) *p++ = (unsigned char) codepoint;
1395 		else if(codepoint < 0x800)
1396 		{
1397 			*p++ = (unsigned char) (0xc0 | (codepoint>>6));
1398 			*p++ = (unsigned char) (0x80 | (codepoint & 0x3f));
1399 		}
1400 		else if(codepoint < 0x10000)
1401 		{
1402 			*p++ = (unsigned char) (0xe0 | (codepoint>>12));
1403 			*p++ = 0x80 | ((codepoint>>6) & 0x3f);
1404 			*p++ = 0x80 | (codepoint & 0x3f);
1405 		}
1406 		else if (codepoint < 0x200000)
1407 		{
1408 			*p++ = (unsigned char) (0xf0 | codepoint>>18);
1409 			*p++ = (unsigned char) (0x80 | ((codepoint>>12) & 0x3f));
1410 			*p++ = (unsigned char) (0x80 | ((codepoint>>6) & 0x3f));
1411 			*p++ = (unsigned char) (0x80 | (codepoint & 0x3f));
1412 		} /* ignore bigger ones (that are not possible here anyway) */
1413 	}
1414 	sb->p[sb->size-1] = 0; /* paranoia... */
1415 	sb->fill = sb->size;
1416 }
1417 #undef UTF8LEN
1418 #undef FULLPOINT
1419 
1420 static void convert_utf8(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet)
1421 {
1422 	if(mpg123_grow_string(sb, len+1))
1423 	{
1424 		memcpy(sb->p, source, len);
1425 		sb->p[len] = 0;
1426 		sb->fill = len+1;
1427 	}
1428 }
1429 
1430 #endif
1431