1 /*
2 	id3: ID3v2.3 and ID3v2.4 parsing (a relevant subset)
3 
4 	copyright 2006-2013 by the mpg123 project - free software under the terms of the LGPL 2.1
5 	see COPYING and AUTHORS files in distribution or http://mpg123.org
6 	initially written by Thomas Orgis
7 */
8 
9 #include "mpg123lib_intern.h"
10 #include "id3.h"
11 #include "debug.h"
12 
13 #ifndef NO_ID3V2 /* Only the main parsing routine will always be there. */
14 
/* We know the usual text frames plus some specifics. */
/* Number of frame IDs with dedicated handling; equals the number of rows in frame_type[]. */
#define KNOWN_FRAMES 5
/* Four-character frame IDs, each with a terminating zero.
   The row order matches the non-negative values of enum frame_types below
   (comment == 0 is "COMM", extra == 1 is "TXXX", ... picture == 4 is "APIC"). */
static const char frame_type[KNOWN_FRAMES][5] = { "COMM", "TXXX", "RVA2", "USLT", "APIC" };
/* Frame classification. The negative values (unknown, text) have no row in frame_type[];
   the others line up with its rows (presumably used as index by the tag parser - confirm there). */
enum frame_types { unknown = -2, text = -1, comment, extra, rva2, uslt, picture };
19 
/* UTF support definitions */

/* A converter gets len bytes of encoded text in source and writes its result into sb.
   noquiet is handed through from the caller (elsewhere in this file it gates error/warning output). */
typedef void (*text_converter)(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet);

/* The converters are only declared here; their definitions are not in this part of the file. */
static void convert_latin1  (mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet);
static void convert_utf16bom(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet);
static void convert_utf8    (mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet);

/* Converter table, indexed by the ID3v2 text encoding byte
   (0: Latin 1, 1: UTF-16 with BOM, 2: UTF-16 BE, 3: UTF-8; see enc_name()). */
static const text_converter text_converters[4] =
{
	convert_latin1,
	/* We always check for (multiple) BOM in 16bit unicode. Without BOM, UTF16 BE is the default.
	   Errors in encoding are detected anyway. */
	convert_utf16bom,
	convert_utf16bom,
	convert_utf8
};

/* Bytes per code unit for each encoding, same index as text_converters[].
   This is also the width of the zero terminator that next_text() searches for. */
static const unsigned int encoding_widths[4] = { 1, 2, 2, 1 };
39 
40 /* the code starts here... */
41 
/* Forget the convenience links into the ID3v2 lists. Nothing is freed here,
   the pointers are just reset to NULL. */
static void null_id3_links(mpg123_handle *fr)
{
	mpg123_id3v2 *v2 = &fr->id3v2;

	v2->comment = NULL;
	v2->genre   = NULL;
	v2->year    = NULL;
	v2->album   = NULL;
	v2->artist  = NULL;
	v2->title   = NULL;
}
51 
/* Put the ID3v2 storage of a handle into the pristine "no tag seen" state:
   version 0, no links, all lists empty. Does not free anything. */
void init_id3(mpg123_handle *fr)
{
	mpg123_id3v2 *v2 = &fr->id3v2;

	v2->version = 0; /* nothing there */
	null_id3_links(fr);

	/* Each list is a pointer plus an element count. */
	v2->comment_list = NULL;
	v2->comments     = 0;

	v2->text  = NULL;
	v2->texts = 0;

	v2->extra  = NULL;
	v2->extras = 0;

	v2->picture  = NULL;
	v2->pictures = 0;
}
65 
66 /* Managing of the text, comment and extra lists. */
67 
68 /* Initialize one element. */
/* Bring one text element into a defined empty state:
   zeroed frame ID (4 bytes) and language (3 bytes), both strings initialized. */
static void init_mpg123_text(mpg123_text *txt)
{
	int k;

	for(k = 0; k < 4; ++k) txt->id[k] = 0;
	for(k = 0; k < 3; ++k) txt->lang[k] = 0;

	mpg123_init_string(&txt->description);
	mpg123_init_string(&txt->text);
}
81 
/* Bring one picture element into a defined empty state:
   no image data, type 0, both strings initialized. */
static void init_mpg123_picture(mpg123_picture *pic)
{
	pic->data = NULL;
	pic->size = 0;
	pic->type = 0;
	mpg123_init_string(&pic->description);
	mpg123_init_string(&pic->mime_type);
}
90 
91 /* Free memory of one element. */
/* Release the two strings owned by a text element.
   The element itself (id, lang, the struct memory) is left alone. */
static void free_mpg123_text(mpg123_text *txt)
{
	mpg123_free_string(&txt->description);
	mpg123_free_string(&txt->text);
}
97 
/* Release everything owned by a picture element: both strings and the image data.
   The data pointer is reset afterwards so that a second call on the same element
   (possible when an element lingers in the list after a failed shrink, see
   pop_id3_picture()) cannot free the same memory twice. */
static void free_mpg123_picture(mpg123_picture * pic)
{
	mpg123_free_string(&pic->mime_type);
	mpg123_free_string(&pic->description);
	free(pic->data); /* free(NULL) is a no-op, no guard needed */
	pic->data = NULL;
	pic->size = 0;
}
105 
106 /* Free memory of whole list. */
107 #define free_comment(mh) free_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments))
108 #define free_text(mh)    free_id3_text(&((mh)->id3v2.text),         &((mh)->id3v2.texts))
109 #define free_extra(mh)   free_id3_text(&((mh)->id3v2.extra),        &((mh)->id3v2.extras))
110 #define free_picture(mh) free_id3_picture(&((mh)->id3v2.picture),   &((mh)->id3v2.pictures))
free_id3_text(mpg123_text ** list,size_t * size)111 static void free_id3_text(mpg123_text **list, size_t *size)
112 {
113 	size_t i;
114 	for(i=0; i<*size; ++i) free_mpg123_text(&((*list)[i]));
115 
116 	free(*list);
117 	*list = NULL;
118 	*size = 0;
119 }
/* Free all elements of a picture list, then the list itself.
   Afterwards *list is NULL and *size is 0. */
static void free_id3_picture(mpg123_picture **list, size_t *size)
{
	mpg123_picture *item = *list;
	size_t left = *size;

	while(left--) free_mpg123_picture(item++);

	free(*list);
	*size = 0;
	*list = NULL;
}
129 
130 /* Add items to the list. */
131 #define add_comment(mh) add_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments))
132 #define add_text(mh)    add_id3_text(&((mh)->id3v2.text),         &((mh)->id3v2.texts))
133 #define add_extra(mh)   add_id3_text(&((mh)->id3v2.extra),        &((mh)->id3v2.extras))
134 #define add_picture(mh)   add_id3_picture(&((mh)->id3v2.picture),       &((mh)->id3v2.pictures))
add_id3_text(mpg123_text ** list,size_t * size)135 static mpg123_text *add_id3_text(mpg123_text **list, size_t *size)
136 {
137 	mpg123_text *x = safe_realloc(*list, sizeof(mpg123_text)*(*size+1));
138 	if(x == NULL) return NULL; /* bad */
139 
140 	*list  = x;
141 	*size += 1;
142 	init_mpg123_text(&((*list)[*size-1]));
143 
144 	return &((*list)[*size-1]); /* Return pointer to the added text. */
145 }
/* Grow a picture list by one initialized element.
   Returns a pointer to the new (last) element, or NULL when the reallocation
   failed; in that case list and size are untouched. */
static mpg123_picture *add_id3_picture(mpg123_picture **list, size_t *size)
{
	const size_t count = *size;
	mpg123_picture *fresh;
	mpg123_picture *grown = safe_realloc(*list, sizeof(mpg123_picture)*(count+1));

	if(grown == NULL) return NULL; /* bad */

	fresh = grown + count;
	init_mpg123_picture(fresh);

	*list = grown;
	*size = count + 1;
	return fresh;
}
157 
158 
159 /* Remove the last item. */
160 #define pop_comment(mh) pop_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments))
161 #define pop_text(mh)    pop_id3_text(&((mh)->id3v2.text),         &((mh)->id3v2.texts))
162 #define pop_extra(mh)   pop_id3_text(&((mh)->id3v2.extra),        &((mh)->id3v2.extras))
163 #define pop_picture(mh)   pop_id3_picture(&((mh)->id3v2.picture),       &((mh)->id3v2.pictures))
pop_id3_text(mpg123_text ** list,size_t * size)164 static void pop_id3_text(mpg123_text **list, size_t *size)
165 {
166 	mpg123_text *x;
167 	if(*size < 1) return;
168 
169 	free_mpg123_text(&((*list)[*size-1]));
170 	if(*size > 1)
171 	{
172 		x = safe_realloc(*list, sizeof(mpg123_text)*(*size-1));
173 		if(x != NULL){ *list  = x; *size -= 1; }
174 	}
175 	else
176 	{
177 		free(*list);
178 		*list = NULL;
179 		*size = 0;
180 	}
181 }
/* Remove the last element of a picture list; a no-op on an empty list.
   The element's strings and image data are freed and the count is always
   reduced. Removing the only element frees the list and sets *list to NULL.
   The count used to stay unchanged when shrinking the storage failed, which
   left an element with freed image data in the list (double free later on). */
static void pop_id3_picture(mpg123_picture **list, size_t *size)
{
	mpg123_picture *shrunk;
	if(*size < 1) return;

	free_mpg123_picture(&((*list)[*size-1]));
	*size -= 1;
	if(*size == 0)
	{
		free(*list);
		*list = NULL;
		return;
	}
	/* Shrinking is only an optimization: on failure the old, larger block is
	   kept (assuming safe_realloc() behaves like realloc() here). */
	shrunk = safe_realloc(*list, sizeof(mpg123_picture)*(*size));
	if(shrunk != NULL) *list = shrunk;
}
200 
201 /* OK, back to the higher level functions. */
202 
/* Free all four ID3v2 lists (pictures, comments, extras, texts) of the handle.
   Each list pointer ends up NULL with a count of zero. */
void exit_id3(mpg123_handle *fr)
{
	mpg123_id3v2 *v2 = &fr->id3v2;

	free_id3_picture(&v2->picture,   &v2->pictures);
	free_id3_text(&v2->comment_list, &v2->comments);
	free_id3_text(&v2->extra,        &v2->extras);
	free_id3_text(&v2->text,         &v2->texts);
}
210 
/* Throw away all stored ID3v2 data and start over with the empty state. */
void reset_id3(mpg123_handle *fr)
{
	exit_id3(fr); /* release the lists ... */
	init_id3(fr); /* ... then reset version, links and counts */
}
216 
217 /* Set the id3v2.artist id3v2.title ... links to elements of the array. */
id3_link(mpg123_handle * fr)218 void id3_link(mpg123_handle *fr)
219 {
220 	size_t i;
221 	mpg123_id3v2 *v2 = &fr->id3v2;
222 	debug("linking ID3v2");
223 	null_id3_links(fr);
224 	for(i=0; i<v2->texts; ++i)
225 	{
226 		mpg123_text *entry = &v2->text[i];
227 		if     (!strncmp("TIT2", entry->id, 4)) v2->title  = &entry->text;
228 		else if(!strncmp("TALB", entry->id, 4)) v2->album  = &entry->text;
229 		else if(!strncmp("TPE1", entry->id, 4)) v2->artist = &entry->text;
230 		else if(!strncmp("TYER", entry->id, 4)) v2->year   = &entry->text;
231 		else if(!strncmp("TCON", entry->id, 4)) v2->genre  = &entry->text;
232 	}
233 	for(i=0; i<v2->comments; ++i)
234 	{
235 		mpg123_text *entry = &v2->comment_list[i];
236 		if(entry->description.fill == 0 || entry->description.p[0] == 0)
237 		v2->comment = &entry->text;
238 	}
239 	/* When no generic comment found, use the last non-generic one. */
240 	if(v2->comment == NULL && v2->comments > 0)
241 	v2->comment = &v2->comment_list[v2->comments-1].text;
242 }
243 
244 /*
245 	Store ID3 text data in an mpg123_string; either verbatim copy or everything translated to UTF-8 encoding.
246 	Preserve the zero string separator (I don't need strlen for the total size).
247 
248 	ID3v2 standard says that there should be one text frame of specific type per tag, and subsequent tags overwrite old values.
249 	So, I always replace the text that may be stored already (perhaps with a list of zero-separated strings, though).
250 */
store_id3_text(mpg123_string * sb,unsigned char * source,size_t source_size,const int noquiet,const int notranslate)251 static void store_id3_text(mpg123_string *sb, unsigned char *source, size_t source_size, const int noquiet, const int notranslate)
252 {
253 	unsigned char encoding;
254 	if(!source_size)
255 	{
256 		debug("Empty id3 data!");
257 		return;
258 	}
259 
260 	/* We shall just copy the data. Client wants to decode itself. */
261 	if(notranslate)
262 	{
263 		/* Future: Add a path for ID3 errors. */
264 		if(!mpg123_resize_string(sb, source_size))
265 		{
266 			if(noquiet) error("Cannot resize target string, out of memory?");
267 			return;
268 		}
269 		memcpy(sb->p, source, source_size);
270 		sb->fill = source_size;
271 		debug1("stored undecoded ID3 text of size %"SIZE_P, (size_p)source_size);
272 		return;
273 	}
274 
275 	encoding = source[0];
276 	if(encoding > mpg123_id3_enc_max)
277 	{
278 		if(noquiet)
279 			error1("Unknown text encoding %u, I take no chances, sorry!", encoding);
280 
281 		mpg123_free_string(sb);
282 		return;
283 	}
284 	id3_to_utf8(sb, encoding, source+1, source_size-1, noquiet);
285 
286 	if(sb->fill) debug1("UTF-8 string (the first one): %s", sb->p);
287 	else if(noquiet) error("unable to convert string to UTF-8 (out of memory, junk input?)!");
288 }
289 
290 /* On error, sb->size is 0. */
291 /* Also, encoding has been checked already! */
id3_to_utf8(mpg123_string * sb,unsigned char encoding,const unsigned char * source,size_t source_size,int noquiet)292 void id3_to_utf8(mpg123_string *sb, unsigned char encoding, const unsigned char *source, size_t source_size, int noquiet)
293 {
294 	unsigned int bwidth;
295 	debug1("encoding: %u", encoding);
296 	/* A note: ID3v2.3 uses UCS-2 non-variable 16bit encoding, v2.4 uses UTF16.
297 	   UTF-16 uses a reserved/private range in UCS-2 to add the magic, so we just always treat it as UTF. */
298 	bwidth = encoding_widths[encoding];
299 	/* Hack! I've seen a stray zero byte before BOM. Is that supposed to happen? */
300 	if(encoding != mpg123_id3_utf16be) /* UTF16be _can_ beging with a null byte! */
301 	while(source_size > bwidth && source[0] == 0)
302 	{
303 		--source_size;
304 		++source;
305 		debug("skipped leading zero");
306 	}
307 	if(source_size % bwidth)
308 	{
309 		/* When we need two bytes for a character, it's strange to have an uneven bytestream length. */
310 		if(noquiet) warning2("Weird tag size %d for encoding %u - I will probably trim too early or something but I think the MP3 is broken.", (int)source_size, encoding);
311 		source_size -= source_size % bwidth;
312 	}
313 	text_converters[encoding](sb, source, source_size, noquiet);
314 }
315 
316 /* You have checked encoding to be in the range already. */
next_text(unsigned char * prev,unsigned char encoding,size_t limit)317 static unsigned char *next_text(unsigned char* prev, unsigned char encoding, size_t limit)
318 {
319 	unsigned char *text = prev;
320 	size_t width = encoding_widths[encoding];
321 
322 	/* So I go lengths to find zero or double zero...
323 	   Remember bug 2834636: Only check for aligned NULLs! */
324 	while(text-prev < (ssize_t)limit)
325 	{
326 		if(text[0] == 0)
327 		{
328 			if(width <= limit-(text-prev))
329 			{
330 				size_t i = 1;
331 				for(; i<width; ++i) if(text[i] != 0) break;
332 
333 				if(i == width) /* found a null wide enough! */
334 				{
335 					text += width;
336 					break;
337 				}
338 			}
339 			else return NULL; /* No full character left? This text is broken */
340 		}
341 
342 		text += width;
343 	}
344 	if((size_t)(text-prev) >= limit) text = NULL;
345 
346 	return text;
347 }
348 
/* Human-readable name for an ID3v2 text encoding byte, for diagnostics.
   Anything beyond the four known encodings yields "unknown!". */
static const char *enc_name(unsigned char enc)
{
	static const char *const names[] =
	{
		"Latin 1", "UTF-16 BOM", "UTF-16 BE", "UTF-8"
	};

	if(enc < sizeof(names)/sizeof(names[0])) return names[enc];

	return "unknown!";
}
360 
/* Store a plain text frame as new element of the text list.
   realdata/realsize: frame payload, beginning with the encoding byte.
   id: the 4 characters of the frame ID, copied into the element.
   The element is added even when the payload is empty or fails to convert;
   its text is then simply empty. */
static void process_text(mpg123_handle *fr, unsigned char *realdata, size_t realsize, char *id)
{
	/* Text encoding          $xx */
	/* The text (encoded) ... */
	mpg123_text *t = add_text(fr);
	if(VERBOSE4)
	{
		/* Without payload there is no encoding byte to look at. */
		if(realsize > 0) fprintf(stderr, "Note: Storing text from %s encoding\n", enc_name(realdata[0]));
	}
	if(t == NULL)
	{
		if(NOQUIET) error("Unable to attach new text!");
		return;
	}
	memcpy(t->id, id, 4);
	store_id3_text(&t->text, realdata, realsize, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT);
	if(VERBOSE4)
	{
		/* Only a successfully converted text is safe to print as string: In plain mode the
		   buffer holds raw bytes without guaranteed termination, and after a failure or with
		   empty input there may be no buffer at all. */
		if(!(fr->p.flags & MPG123_PLAIN_ID3TEXT) && t->text.fill > 0)
			fprintf(stderr, "Note: ID3v2 %c%c%c%c text frame: %s\n", id[0], id[1], id[2], id[3], t->text.p);
		else
			fprintf(stderr, "Note: ID3v2 %c%c%c%c text frame of %"SIZE_P" bytes (not printed)\n", id[0], id[1], id[2], id[3], (size_p)t->text.fill);
	}
}
376 
/* Store an APIC frame as new element of the picture list.
   Payload layout as parsed here: encoding byte, MIME type (always Latin 1,
   zero-terminated), picture type byte, description (in the given encoding,
   terminated), then the image data up to the end of the frame.
   On any parsing or memory problem the freshly added element is removed again. */
static void process_picture(mpg123_handle *fr, unsigned char *realdata, size_t realsize)
{
	unsigned char encoding;
	mpg123_picture *pic = NULL;
	unsigned char* workpoint;
	if(realsize == 0)
	{
		debug("Empty id3 data!");
		return;
	}
	/* Only look at the payload after knowing that there is some. */
	encoding = realdata[0];
	if(encoding > mpg123_id3_enc_max)
	{
		if(NOQUIET)
			error1("Unknown text encoding %u, I take no chances, sorry!", encoding);
		return;
	}
	if(VERBOSE4) fprintf(stderr, "Note: Storing picture from APIC frame.\n");
	/* decompose realdata accordingly */
	pic = add_picture(fr);
	if(pic == NULL)
	{
		if(NOQUIET) error("Unable to attach new picture!");
		return;
	}
	realdata++; realsize--;
	/* get mime type (encoding is always latin-1) */
	workpoint = next_text(realdata, 0, realsize);
	if (workpoint == NULL) {
		pop_picture(fr);
		if (NOQUIET) error("Unable to get mime type for picture; skipping picture.");
		return;
	}
	id3_to_utf8(&pic->mime_type, 0, realdata, workpoint - realdata, NOQUIET);
	realsize -= workpoint - realdata;
	realdata = workpoint;
	/* get picture type; next_text() only succeeds with data left, so there is a byte to read */
	pic->type = realdata[0];
	realdata++; realsize--;
	/* get description (encoding is encoding) */
	workpoint = next_text(realdata, encoding, realsize);
	if (workpoint == NULL) {
		if (NOQUIET) error("Unable to get description for picture; skipping picture.");
		pop_picture(fr);
		return;
	}
	id3_to_utf8(&pic->description, encoding, realdata, workpoint - realdata, NOQUIET);
	realsize -= workpoint - realdata;
	if (realsize == 0) {
		if (NOQUIET) error("No picture data defined; skipping picture.");
		pop_picture(fr);
		return;
	}
	/* The rest of the frame, starting at workpoint, is the image itself. */
	pic->data = malloc(realsize);
	if (pic->data == NULL) {
		if (NOQUIET) error("Unable to allocate memory for picture; skipping picture");
		pop_picture(fr);
		return;
	}
	memcpy(pic->data, workpoint, realsize);
	pic->size = realsize;
	if(VERBOSE4) fprintf(stderr, "Note: ID3v2 APIC picture frame of type: %d\n", pic->type);
}
440 
441 /* Store a new comment that perhaps is a RVA / RVA_ALBUM/AUDIOPHILE / RVA_MIX/RADIO one
442    Special gimmik: It also stores USLT to the texts. Stucture is the same as for comments. */
process_comment(mpg123_handle * fr,enum frame_types tt,unsigned char * realdata,size_t realsize,int rva_level,char * id)443 static void process_comment(mpg123_handle *fr, enum frame_types tt, unsigned char *realdata, size_t realsize, int rva_level, char *id)
444 {
445 	/* Text encoding          $xx */
446 	/* Language               $xx xx xx */
447 	/* Short description (encoded!)      <text> $00 (00) */
448 	/* Then the comment text (encoded) ... */
449 	unsigned char  encoding = realdata[0];
450 	unsigned char *lang     = realdata+1; /* I'll only use the 3 bytes! */
451 	unsigned char *descr    = realdata+4;
452 	unsigned char *text     = NULL;
453 	mpg123_text *xcom = NULL;
454 	mpg123_text localcom; /* UTF-8 variant for local processing. */
455 
456 	if(realsize < (size_t)(descr-realdata))
457 	{
458 		if(NOQUIET) error1("Invalid frame size of %"SIZE_P" (too small for anything).", (size_p)realsize);
459 		return;
460 	}
461 	if(encoding > mpg123_id3_enc_max)
462 	{
463 		if(NOQUIET)
464 			error1("Unknown text encoding %u, I take no chances, sorry!", encoding);
465 		return;
466 	}
467 	xcom = (tt == uslt ? add_text(fr) : add_comment(fr));
468 	if(VERBOSE4) fprintf(stderr, "Note: Storing comment from %s encoding\n", enc_name(realdata[0]));
469 	if(xcom == NULL)
470 	{
471 		if(NOQUIET) error("Unable to attach new comment!");
472 		return;
473 	}
474 	memcpy(xcom->lang, lang, 3);
475 	memcpy(xcom->id, id, 4);
476 	/* Now I can abuse a byte from lang for the encoding. */
477 	descr[-1] = encoding;
478 	/* Be careful with finding the end of description, I have to honor encoding here. */
479 	text = next_text(descr, encoding, realsize-(descr-realdata));
480 	if(text == NULL)
481 	{
482 		if(NOQUIET) error("No comment text / valid description?");
483 		pop_comment(fr);
484 		return;
485 	}
486 
487 	init_mpg123_text(&localcom);
488 	/* Store the text, without translation to UTF-8, but for comments always a local copy in UTF-8.
489 	   Reminder: No bailing out from here on without freeing the local comment data! */
490 	store_id3_text(&xcom->description, descr-1, text-descr+1, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT);
491 	if(tt == comment)
492 	store_id3_text(&localcom.description, descr-1, text-descr+1, NOQUIET, 0);
493 
494 	text[-1] = encoding; /* Byte abusal for encoding... */
495 	store_id3_text(&xcom->text, text-1, realsize+1-(text-realdata), NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT);
496 	/* Remember: I will probably decode the above (again) for rva comment checking. So no messing around, please. */
497 
498 	if(VERBOSE4) /* Do _not_ print the verbatim text: The encoding might be funny! */
499 	{
500 		fprintf(stderr, "Note: ID3 comm/uslt desc of length %"SIZE_P".\n", (size_p)xcom->description.fill);
501 		fprintf(stderr, "Note: ID3 comm/uslt text of length %"SIZE_P".\n", (size_p)xcom->text.fill);
502 	}
503 	/* Look out for RVA info only when we really deal with a straight comment. */
504 	if(tt == comment && localcom.description.fill > 0)
505 	{
506 		int rva_mode = -1; /* mix / album */
507 		if(    !strcasecmp(localcom.description.p, "rva")
508 			 || !strcasecmp(localcom.description.p, "rva_mix")
509 			 || !strcasecmp(localcom.description.p, "rva_track")
510 			 || !strcasecmp(localcom.description.p, "rva_radio") )
511 		rva_mode = 0;
512 		else if(    !strcasecmp(localcom.description.p, "rva_album")
513 		         || !strcasecmp(localcom.description.p, "rva_audiophile")
514 		         || !strcasecmp(localcom.description.p, "rva_user") )
515 		rva_mode = 1;
516 		if((rva_mode > -1) && (fr->rva.level[rva_mode] <= rva_level))
517 		{
518 			/* Only translate the contents in here where we really need them. */
519 			store_id3_text(&localcom.text, text-1, realsize+1-(text-realdata), NOQUIET, 0);
520 			if(localcom.text.fill > 0)
521 			{
522 				fr->rva.gain[rva_mode] = (float) atof(localcom.text.p);
523 				if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]);
524 				fr->rva.peak[rva_mode] = 0;
525 				fr->rva.level[rva_mode] = rva_level;
526 			}
527 		}
528 	}
529 	/* Make sure to free the local memory... */
530 	free_mpg123_text(&localcom);
531 }
532 
/* Store a TXXX (user defined text) frame in the list of extras and evaluate
   ReplayGain info if the description names it (replaygain_track_gain/peak,
   replaygain_album_gain/peak, compared without regard to case).
   rva_level: RVA values are only taken over when the level stored in the handle
       for the respective mode is not higher than this.
   Note: One byte of the frame payload gets overwritten with the encoding byte. */
static void process_extra(mpg123_handle *fr, unsigned char* realdata, size_t realsize, int rva_level, char *id)
{
	/* Text encoding          $xx */
	/* Description        ... $00 (00) */
	/* Text ... */
	unsigned char encoding;
	unsigned char *descr   = realdata+1; /* remember, the encoding is descr[-1] */
	unsigned char *text;
	mpg123_text *xex;
	mpg123_text localex;

	/* Compare as size_t, like process_comment() does; no detour via int. */
	if(realsize < (size_t)(descr-realdata))
	{
		if(NOQUIET) error1("Invalid frame size of %lu (too small for anything).", (unsigned long)realsize);
		return;
	}
	/* Only look at the payload after knowing that it is large enough. */
	encoding = realdata[0];
	if(encoding > mpg123_id3_enc_max)
	{
		if(NOQUIET)
			error1("Unknown text encoding %u, I take no chances, sorry!", encoding);
		return;
	}
	text = next_text(descr, encoding, realsize-(descr-realdata));
	if(VERBOSE4) fprintf(stderr, "Note: Storing extra from %s encoding\n", enc_name(realdata[0]));
	if(text == NULL)
	{
		if(NOQUIET) error("No extra frame text / valid description?");
		return;
	}
	xex = add_extra(fr);
	if(xex == NULL)
	{
		if(NOQUIET) error("Unable to attach new extra text!");
		return;
	}
	memcpy(xex->id, id, 4);
	init_mpg123_text(&localex); /* For our local copy. */

	/* The outside storage gets reencoded to UTF-8 only if not requested otherwise.
	   Remember that we really need the -1 here to hand in the encoding byte!*/
	store_id3_text(&xex->description, descr-1, text-descr+1, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT);
	/* Our local copy is always stored in UTF-8! */
	store_id3_text(&localex.description, descr-1, text-descr+1, NOQUIET, 0);
	/* At first, only store the outside copy of the payload. We may not need the local copy. */
	text[-1] = encoding; /* This overwrites the end of the description, which is stored already. */
	store_id3_text(&xex->text, text-1, realsize-(text-realdata)+1, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT);

	/* Now check if we would like to interpret this extra info for RVA. */
	if(localex.description.fill > 0)
	{
		int is_peak = 0;
		int rva_mode = -1; /* mix / album */

		if(!strncasecmp(localex.description.p, "replaygain_track_",17))
		{
			if(VERBOSE3) fprintf(stderr, "Note: RVA ReplayGain track gain/peak\n");

			rva_mode = 0;
			if(!strcasecmp(localex.description.p, "replaygain_track_peak")) is_peak = 1;
			else if(strcasecmp(localex.description.p, "replaygain_track_gain")) rva_mode = -1;
		}
		else
		if(!strncasecmp(localex.description.p, "replaygain_album_",17))
		{
			if(VERBOSE3) fprintf(stderr, "Note: RVA ReplayGain album gain/peak\n");

			rva_mode = 1;
			if(!strcasecmp(localex.description.p, "replaygain_album_peak")) is_peak = 1;
			else if(strcasecmp(localex.description.p, "replaygain_album_gain")) rva_mode = -1;
		}
		if((rva_mode > -1) && (fr->rva.level[rva_mode] <= rva_level))
		{
			/* Now we need the translated copy of the data. */
			store_id3_text(&localex.text, text-1, realsize-(text-realdata)+1, NOQUIET, 0);
			if(localex.text.fill > 0)
			{
				if(is_peak)
				{
					fr->rva.peak[rva_mode] = (float) atof(localex.text.p);
					if(VERBOSE3) fprintf(stderr, "Note: RVA peak %f\n", fr->rva.peak[rva_mode]);
				}
				else
				{
					fr->rva.gain[rva_mode] = (float) atof(localex.text.p);
					if(VERBOSE3) fprintf(stderr, "Note: RVA gain %fdB\n", fr->rva.gain[rva_mode]);
				}
				fr->rva.level[rva_mode] = rva_level;
			}
		}
	}

	free_mpg123_text(&localex);
}
626 
627 /* Make a ID3v2.3+ 4-byte ID from a ID3v2.2 3-byte ID
628    Note that not all frames survived to 2.4; the mapping goes to 2.3 .
629    A notable miss is the old RVA frame, which is very unspecific anyway.
630    This function returns -1 when a not known 3 char ID was encountered, 0 otherwise. */
promote_framename(mpg123_handle * fr,char * id)631 static int promote_framename(mpg123_handle *fr, char *id) /* fr because of VERBOSE macros */
632 {
633 	size_t i;
634 	char *old[] =
635 	{
636 		"COM",  "TAL",  "TBP",  "TCM",  "TCO",  "TCR",  "TDA",  "TDY",  "TEN",  "TFT",
637 		"TIM",  "TKE",  "TLA",  "TLE",  "TMT",  "TOA",  "TOF",  "TOL",  "TOR",  "TOT",
638 		"TP1",  "TP2",  "TP3",  "TP4",  "TPA",  "TPB",  "TRC",  "TDA",  "TRK",  "TSI",
639 		"TSS",  "TT1",  "TT2",  "TT3",  "TXT",  "TXX",  "TYE"
640 	};
641 	char *new[] =
642 	{
643 		"COMM", "TALB", "TBPM", "TCOM", "TCON", "TCOP", "TDAT", "TDLY", "TENC", "TFLT",
644 		"TIME", "TKEY", "TLAN", "TLEN", "TMED", "TOPE", "TOFN", "TOLY", "TORY", "TOAL",
645 		"TPE1", "TPE2", "TPE3", "TPE4", "TPOS", "TPUB", "TSRC", "TRDA", "TRCK", "TSIZ",
646 		"TSSE", "TIT1", "TIT2", "TIT3", "TEXT", "TXXX", "TYER"
647 	};
648 	for(i=0; i<sizeof(old)/sizeof(char*); ++i)
649 	{
650 		if(!strncmp(id, old[i], 3))
651 		{
652 			memcpy(id, new[i], 4);
653 			if(VERBOSE3) fprintf(stderr, "Translated ID3v2.2 frame %s to %s\n", old[i], new[i]);
654 			return 0;
655 		}
656 	}
657 	if(VERBOSE3) fprintf(stderr, "Ignoring untranslated ID3v2.2 frame %c%c%c\n", id[0], id[1], id[2]);
658 	return -1;
659 }
660 
661 #endif /* NO_ID3V2 */
662 
663 /*
664 	trying to parse ID3v2.3 and ID3v2.4 tags...
665 
666 	returns:  0: bad or just unparseable tag
667 	          1: good, (possibly) new tag info
668 	         <0: reader error (may need more data feed, try again)
669 */
parse_new_id3(mpg123_handle * fr,unsigned long first4bytes)670 int parse_new_id3(mpg123_handle *fr, unsigned long first4bytes)
671 {
672 	#define UNSYNC_FLAG 128
673 	#define EXTHEAD_FLAG 64
674 	#define EXP_FLAG 32
675 	#define FOOTER_FLAG 16
676 	#define UNKNOWN_FLAGS 15 /* 00001111*/
677 	unsigned char buf[6];
678 	unsigned long length=0;
679 	unsigned char flags = 0;
680 	int ret = 1;
681 	int ret2;
682 #ifndef NO_ID3V2
683 	int skiptag = 0;
684 #endif
685 	unsigned char major = first4bytes & 0xff;
686 	debug1("ID3v2: major tag version: %i", major);
687 	if(major == 0xff) return 0; /* Invalid... */
688 	if((ret2 = fr->rd->read_frame_body(fr, buf, 6)) < 0) /* read more header information */
689 	return ret2;
690 
691 	if(buf[0] == 0xff) return 0; /* Revision, will never be 0xff. */
692 
693 	/* second new byte are some nice flags, if these are invalid skip the whole thing */
694 	flags = buf[1];
695 	debug1("ID3v2: flags 0x%08x", flags);
696 	/* use 4 bytes from buf to construct 28bit uint value and return 1; return 0 if bytes are not synchsafe */
697 	#define synchsafe_to_long(buf,res) \
698 	( \
699 		(((buf)[0]|(buf)[1]|(buf)[2]|(buf)[3]) & 0x80) ? 0 : \
700 		(res =  (((unsigned long) (buf)[0]) << 21) \
701 		     | (((unsigned long) (buf)[1]) << 14) \
702 		     | (((unsigned long) (buf)[2]) << 7) \
703 		     |  ((unsigned long) (buf)[3]) \
704 		,1) \
705 	)
706 	/* id3v2.3 does not store synchsafe frame sizes, but synchsafe tag size - doh! */
707 	/* Remember: bytes_to_long() can yield ULONG_MAX on 32 bit platforms! */
708 	#define bytes_to_long(buf,res) \
709 	( \
710 		major == 3 ? \
711 		(res =  (((unsigned long) (buf)[0]) << 24) \
712 		     | (((unsigned long) (buf)[1]) << 16) \
713 		     | (((unsigned long) (buf)[2]) << 8) \
714 		     |  ((unsigned long) (buf)[3]) \
715 		,1) : synchsafe_to_long(buf,res) \
716 	)
717 	/* for id3v2.2 only */
718 	#define threebytes_to_long(buf,res) \
719 	( \
720 		res =  (((unsigned long) (buf)[0]) << 16) \
721 		     | (((unsigned long) (buf)[1]) << 8) \
722 		     |  ((unsigned long) (buf)[2]) \
723 	)
724 
725 	/* length-10 or length-20 (footer present); 4 synchsafe integers == 28 bit number  */
726 	/* we have already read 10 bytes, so left are length or length+10 bytes belonging to tag */
727 	/* Note: This is an 28 bit value in 32 bit storage, plenty of space for */
728 	/* length+x for reasonable x. */
729 	if(!synchsafe_to_long(buf+2,length))
730 	{
731 		if(NOQUIET) error4("Bad tag length (not synchsafe): 0x%02x%02x%02x%02x; You got a bad ID3 tag here.", buf[2],buf[3],buf[4],buf[5]);
732 		return 0;
733 	}
734 	debug1("ID3v2: tag data length %lu", length);
735 #ifndef NO_ID3V2
736 	if(VERBOSE2) fprintf(stderr,"Note: ID3v2.%i rev %i tag of %lu bytes\n", major, buf[0], length);
737 	/* skip if unknown version/scary flags, parse otherwise */
738 	if(fr->p.flags & MPG123_SKIP_ID3V2)
739 	{
740 		if(VERBOSE3)
741 			fprintf(stderr, "Note: Skipping ID3v2 tag per user request.\n");
742 		skiptag = 1;
743 	}
744 	if((flags & UNKNOWN_FLAGS) || (major > 4) || (major < 2))
745 	{
746 		if(NOQUIET)
747 			warning2( "ID3v2: Won't parse the ID3v2 tag with major version"
748 				" %u and flags 0x%xu - some extra code may be needed"
749 			,	major, flags );
750 		skiptag = 1;
751 	}
752 	if(length < 10)
753 	{
754 		if(NOQUIET)
755 			warning1("ID3v2: unrealistic small tag lengh %lu, skipping", length);
756 		skiptag = 1;
757 	}
758 	if(skiptag)
759 	{
760 #endif
761 		if((ret2 = fr->rd->skip_bytes(fr,length)) < 0) /* will not store data in backbuff! */
762 		ret = ret2;
763 #ifndef NO_ID3V2
764 	}
765 	else
766 	{
767 		unsigned char* tagdata = NULL;
768 		fr->id3v2.version = major;
769 		/* try to interpret that beast */
770 		if((tagdata = (unsigned char*) malloc(length+1)) != NULL)
771 		{
772 			debug("ID3v2: analysing frames...");
773 			if((ret2 = fr->rd->read_frame_body(fr,tagdata,length)) > 0)
774 			{
775 				unsigned long tagpos = 0;
776 				/* bytes of frame title and of framesize value */
777 				unsigned int head_part = fr->id3v2.version > 2 ? 4 : 3;
778 				unsigned int flag_part = fr->id3v2.version > 2 ? 2 : 0;
779 				/* The amount of bytes that are unconditionally read for each frame: */
780 				/* ID, size, flags. */
781 				unsigned int framebegin = head_part+head_part+flag_part;
782 				debug1("ID3v2: have read at all %lu bytes for the tag now", (unsigned long)length+6);
783 				/* going to apply strlen for strings inside frames, make sure that it doesn't overflow! */
784 				tagdata[length] = 0;
785 				if(flags & EXTHEAD_FLAG)
786 				{
787 					debug("ID3v2: skipping extended header");
788 					if(!bytes_to_long(tagdata, tagpos) || tagpos >= length)
789 					{
790 						ret = 0;
791 						if(NOQUIET)
792 							error4( "Bad (non-synchsafe/too large) tag offset:"
793 								"0x%02x%02x%02x%02x"
794 							,	tagdata[0], tagdata[1], tagdata[2], tagdata[3] );
795 					}
796 				}
797 				if(ret > 0)
798 				{
799 					char id[5];
800 					unsigned long framesize;
801 					unsigned long fflags; /* need 16 bits, actually */
802 					id[4] = 0;
803 					/* Pos now advanced after ext head, now a frame has to follow. */
804 					/* Note: tagpos <= length, which is 28 bit integer, so both */
805 					/* far away from overflow for adding known small values. */
806 					/* I want to read at least one full header now. */
807 					while(length >= tagpos+framebegin)
808 					{
809 						int i = 0;
810 						unsigned long pos = tagpos;
811 						/* level 1,2,3 - 0 is info from lame/info tag! */
812 						/* rva tags with ascending significance, then general frames */
813 						enum frame_types tt = unknown;
814 						/* we may have entered the padding zone or any other strangeness: check if we have valid frame id characters */
815 						for(i=0; i< head_part; ++i)
816 						if( !( ((tagdata[tagpos+i] > 47) && (tagdata[tagpos+i] < 58))
817 						    || ((tagdata[tagpos+i] > 64) && (tagdata[tagpos+i] < 91)) ) )
818 						{
819 							debug5("ID3v2: real tag data apparently ended after %lu bytes with 0x%02x%02x%02x%02x", tagpos, tagdata[tagpos], tagdata[tagpos+1], tagdata[tagpos+2], tagdata[tagpos+3]);
820 							/* This is no hard error... let's just hope that we got something meaningful already (ret==1 in that case). */
821 							goto tagparse_cleanup; /* Need to escape two loops here. */
822 						}
823 						if(ret > 0)
824 						{
825 							/* 4 or 3 bytes id */
826 							strncpy(id, (char*) tagdata+pos, head_part);
827 							id[head_part] = 0; /* terminate for 3 or 4 bytes */
828 							pos += head_part;
829 							tagpos += head_part;
830 							/* size as 32 bits or 28 bits */
831 							if(fr->id3v2.version == 2) threebytes_to_long(tagdata+pos, framesize);
832 							else
833 							if(!bytes_to_long(tagdata+pos, framesize))
834 							{
835 								/* Just assume that up to now there was some good data. */
836 								if(NOQUIET) error1("ID3v2: non-syncsafe size of %s frame, skipping the remainder of tag", id);
837 								break;
838 							}
839 							if(VERBOSE3) fprintf(stderr, "Note: ID3v2 %s frame of size %lu\n", id, framesize);
840 							tagpos += head_part;
841 							pos += head_part;
842 							if(fr->id3v2.version > 2)
843 							{
844 								fflags  = (((unsigned long) tagdata[pos]) << 8) | ((unsigned long) tagdata[pos+1]);
845 								pos    += 2;
846 								tagpos += 2;
847 							}
848 							else fflags = 0;
849 
850 							if(length - tagpos < framesize)
851 							{
852 								if(NOQUIET) error("Whoa! ID3v2 frame claims to be larger than the whole rest of the tag.");
853 								break;
854 							}
855 							tagpos += framesize; /* the important advancement in whole tag */
856 							/* for sanity, after full parsing tagpos should be == pos */
857 							/* debug4("ID3v2: found %s frame, size %lu (as bytes: 0x%08lx), flags 0x%016lx", id, framesize, framesize, fflags); */
858 							/* %0abc0000 %0h00kmnp */
859 							#define BAD_FFLAGS (unsigned long) 36784
860 							#define PRES_TAG_FFLAG 16384
861 							#define PRES_FILE_FFLAG 8192
862 							#define READ_ONLY_FFLAG 4096
863 							#define GROUP_FFLAG 64
864 							#define COMPR_FFLAG 8
865 							#define ENCR_FFLAG 4
866 							#define UNSYNC_FFLAG 2
867 							#define DATLEN_FFLAG 1
868 							if(head_part < 4 && promote_framename(fr, id) != 0) continue;
869 
870 							/* shall not or want not handle these */
871 							if(fflags & (BAD_FFLAGS | COMPR_FFLAG | ENCR_FFLAG))
872 							{
873 								if(NOQUIET) warning("ID3v2: skipping invalid/unsupported frame");
874 								continue;
875 							}
876 
877 							for(i = 0; i < KNOWN_FRAMES; ++i)
878 							if(!strncmp(frame_type[i], id, 4)){ tt = i; break; }
879 
880 							if(id[0] == 'T' && tt != extra) tt = text;
881 
882 							if(tt != unknown)
883 							{
884 								int rva_mode = -1; /* mix / album */
885 								unsigned long realsize = framesize;
886 								unsigned char* realdata = tagdata+pos;
887 								if((flags & UNSYNC_FLAG) || (fflags & UNSYNC_FFLAG))
888 								{
889 									unsigned long ipos = 0;
890 									unsigned long opos = 0;
891 									debug("Id3v2: going to de-unsync the frame data");
892 									/* de-unsync: FF00 -> FF; real FF00 is simply represented as FF0000 ... */
893 									/* damn, that means I have to delete bytes from withing the data block... thus need temporal storage */
894 									/* standard mandates that de-unsync should always be safe if flag is set */
895 									realdata = (unsigned char*) malloc(framesize); /* will need <= bytes */
896 									if(realdata == NULL)
897 									{
898 										if(NOQUIET) error("ID3v2: unable to allocate working buffer for de-unsync");
899 										continue;
900 									}
901 									/* now going byte per byte through the data... */
902 									realdata[0] = tagdata[pos];
903 									opos = 1;
904 									for(ipos = pos+1; ipos < pos+framesize; ++ipos)
905 									{
906 										if(!((tagdata[ipos] == 0) && (tagdata[ipos-1] == 0xff)))
907 										{
908 											realdata[opos++] = tagdata[ipos];
909 										}
910 									}
911 									realsize = opos;
912 									debug2("ID3v2: de-unsync made %lu out of %lu bytes", realsize, framesize);
913 								}
914 								pos = 0; /* now at the beginning again... */
915 								/* Avoid reading over boundary, even if there is a */
916 								/* zero byte of padding for safety. */
917 								if(realsize) switch(tt)
918 								{
919 									case comment:
920 									case uslt:
921 										process_comment(fr, tt, realdata, realsize, comment+1, id);
922 									break;
923 									case extra: /* perhaps foobar2000's work */
924 										process_extra(fr, realdata, realsize, extra+1, id);
925 									break;
926 									case rva2: /* "the" RVA tag */
927 									{
928 										/* starts with null-terminated identification */
929 										if(VERBOSE3) fprintf(stderr, "Note: RVA2 identification \"%s\"\n", realdata);
930 										/* default: some individual value, mix mode */
931 										rva_mode = 0;
932 										if( !strncasecmp((char*)realdata, "album", 5)
933 										    || !strncasecmp((char*)realdata, "audiophile", 10)
934 										    || !strncasecmp((char*)realdata, "user", 4))
935 										rva_mode = 1;
936 										if(fr->rva.level[rva_mode] <= rva2+1)
937 										{
938 											pos += strlen((char*) realdata) + 1;
939 											if(realdata[pos] == 1)
940 											{
941 												++pos;
942 												/* only handle master channel */
943 												debug("ID3v2: it is for the master channel");
944 												/* two bytes adjustment, one byte for bits representing peak - n bytes, eh bits, for peak */
945 												/* 16 bit signed integer = dB * 512  ... the double cast is needed to preserve the sign of negative values! */
946 												fr->rva.gain[rva_mode] = (float) ( (((short)((signed char)realdata[pos])) << 8) | realdata[pos+1] ) / 512;
947 												pos += 2;
948 												if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]);
949 												/* heh, the peak value is represented by a number of bits - but in what manner? Skipping that part */
950 												fr->rva.peak[rva_mode] = 0;
951 												fr->rva.level[rva_mode] = rva2+1;
952 											}
953 										}
954 									}
955 									break;
956 									/* non-rva metainfo, simply store... */
957 									case text:
958 										process_text(fr, realdata, realsize, id);
959 									break;
960 									case picture:
961 										if (fr->p.flags & MPG123_PICTURE)
962 										process_picture(fr, realdata, realsize);
963 
964 										break;
965 									default: if(NOQUIET) error1("ID3v2: unknown frame type %i", tt);
966 								}
967 								if((flags & UNSYNC_FLAG) || (fflags & UNSYNC_FFLAG)) free(realdata);
968 							}
969 							#undef BAD_FFLAGS
970 							#undef PRES_TAG_FFLAG
971 							#undef PRES_FILE_FFLAG
972 							#undef READ_ONLY_FFLAG
973 							#undef GROUP_FFLAG
974 							#undef COMPR_FFLAG
975 							#undef ENCR_FFLAG
976 							#undef UNSYNC_FFLAG
977 							#undef DATLEN_FFLAG
978 						}
979 						else break;
980 						#undef KNOWN_FRAMES
981 					}
982 				}
983 			}
984 			else
985 			{
986 				/* There are tags with zero length. Strictly not an error, then. */
987 				if(length > 0 && NOQUIET && ret2 != MPG123_NEED_MORE) error("ID3v2: Duh, not able to read ID3v2 tag data.");
988 				ret = ret2;
989 			}
990 tagparse_cleanup:
991 			free(tagdata);
992 		}
993 		else
994 		{
995 			if(NOQUIET) error1("ID3v2: Arrg! Unable to allocate %lu bytes for interpreting ID3v2 data - trying to skip instead.", length);
996 			if((ret2 = fr->rd->skip_bytes(fr,length)) < 0) ret = ret2; /* will not store data in backbuff! */
997 			else ret = 0;
998 		}
999 	}
1000 #endif /* NO_ID3V2 */
1001 	/* skip footer if present */
1002 	if((ret > 0) && (flags & FOOTER_FLAG) && ((ret2 = fr->rd->skip_bytes(fr,length)) < 0)) ret = ret2;
1003 
1004 	return ret;
1005 	#undef UNSYNC_FLAG
1006 	#undef EXTHEAD_FLAG
1007 	#undef EXP_FLAG
1008 	#undef FOOTER_FLAG
1009 	#undef UNKOWN_FLAGS
1010 }
1011 
1012 #ifndef NO_ID3V2 /* Disabling all the rest... */
1013 
/*
	Convert ISO-8859-1 (Latin-1) text to UTF-8 and store the result in sb.

	sb:      destination string; resized to the UTF-8 length plus one zero byte
	s:       source bytes
	l:       number of source bytes
	noquiet: not used by this converter

	If the destination cannot be resized, sb is freed and nothing is stored.
*/
static void convert_latin1(mpg123_string *sb, const unsigned char* s, size_t l, const int noquiet)
{
	size_t utf8len = l;
	size_t idx;
	unsigned char *out;

	/* Every byte with the high bit set needs a second byte in UTF-8. */
	for(idx=0; idx<l; ++idx)
	{
		if(s[idx] & 0x80) utf8len += 1;
	}

	debug1("UTF-8 length: %lu", (unsigned long)utf8len);
	/* Reserve one more byte for the terminating zero. */
	if(!mpg123_resize_string(sb, utf8len+1))
	{
		mpg123_free_string(sb);
		return;
	}

	out = (unsigned char*) sb->p; /* Work on unsigned bytes for the bit operations. */
	for(idx=0; idx<l; ++idx)
	{
		const unsigned char c = s[idx];
		if(c & 0x80)
		{
			/* Two-byte sequence: 110000xx 10xxxxxx */
			out[0] = 0xc0 | (c>>6);
			out[1] = 0x80 | (c & 0x3f);
			out += 2;
		}
		else *out++ = c; /* Plain ASCII is copied as is. */
	}

	sb->p[utf8len] = 0;
	sb->fill = utf8len+1;
}
1040 
/*
	Check if we have byte order mark(s) at the beginning of the data, return:
	-1: little endian
	 0: no BOM
	 1: big endian

	This modifies source and len to indicate the data _after_ the BOM(s).
	Note on nasty data: The last encountered BOM determines the endianness.
	I have seen data with multiple BOMs, namely from "the" id3v2 program.
	Not nice, but what should I do?

	The BOMs are consumed in a loop, not by recursion: How many of them
	follow each other is dictated by the input data, so the stack usage
	must not grow with that count.
*/
static int check_bom(const unsigned char** source, size_t *len)
{
	int last_bom = 0; /* Endianness of the most recent BOM, 0 if none seen. */

	/* A BOM needs two bytes; stop as soon as less than that is left. */
	while(*len >= 2)
	{
		const unsigned char *s = *source;
		int this_bom = 0;

		if(s[0] == 0xff && s[1] == 0xfe)      this_bom = -1;
		else if(s[0] == 0xfe && s[1] == 0xff) this_bom = 1;

		if(this_bom == 0) break; /* Real data begins here. */

		/* Skip the detected BOM. The last one wins! */
		last_bom = this_bom;
		*source += 2;
		*len    -= 2;
	}

	return last_bom;
}
1077 
1078 #define FULLPOINT(f,s) ( (((f)&0x3ff)<<10) + ((s)&0x3ff) + 0x10000 )
1079 /* Remember: There's a limit at 0x1ffff. */
1080 #define UTF8LEN(x) ( (x)<0x80 ? 1 : ((x)<0x800 ? 2 : ((x)<0x10000 ? 3 : 4)))
convert_utf16bom(mpg123_string * sb,const unsigned char * s,size_t l,const int noquiet)1081 static void convert_utf16bom(mpg123_string *sb, const unsigned char* s, size_t l, const int noquiet)
1082 {
1083 	size_t i;
1084 	size_t n; /* number bytes that make up full pairs */
1085 	unsigned char *p;
1086 	size_t length = 0; /* the resulting UTF-8 length */
1087 	/* Determine real length... extreme case can be more than utf-16 length. */
1088 	size_t high = 0;
1089 	size_t low  = 1;
1090 	int bom_endian;
1091 
1092 	debug1("convert_utf16 with length %lu", (unsigned long)l);
1093 
1094 	bom_endian = check_bom(&s, &l);
1095 	debug1("UTF16 endianness check: %i", bom_endian);
1096 
1097 	if(bom_endian == -1) /* little-endian */
1098 	{
1099 		high = 1; /* The second byte is the high byte. */
1100 		low  = 0; /* The first byte is the low byte. */
1101 	}
1102 
1103 	n = (l/2)*2; /* number bytes that make up full pairs */
1104 
1105 	/* first: get length, check for errors -- stop at first one */
1106 	for(i=0; i < n; i+=2)
1107 	{
1108 		unsigned long point = ((unsigned long) s[i+high]<<8) + s[i+low];
1109 		if((point & 0xfc00) == 0xd800) /* lead surrogate */
1110 		{
1111 			unsigned short second = (i+3 < l) ? (s[i+2+high]<<8) + s[i+2+low] : 0;
1112 			if((second & 0xfc00) == 0xdc00) /* good... */
1113 			{
1114 				point = FULLPOINT(point,second);
1115 				length += UTF8LEN(point); /* possibly 4 bytes */
1116 				i+=2; /* We overstepped one word. */
1117 			}
1118 			else /* if no valid pair, break here */
1119 			{
1120 				if(noquiet) error2("Invalid UTF16 surrogate pair at %li (0x%04lx).", (unsigned long)i, point);
1121 				n = i; /* Forget the half pair, END! */
1122 				break;
1123 			}
1124 		}
1125 		else length += UTF8LEN(point); /* 1,2 or 3 bytes */
1126 	}
1127 
1128 	if(!mpg123_resize_string(sb, length+1)){ mpg123_free_string(sb); return ; }
1129 
1130 	/* Now really convert, skip checks as these have been done just before. */
1131 	p = (unsigned char*) sb->p; /* Signedness doesn't matter but it shows I thought about the non-issue */
1132 	for(i=0; i < n; i+=2)
1133 	{
1134 		unsigned long codepoint = ((unsigned long) s[i+high]<<8) + s[i+low];
1135 		if((codepoint & 0xfc00) == 0xd800) /* lead surrogate */
1136 		{
1137 			unsigned short second = (s[i+2+high]<<8) + s[i+2+low];
1138 			codepoint = FULLPOINT(codepoint,second);
1139 			i+=2; /* We overstepped one word. */
1140 		}
1141 		if(codepoint < 0x80) *p++ = (unsigned char) codepoint;
1142 		else if(codepoint < 0x800)
1143 		{
1144 			*p++ = (unsigned char) (0xc0 | (codepoint>>6));
1145 			*p++ = (unsigned char) (0x80 | (codepoint & 0x3f));
1146 		}
1147 		else if(codepoint < 0x10000)
1148 		{
1149 			*p++ = (unsigned char) (0xe0 | (codepoint>>12));
1150 			*p++ = 0x80 | ((codepoint>>6) & 0x3f);
1151 			*p++ = 0x80 | (codepoint & 0x3f);
1152 		}
1153 		else if (codepoint < 0x200000)
1154 		{
1155 			*p++ = (unsigned char) (0xf0 | codepoint>>18);
1156 			*p++ = (unsigned char) (0x80 | ((codepoint>>12) & 0x3f));
1157 			*p++ = (unsigned char) (0x80 | ((codepoint>>6) & 0x3f));
1158 			*p++ = (unsigned char) (0x80 | (codepoint & 0x3f));
1159 		} /* ignore bigger ones (that are not possible here anyway) */
1160 	}
1161 	sb->p[sb->size-1] = 0; /* paranoia... */
1162 	sb->fill = sb->size;
1163 }
1164 #undef UTF8LEN
1165 #undef FULLPOINT
1166 
/*
	Store text that is UTF-8 already: a plain copy with a zero byte appended.

	sb:      destination string; resized to len plus one zero byte
	source:  source bytes, copied without any validation
	len:     number of source bytes
	noquiet: not used by this converter

	If the destination cannot be resized, sb is freed and nothing is stored.
*/
static void convert_utf8(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet)
{
	const size_t needed = len+1; /* text plus terminating zero */

	if(!mpg123_resize_string(sb, needed))
	{
		mpg123_free_string(sb);
		return;
	}

	memcpy(sb->p, source, len);
	sb->p[len] = 0;
	sb->fill = needed;
}
1177 
1178 #endif
1179