1 /*
2 id3: ID3v2.3 and ID3v2.4 parsing (a relevant subset)
3
4 copyright 2006-2020 by the mpg123 project - free software under the terms of the LGPL 2.1
5 see COPYING and AUTHORS files in distribution or http://mpg123.org
6 initially written by Thomas Orgis
7
8 WIP: Handling of multiple ID3 tags in a stream.
9
10 1. With update flag: Add non-unique data, replace unique.
11 - Only one TALB, TPE1, etc.
12 - Only one TXXX with a certain description.
13 - Only one COMM with certain language and description.
14 - Only one APIC with certain type and description, generally only one
15 of type 1 and 2 each.
16 2. Without update flag: wipe whole data and only store new stuff.
17
18 BIG BAD BUT: How to properly handle seeks in a stream that make
19 the parser encounter the same tags again in random order? Is
20 there even a correct way to handle that without storing an
21 ordered list of all tags? I could simplify the code here and just
22 communicate that a frame should be an update to previous, and
23 at which stream position the frame was encountered. But since
24 libmpg123 is driven by MPEG frames, there could be multiple
25 ID3v2 tags in direct succession treated by the parser without
26 the library user being able to interfere.
27
28 This is severely fucked. All that complexity also doesn't matter
29 in practice, as streams use ICY and individual files have just one
30 ID3v2 tag (relevant for libmpg123). It's an academic problem. But
31 for seekable files, I could implement some jumping logic to find
32 and parse all ID3v2 for once and then set a flag that only jumps
33 the frames on seeks. That covers all local disk playback. For
34 streams, seeking is no issue (seeking back, at least), so the
35 update/replace logic works.
36
37 Look at the standard:
38
39 ------
40 5. Tag location
41
42 The default location of an ID3v2 tag is prepended to the audio so
43 that players can benefit from the information when the data is
44 streamed. It is however possible to append the tag, or make a
45 prepend/append combination. When deciding upon where an unembedded
46 tag should be located, the following order of preference SHOULD be
47 considered.
48
49 1. Prepend the tag.
50
51 2. Prepend a tag with all vital information and add a second tag at
52 the end of the file, before tags from other tagging systems. The
53 first tag is required to have a SEEK frame.
54
55 3. Add a tag at the end of the file, before tags from other tagging
56 systems.
57
58 In case 2 and 3 the tag can simply be appended if no other known tags
59 are present. The suggested method to find ID3v2 tags are:
60
61 1. Look for a prepended tag using the pattern found in section 3.1.
62
63 2. If a SEEK frame was found, use its values to guide further
64 searching.
65
66 3. Look for a tag footer, scanning from the back of the file.
67
68 For every new tag that is found, the old tag should be discarded
69 unless the update flag in the extended header (section 3.2) is set.
70 ------
71
72 For seekable streams, I simply need to implement explicit ID3v2 search along
73 that recommendation and keep the complete information. Streams that continue
74 growing during playback will not recognize added ID3v2 tags. So be it.
75 For non-seekable streams, a tag is always parsed when encountered, assuming
76 the order of update tags always matches.
77
First step for the 1.26 release shall be the implementation of the update
79 logic and glossing over the theoretical problem of re-parsing update
80 frames in the wrong order by ignoring it. They are not that relevant.
81
82 TODO: Cave in and add the missing frames from the spec. Not that far to go.
83 But need another data structure to communicate those ...
84 */
85
86 #include "mpg123lib_intern.h"
87 #include "id3.h"
88 #include "debug.h"
89
90 #ifndef NO_ID3V2 /* Only the main parsing routine will always be there. */
91
92 /* We know the usual text frames plus some specifics. */
/* Frame IDs that get dedicated handling besides the generic text frames. */
#define KNOWN_FRAMES 5
static const char frame_type[KNOWN_FRAMES][5] =
{
	"COMM",
	"TXXX",
	"RVA2",
	"USLT",
	"APIC"
};
/* The non-negative values follow the order of frame_type above
   (presumably they serve as indices into it); negative values
   stand for frames without an entry in that table. */
enum frame_types
{
	unknown = -2,
	text    = -1,
	comment =  0,
	extra   =  1,
	rva2    =  2,
	uslt    =  3,
	picture =  4
};
96
97 /* UTF support definitions */
98
/* Common signature of the text converters. id3_to_utf8() calls them with the
   target string, the raw text bytes, the byte count and the noquiet flag. */
typedef void (*text_converter)(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet);

static void convert_latin1 (mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet);
static void convert_utf16bom(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet);
static void convert_utf8 (mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet);

/* Converter lookup, indexed by the ID3v2 text encoding byte
   (0: Latin 1, 1: UTF-16 with BOM, 2: UTF-16 BE, 3: UTF-8). */
static const text_converter text_converters[4] =
{
	convert_latin1,
	/* We always check for (multiple) BOM in 16bit unicode. Without BOM, UTF16 BE is the default.
	   Errors in encoding are detected anyway. */
	convert_utf16bom,
	convert_utf16bom,
	convert_utf8
};

/* Width in bytes of one code unit (and thus of the terminating zero) per
   encoding byte; same indexing as text_converters. */
static const unsigned int encoding_widths[4] = { 1, 2, 2, 1 };
116
117 /* the code starts here... */
118
/* Clear the convenience pointers (title, artist, ...) of the handle.
   The text and comment lists themselves are not touched. */
static void null_id3_links(mpg123_handle *fr)
{
	mpg123_id3v2 *v2 = &fr->id3v2;

	v2->title   = NULL;
	v2->artist  = NULL;
	v2->album   = NULL;
	v2->year    = NULL;
	v2->genre   = NULL;
	v2->comment = NULL;
}
128
/* Put the ID3v2 storage of a handle into the empty state.
   Nothing is freed here; this only zeroes counters and pointers. */
void init_id3(mpg123_handle *fr)
{
	mpg123_id3v2 *v2 = &fr->id3v2;

	v2->version = 0; /* nothing there */
	null_id3_links(fr);

	/* Every list starts out empty: zero entries, no storage. */
	v2->comments     = 0;
	v2->comment_list = NULL;

	v2->texts = 0;
	v2->text  = NULL;

	v2->extras = 0;
	v2->extra  = NULL;

	v2->pictures = 0;
	v2->picture  = NULL;
}
142
143 /* Managing of the text, comment and extra lists. */
144
145 /* Initialize one element. */
/* Bring one text entry into a defined empty state:
   initialized strings, zeroed frame ID (4 bytes) and language (3 bytes). */
static void init_mpg123_text(mpg123_text *txt)
{
	int k;

	mpg123_init_string(&txt->text);
	mpg123_init_string(&txt->description);

	for(k = 0; k < 4; ++k)
		txt->id[k] = 0;
	for(k = 0; k < 3; ++k)
		txt->lang[k] = 0;
}
158
/* Bring one picture entry into a defined empty state: no image data,
   type zero and initialized MIME type / description strings. */
static void init_mpg123_picture(mpg123_picture *pic)
{
	pic->data = NULL;
	pic->size = 0;
	pic->type = 0;
	mpg123_init_string(&pic->mime_type);
	mpg123_init_string(&pic->description);
}
167
168 /* Free memory of one element. */
/* Release the two strings owned by a text entry.
   The ID and language bytes are left as they are. */
static void free_mpg123_text(mpg123_text *txt)
{
	mpg123_string *payload = &txt->text;
	mpg123_string *label   = &txt->description;

	mpg123_free_string(payload);
	mpg123_free_string(label);
}
174
/* Release the strings and the image buffer owned by a picture entry.
   pic->data is not reset here; callers either overwrite it or drop the entry. */
static void free_mpg123_picture(mpg123_picture * pic)
{
	mpg123_free_string(&pic->mime_type);
	mpg123_free_string(&pic->description);
	free(pic->data); /* free(NULL) is a no-op, no guard needed */
}
182
183 /* Free memory of whole list. */
/* Shorthands: each one hands the address of one list of the handle and of its
   entry counter to free_id3_text() / free_id3_picture(). */
#define free_comment(mh) free_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments))
#define free_text(mh) free_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts))
#define free_extra(mh) free_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras))
#define free_picture(mh) free_id3_picture(&((mh)->id3v2.picture), &((mh)->id3v2.pictures))
/* Free every entry of a text list, then the list storage itself,
   and leave the caller with a NULL list of size zero. */
static void free_id3_text(mpg123_text **list, size_t *size)
{
	mpg123_text *entries = *list;

	for(size_t n = 0; n < *size; ++n)
		free_mpg123_text(entries + n);

	free(entries);
	*size = 0;
	*list = NULL;
}
/* Free every entry of a picture list, then the list storage itself,
   and leave the caller with a NULL list of size zero. */
static void free_id3_picture(mpg123_picture **list, size_t *size)
{
	mpg123_picture *entries = *list;

	for(size_t n = 0; n < *size; ++n)
		free_mpg123_picture(entries + n);

	free(entries);
	*size = 0;
	*list = NULL;
}
206
207 /* Add items to the list. */
208
/* Each wrapper picks the list (and its counter) of the handle to work on and
   passes NULL for those of the id / language / description arguments that do
   not apply to that kind of entry. */
#define add_comment(mh, l, d) add_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments), NULL, l, d)
#define add_text(mh, id) add_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts), id, NULL, NULL)
/* NOTE(review): add_uslt() has no id parameter; the `id` in its expansion
   resolves to whatever variable of that name is in scope at the call site. */
#define add_uslt(mh, l, d) add_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts), id, l, d)
#define add_extra(mh, d) add_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras), NULL, NULL, d)
#define add_picture(mh, t, d) add_id3_picture(&((mh)->id3v2.picture), &((mh)->id3v2.pictures), t, d)
/*
	Get a slot in a text list: either an existing entry that the new data
	shall replace, or a freshly appended and initialized one.

	An existing entry is reused when
	- a description is given and it matches, together with the ID and the
	  language as far as those are given (non-NULL), or
	- no description is given and the ID matches.
	A language without a description is refused.

	Returns NULL on refusal or when the list cannot grow.
*/
static mpg123_text *add_id3_text( mpg123_text **list, size_t *size
,	char id[4], char lang[3], mpg123_string *description )
{
	mdebug( "add_id3_text id=%s lang=%s, desc=%s"
	,	id ? (char[5]) { id[0], id[1], id[2], id[3], 0 } : "(nil)"
	,	lang ? (char[4]) { lang[0], lang[1], lang[2], 0 } : "(nil)"
	,	description ? (description->fill ? description->p : "(empty)") : "(nil)" );

	if(lang && !description)
		return NULL; // no lone language intended

	if(id || description)
	{
		// Search the existing texts for one that should be overwritten.
		for(size_t n=0; n<*size; ++n)
		{
			mpg123_text *entry = &(*list)[n];
			int hit;
			if(description)
			{
				// Same description, plus same ID and language where given.
				hit = (!id || !memcmp(id, entry->id, 4))
				   && (!lang || !memcmp(entry->lang, lang, 3))
				   && mpg123_same_string(&(entry->description), description);
			}
			else
			{
				// Without description, the ID alone decides.
				hit = id && !memcmp(id, entry->id, 4);
			}
			if(hit)
				return entry;
			mdebug("add_id3_text: entry %zu was no match", n);
		}
	}

	mdebug("add_id3_text: append to list of %zu", *size);
	// No match: grow the list by one element.
	const size_t count = *size;
	mpg123_text *grown = safe_realloc(*list, sizeof(mpg123_text)*(count+1));
	if(grown == NULL)
		return NULL; /* bad */

	*list = grown;
	*size = count+1;

	mpg123_text *fresh = grown + count;
	init_mpg123_text(fresh);
	return fresh; /* Pointer to the added text. */
}
253
254
/*
	Get a slot in the picture list: an existing entry of the same type to be
	replaced, or a freshly appended and initialized one.

	Types 1 and 2 are matched by type alone; for every other type the
	description has to match, too.
	Returns NULL without description or when the list cannot grow.
*/
static mpg123_picture *add_id3_picture(mpg123_picture **list, size_t *size, char type, mpg123_string *description)
{
	if(description == NULL)
		return NULL;

	// Look for an entry to overwrite.
	const int unique_type = (type == 1 || type == 2);
	for(size_t n=0; n<*size; ++n)
	{
		mpg123_picture *entry = &(*list)[n];
		if(entry->type != type)
			continue;
		if(unique_type || mpg123_same_string(&entry->description, description))
			return entry;
	}

	// None found, append a new one.
	const size_t count = *size;
	mpg123_picture *grown = safe_realloc(*list, sizeof(mpg123_picture)*(count+1));
	if(grown == NULL)
		return NULL; /* bad */

	*list = grown;
	*size = count+1;

	mpg123_picture *fresh = grown + count;
	init_mpg123_picture(fresh);
	return fresh; /* Pointer to the added picture. */
}
281
282 /* OK, back to the higher level functions. */
283
/* Free all ID3v2 lists of the handle: pictures, comments, extras, texts.
   Each list ends up NULL with a count of zero. */
void exit_id3(mpg123_handle *fr)
{
	mpg123_id3v2 *v2 = &fr->id3v2;

	free_id3_picture(&v2->picture, &v2->pictures);
	free_id3_text(&v2->comment_list, &v2->comments);
	free_id3_text(&v2->extra, &v2->extras);
	free_id3_text(&v2->text, &v2->texts);
}
291
/* Drop all stored ID3v2 data and return to the empty initial state. */
void reset_id3(mpg123_handle *fr)
{
	exit_id3(fr); /* release the lists */
	init_id3(fr); /* zero the version, the convenience links and the list bookkeeping */
}
297
298 /* Set the id3v2.artist id3v2.title ... links to elements of the array. */
/*
	Point the convenience links (title, album, artist, year, genre, comment)
	at the matching entries of the text and comment lists.
	When several entries qualify for a link, the last one in the list wins.
*/
void id3_link(mpg123_handle *fr)
{
	mpg123_id3v2 *v2 = &fr->id3v2;
	size_t n;

	debug("linking ID3v2");
	null_id3_links(fr);

	for(n = 0; n < v2->texts; ++n)
	{
		mpg123_text *entry = v2->text + n;
		mpg123_string *value = &entry->text;

		if(strncmp("TIT2", entry->id, 4) == 0)
			v2->title = value;
		else if(strncmp("TALB", entry->id, 4) == 0)
			v2->album = value;
		else if(strncmp("TPE1", entry->id, 4) == 0)
			v2->artist = value;
		else if(strncmp("TYER", entry->id, 4) == 0)
			v2->year = value;
		else if(strncmp("TCON", entry->id, 4) == 0)
			v2->genre = value;
	}

	/* The generic comment is one with an empty description. */
	for(n = 0; n < v2->comments; ++n)
	{
		mpg123_text *entry = v2->comment_list + n;
		if(entry->description.fill != 0 && entry->description.p[0] != 0)
			continue;
		v2->comment = &entry->text;
	}

	/* Without a generic comment, fall back to the last comment stored. */
	if(v2->comments > 0 && v2->comment == NULL)
		v2->comment = &v2->comment_list[v2->comments-1].text;
}
324
325 /*
326 Store ID3 text data in an mpg123_string; either verbatim copy or
327 everything translated to UTF-8 encoding.
328 Preserve the zero string separator (I don't need strlen for the total size).
329
330 Since we can overwrite strings with ID3 update frames, don't free
331 memory, just grow strings.
332 */
/*
	Fill sb from an ID3 text field whose first byte is the encoding byte.
	With notranslate set, the source bytes (encoding byte included) are copied
	verbatim; otherwise the text after the encoding byte goes through
	id3_to_utf8(). The previous content is discarded in any case (fill = 0),
	also for empty input or an unknown encoding.
*/
static void store_id3_text(mpg123_string *sb, unsigned char *source, size_t source_size, const int noquiet, const int notranslate)
{
	if(sb) // Always overwrite, even with nothing.
		sb->fill = 0;
	if(source_size == 0)
	{
		debug("Empty id3 data!");
		return;
	}

	if(!notranslate)
	{
		/* Decode according to the leading encoding byte. */
		const unsigned char encoding = source[0];
		if(encoding > mpg123_id3_enc_max)
		{
			if(noquiet)
				error1("Unknown text encoding %u, I take no chances, sorry!", encoding);
			return;
		}
		id3_to_utf8(sb, encoding, source+1, source_size-1, noquiet);

		if(sb->fill) debug1("UTF-8 string (the first one): %s", sb->p);
		else if(noquiet) error("unable to convert string to UTF-8 (out of memory, junk input?)!");
		return;
	}

	/* Plain copy; the client wants to decode the text itself. */
	/* Future: Add a path for ID3 errors. */
	if(!mpg123_grow_string(sb, source_size))
	{
		if(noquiet) error("Cannot resize target string, out of memory?");
		return;
	}
	memcpy(sb->p, source, source_size);
	sb->fill = source_size;
	debug1("stored undecoded ID3 text of size %"SIZE_P, (size_p)source_size);
}
371
372 /* On error, sb->size is 0. */
373 /* Also, encoding has been checked already! */
/*
	Convert ID3 text of the given encoding (without the encoding byte) and
	hand it to the converter for that encoding. The encoding must already have
	been validated by the caller; it is used as a table index without a check.
*/
void id3_to_utf8(mpg123_string *sb, unsigned char encoding, const unsigned char *source, size_t source_size, int noquiet)
{
	if(sb)
		sb->fill = 0;
	debug1("encoding: %u", encoding);

	/* A note: ID3v2.3 uses UCS-2 non-variable 16bit encoding, v2.4 uses UTF16.
	   UTF-16 uses a reserved/private range in UCS-2 to add the magic, so we just always treat it as UTF. */
	const unsigned int bwidth = encoding_widths[encoding];

	/* Hack! A stray zero byte before the BOM has been seen in the wild.
	   UTF16be is exempt, as it _can_ begin with a null byte! */
	if(encoding != mpg123_id3_utf16be)
	{
		for(; source_size > bwidth && source[0] == 0; ++source, --source_size)
		{
			debug("skipped leading zero");
		}
	}

	/* With two bytes per character, an uneven byte count is strange; trim the excess. */
	const size_t excess = source_size % bwidth;
	if(excess)
	{
		if(noquiet) warning2("Weird tag size %d for encoding %u - I will probably trim too early or something but I think the MP3 is broken.", (int)source_size, encoding);
		source_size -= excess;
	}

	text_converters[encoding](sb, source, source_size, noquiet);
}
399
400 /* You have checked encoding to be in the range already. */
next_text(unsigned char * prev,unsigned char encoding,size_t limit)401 static unsigned char *next_text(unsigned char* prev, unsigned char encoding, size_t limit)
402 {
403 unsigned char *text = prev;
404 size_t width = encoding_widths[encoding];
405
406 /* So I go lengths to find zero or double zero...
407 Remember bug 2834636: Only check for aligned NULLs! */
408 while(text-prev < (ssize_t)limit)
409 {
410 if(text[0] == 0)
411 {
412 if(width <= limit-(text-prev))
413 {
414 size_t i = 1;
415 for(; i<width; ++i) if(text[i] != 0) break;
416
417 if(i == width) /* found a null wide enough! */
418 {
419 text += width;
420 break;
421 }
422 }
423 else return NULL; /* No full character left? This text is broken */
424 }
425
426 text += width;
427 }
428 if((size_t)(text-prev) >= limit) text = NULL;
429
430 return text;
431 }
432
/* Human-readable name for an ID3v2 text encoding byte, for diagnostics. */
static const char *enc_name(unsigned char enc)
{
	static const char *const names[] =
	{
		"Latin 1",
		"UTF-16 BOM",
		"UTF-16 BE",
		"UTF-8"
	};

	if(enc < sizeof(names)/sizeof(names[0]))
		return names[enc];

	return "unknown!";
}
444
/*
	Store a plain text frame (encoding byte followed by encoded text) in the
	text list of the handle. An existing entry with the same ID is overwritten.

	realdata/realsize: frame payload; realsize may be zero, which stores an
	                   empty text.
	id:                4-byte frame ID, not zero-terminated.
*/
static void process_text(mpg123_handle *fr, unsigned char *realdata, size_t realsize, char *id)
{
	/* Text encoding $xx */
	/* The text (encoded) ... */
	mpg123_text *t = add_text(fr, id);
	/* Only look at the encoding byte if the frame actually contains one. */
	if(VERBOSE4) fprintf(stderr, "Note: Storing text from %s encoding\n", realsize > 0 ? enc_name(realdata[0]) : "unknown!");
	if(t == NULL)
	{
		if(NOQUIET) error("Unable to attach new text!");
		return;
	}
	/* This shows the ID the entry had before; it is empty for a fresh entry. */
	mdebug("process_text: (over)writing entry with ID %s"
	,	(char[5]) { t->id[0], t->id[1], t->id[2], t->id[3], 0 } );
	memcpy(t->id, id, 4);
	store_id3_text(&t->text, realdata, realsize, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT);
	if(VERBOSE4) // Do not print unsanitized text to terminals!
		fprintf(stderr, "Note: ID3v2 %c%c%c%c text frame stored\n", id[0], id[1], id[2], id[3]);
}
464
/*
	Parse an APIC frame and store the picture in the picture list.

	Layout: encoding byte, MIME type (Latin-1, zero-terminated), picture type
	byte, description (in the given encoding, zero-terminated), image data.
	A frame without MIME type, description or image data is skipped.
	An existing picture may be replaced, as decided by add_id3_picture().
*/
static void process_picture(mpg123_handle *fr, unsigned char *realdata, size_t realsize)
{
	mpg123_string mime;
	mpg123_string description;
	mpg123_init_string(&mime);
	mpg123_init_string(&description);

	if(realsize < 1)
	{
		debug("Empty id3 data!");
		return;
	}

	/* First byte: text encoding, relevant for the description only. */
	const unsigned char encoding = realdata[0];
	++realdata;
	--realsize;
	if(encoding > mpg123_id3_enc_max)
	{
		if(NOQUIET)
			error1("Unknown text encoding %u, I take no chances, sorry!", encoding);
		return;
	}
	if(VERBOSE4) fprintf(stderr, "Note: Storing picture from APIC frame.\n");

	/* MIME type, always Latin-1. */
	unsigned char *after = next_text(realdata, 0, realsize);
	if(after == NULL)
	{
		if(NOQUIET)
			error("Unable to get mime type for picture; skipping picture.");
		return;
	}
	id3_to_utf8(&mime, 0, realdata, after - realdata, NOQUIET);
	realsize -= after - realdata;
	realdata = after;

	/* Picture type. next_text() guarantees at least one byte after the MIME type. */
	const unsigned char image_type = realdata[0];
	++realdata;
	--realsize;

	/* Description, in the encoding announced at the start. */
	after = next_text(realdata, encoding, realsize);
	if(after == NULL)
	{
		if(NOQUIET)
			error("Unable to get description for picture; skipping picture.");
		mpg123_free_string(&mime);
		return;
	}
	id3_to_utf8(&description, encoding, realdata, after - realdata, NOQUIET);
	realsize -= after - realdata;

	/* The rest is the image itself; it gets its own copy. */
	unsigned char *image_data = realsize
	?	(unsigned char*)malloc(realsize)
	:	NULL;
	if(image_data == NULL)
	{
		if(NOQUIET)
			error("No picture data or malloc failure; skipping picture.");
		mpg123_free_string(&description);
		mpg123_free_string(&mime);
		return;
	}
	memcpy(image_data, after, realsize);

	// Everything is prepared, now get the slot to append to or to replace.
	mpg123_picture *pic = add_picture(fr, image_type, &description);
	if(pic == NULL)
	{
		if(NOQUIET)
			error("Unable to attach new picture!");
		free(image_data);
		mpg123_free_string(&description);
		mpg123_free_string(&mime);
		return;
	}

	// The slot is either fresh or holds a picture to be replaced.
	// Memory is handed over, so any old storage has to go first.
	free_mpg123_picture(pic);
	pic->type = image_type;
	pic->size = realsize;
	pic->data = image_data;
	mpg123_move_string(&mime, &pic->mime_type);
	mpg123_move_string(&description, &pic->description);
	if(VERBOSE4)
		fprintf(stderr, "Note: ID3v2 APIC picture frame of type: %d\n", pic->type);
}
548
/* Store a new comment that perhaps is a RVA / RVA_ALBUM/AUDIOPHILE / RVA_MIX/RADIO one
Special gimmick: It also stores USLT to the texts. Structure is the same as for comments. */
/*
	Store a COMM frame in the comment list or, for tt == uslt, a USLT frame in
	the text list. For plain comments, the description is also checked for RVA
	keywords and the text is parsed as a gain value if it is one.

	Layout: encoding byte, 3 bytes language, description (encoded,
	zero-terminated), text (encoded).

	Note: The frame data is modified in place. The byte before the description
	and the byte before the text are overwritten with the encoding so that
	store_id3_text() can be fed with encoding-prefixed strings.

	realdata/realsize: frame payload; less than 4 bytes is rejected before
	                   any byte of it is looked at.
	rva_level:         RVA values are only taken if the level stored for the
	                   mode is not higher than this.
	id:                4-byte frame ID, not zero-terminated.
*/
static void process_comment(mpg123_handle *fr, enum frame_types tt, unsigned char *realdata, size_t realsize, int rva_level, char *id)
{
	/* Text encoding $xx */
	/* Language $xx xx xx */
	/* Short description (encoded!) <text> $00 (00) */
	/* Then the comment text (encoded) ... */
	unsigned char encoding;
	char lang[3]; // realdata + 1
	unsigned char *descr;
	unsigned char *text = NULL;
	mpg123_text *xcom = NULL;
	mpg123_text localcom; // UTF-8 variant for local processing, remember to clean up!
	init_mpg123_text(&localcom);

	/* Validate the size first: encoding byte and language need 4 bytes. */
	if(realsize < 4)
	{
		if(NOQUIET) error1("Invalid frame size of %"SIZE_P" (too small for anything).", (size_p)realsize);
		return;
	}
	encoding = realdata[0];
	descr = realdata+4;
	if(encoding > mpg123_id3_enc_max)
	{
		if(NOQUIET)
			error1("Unknown text encoding %u, I take no chances, sorry!", encoding);
		return;
	}
	memcpy(lang, realdata+1, 3);
	/* Now I can abuse a byte from lang for the encoding. */
	descr[-1] = encoding;
	/* Be careful with finding the end of description, I have to honor encoding here. */
	text = next_text(descr, encoding, realsize-(descr-realdata));
	if(text == NULL)
	{
		if(NOQUIET)
			error("No comment text / valid description?");
		return;
	}
	{ // just for variable scope
		mpg123_string description;
		mpg123_init_string(&description);
		// Store the text, with desired encoding, but for comments always a local copy in UTF-8.
		store_id3_text( &description, descr-1, text-descr+1
		,	NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT );
		if(tt == comment)
			store_id3_text( &localcom.description, descr-1, text-descr+1
			,	NOQUIET, 0 );
		if(VERBOSE4)
			fprintf( stderr, "Note: Storing comment from %s encoding\n"
			,	enc_name(encoding) );
		// add_uslt() picks up the id parameter of this function.
		xcom = tt == uslt
		?	add_uslt(fr, lang, &description)
		:	add_comment(fr, lang, &description);
		if(xcom == NULL)
		{
			if(NOQUIET)
				error("Unable to attach new comment!");
			mpg123_free_string(&description);
			free_mpg123_text(&localcom);
			return;
		}
		memcpy(xcom->id, id, 4);
		memcpy(xcom->lang, lang, 3);
		// That takes over the description allocation.
		mpg123_move_string(&description, &xcom->description);
	}

	text[-1] = encoding; /* Byte abusal for encoding... */
	store_id3_text(&xcom->text, text-1, realsize+1-(text-realdata), NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT);
	/* Remember: I will probably decode the above (again) for rva comment checking. So no messing around, please. */

	if(VERBOSE4) /* Do _not_ print the verbatim text: The encoding might be funny! */
	{
		fprintf(stderr, "Note: ID3 comm/uslt desc of length %"SIZE_P".\n", (size_p)xcom->description.fill);
		fprintf(stderr, "Note: ID3 comm/uslt text of length %"SIZE_P".\n", (size_p)xcom->text.fill);
	}
	/* Look out for RVA info only when we really deal with a straight comment. */
	if(tt == comment && localcom.description.fill > 0)
	{
		int rva_mode = -1; /* mix / album */
		if(    !strcasecmp(localcom.description.p, "rva")
		    || !strcasecmp(localcom.description.p, "rva_mix")
		    || !strcasecmp(localcom.description.p, "rva_track")
		    || !strcasecmp(localcom.description.p, "rva_radio") )
			rva_mode = 0;
		else if( !strcasecmp(localcom.description.p, "rva_album")
		      || !strcasecmp(localcom.description.p, "rva_audiophile")
		      || !strcasecmp(localcom.description.p, "rva_user") )
			rva_mode = 1;
		if((rva_mode > -1) && (fr->rva.level[rva_mode] <= rva_level))
		{
			/* Only translate the contents in here where we really need them. */
			store_id3_text(&localcom.text, text-1, realsize+1-(text-realdata), NOQUIET, 0);
			if(localcom.text.fill > 0)
			{
				fr->rva.gain[rva_mode] = (float) atof(localcom.text.p);
				if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]);
				fr->rva.peak[rva_mode] = 0;
				fr->rva.level[rva_mode] = rva_level;
			}
		}
	}
	/* Make sure to free the local memory... */
	free_mpg123_text(&localcom);
}
654
/*
	Store a TXXX frame in the list of extra texts and check whether it carries
	ReplayGain information (replaygain_{track,album}_{gain,peak}).

	Layout: encoding byte, description (encoded, zero-terminated), text (encoded).

	Note: The frame data is modified in place. The byte before the text is
	overwritten with the encoding so that store_id3_text() can be fed with an
	encoding-prefixed string.

	realdata/realsize: frame payload; an empty frame is rejected before any
	                   byte of it is looked at.
	rva_level:         RVA values are only taken if the level stored for the
	                   mode is not higher than this.
	id:                4-byte frame ID, not zero-terminated.
*/
static void process_extra(mpg123_handle *fr, unsigned char* realdata, size_t realsize, int rva_level, char *id)
{
	/* Text encoding $xx */
	/* Description ... $00 (00) */
	/* Text ... */
	unsigned char encoding;
	unsigned char *descr;
	unsigned char *text;
	mpg123_text *xex;
	mpg123_text localex;

	/* Validate the size first: there has to be an encoding byte at least. */
	if(realsize < 1)
	{
		if(NOQUIET) error1("Invalid frame size of %lu (too small for anything).", (unsigned long)realsize);
		return;
	}
	encoding = realdata[0];
	descr = realdata+1; /* remember, the encoding is descr[-1] */
	if(encoding > mpg123_id3_enc_max)
	{
		if(NOQUIET)
			error1("Unknown text encoding %u, I take no chances, sorry!", encoding);
		return;
	}
	text = next_text(descr, encoding, realsize-(descr-realdata));
	if(VERBOSE4) fprintf(stderr, "Note: Storing extra from %s encoding\n", enc_name(encoding));
	if(text == NULL)
	{
		if(NOQUIET) error("No extra frame text / valid description?");
		return;
	}
	{ // just for variable scope
		mpg123_string description;
		mpg123_init_string(&description);
		/* The outside storage gets reencoded to UTF-8 only if not requested otherwise.
		   Remember that we really need the -1 here to hand in the encoding byte!*/
		store_id3_text( &description, descr-1, text-descr+1
		,	NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT );
		xex = add_extra(fr, &description);
		if(xex)
			mpg123_move_string(&description, &xex->description);
		else
			mpg123_free_string(&description);
	}
	if(xex == NULL)
	{
		if(NOQUIET) error("Unable to attach new extra text!");
		return;
	}
	memcpy(xex->id, id, 4);
	init_mpg123_text(&localex); /* For our local copy. */

	/* Our local copy is always stored in UTF-8! */
	store_id3_text(&localex.description, descr-1, text-descr+1, NOQUIET, 0);
	/* At first, only store the outside copy of the payload. We may not need the local copy. */
	text[-1] = encoding;
	store_id3_text(&xex->text, text-1, realsize-(text-realdata)+1, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT);

	/* Now check if we would like to interpret this extra info for RVA. */
	if(localex.description.fill > 0)
	{
		int is_peak = 0;
		int rva_mode = -1; /* mix / album */

		if(!strncasecmp(localex.description.p, "replaygain_track_",17))
		{
			if(VERBOSE3) fprintf(stderr, "Note: RVA ReplayGain track gain/peak\n");

			rva_mode = 0;
			if(!strcasecmp(localex.description.p, "replaygain_track_peak")) is_peak = 1;
			else if(strcasecmp(localex.description.p, "replaygain_track_gain")) rva_mode = -1;
		}
		else
		if(!strncasecmp(localex.description.p, "replaygain_album_",17))
		{
			if(VERBOSE3) fprintf(stderr, "Note: RVA ReplayGain album gain/peak\n");

			rva_mode = 1;
			if(!strcasecmp(localex.description.p, "replaygain_album_peak")) is_peak = 1;
			else if(strcasecmp(localex.description.p, "replaygain_album_gain")) rva_mode = -1;
		}
		if((rva_mode > -1) && (fr->rva.level[rva_mode] <= rva_level))
		{
			/* Now we need the translated copy of the data. */
			store_id3_text(&localex.text, text-1, realsize-(text-realdata)+1, NOQUIET, 0);
			if(localex.text.fill > 0)
			{
				if(is_peak)
				{
					fr->rva.peak[rva_mode] = (float) atof(localex.text.p);
					if(VERBOSE3) fprintf(stderr, "Note: RVA peak %f\n", fr->rva.peak[rva_mode]);
				}
				else
				{
					fr->rva.gain[rva_mode] = (float) atof(localex.text.p);
					if(VERBOSE3) fprintf(stderr, "Note: RVA gain %fdB\n", fr->rva.gain[rva_mode]);
				}
				fr->rva.level[rva_mode] = rva_level;
			}
		}
	}

	free_mpg123_text(&localex);
}
757
758 /* Make a ID3v2.3+ 4-byte ID from a ID3v2.2 3-byte ID
759 Note that not all frames survived to 2.4; the mapping goes to 2.3 .
760 A notable miss is the old RVA frame, which is very unspecific anyway.
761 This function returns -1 when a not known 3 char ID was encountered, 0 otherwise. */
/*
	Translate a 3-character ID3v2.2 frame ID into its 4-character ID3v2.3
	counterpart, in place.

	id: buffer with the 3-character ID; it needs room for 4 characters, as the
	    new ID is written over it (without zero termination).
	Returns 0 after translation, -1 for an ID that is not in the table
	(id is left untouched then).
*/
static int promote_framename(mpg123_handle *fr, char *id) /* fr because of VERBOSE macros */
{
	/* Old and new name side by side, so that they cannot get out of step. */
	static const struct { const char *v22; const char *v23; } map[] =
	{
		{ "COM", "COMM" }, { "TAL", "TALB" }, { "TBP", "TBPM" }, { "TCM", "TCOM" },
		{ "TCO", "TCON" }, { "TCR", "TCOP" }, { "TDA", "TDAT" }, { "TDY", "TDLY" },
		{ "TEN", "TENC" }, { "TFT", "TFLT" }, { "TIM", "TIME" }, { "TKE", "TKEY" },
		{ "TLA", "TLAN" }, { "TLE", "TLEN" }, { "TMT", "TMED" }, { "TOA", "TOPE" },
		{ "TOF", "TOFN" }, { "TOL", "TOLY" }, { "TOR", "TORY" }, { "TOT", "TOAL" },
		{ "TP1", "TPE1" }, { "TP2", "TPE2" }, { "TP3", "TPE3" }, { "TP4", "TPE4" },
		{ "TPA", "TPOS" }, { "TPB", "TPUB" }, { "TRC", "TSRC" },
		/* Recording dates are TRD in ID3v2.2. This entry used to repeat TDA,
		   which made it unreachable and left TRD untranslated. */
		{ "TRD", "TRDA" },
		{ "TRK", "TRCK" }, { "TSI", "TSIZ" }, { "TSS", "TSSE" }, { "TT1", "TIT1" },
		{ "TT2", "TIT2" }, { "TT3", "TIT3" }, { "TXT", "TEXT" }, { "TXX", "TXXX" },
		{ "TYE", "TYER" }
	};
	size_t i;

	for(i=0; i<sizeof(map)/sizeof(map[0]); ++i)
	{
		if(!strncmp(id, map[i].v22, 3))
		{
			memcpy(id, map[i].v23, 4);
			if(VERBOSE3) fprintf(stderr, "Translated ID3v2.2 frame %s to %s\n", map[i].v22, map[i].v23);
			return 0;
		}
	}
	if(VERBOSE3) fprintf(stderr, "Ignoring untranslated ID3v2.2 frame %c%c%c\n", id[0], id[1], id[2]);
	return -1;
}
791
792 #endif /* NO_ID3V2 */
793
/*
	Keep a raw copy of the ID3v2 tag in fr->id3v2_raw: the 10 header bytes that
	were already consumed (first4bytes and buf) followed by length bytes that
	are read from the stream here. A previously stored tag is dropped.

	first4bytes: the first four header bytes, most significant byte first.
	buf:         the remaining six header bytes.
	length:      number of bytes following the header.

	Returns 1 when the tag has been stored, 0 when memory was lacking and the
	tag data has been skipped instead (fr->err is set to MPG123_OUT_OF_MEM),
	or the negative reader error code when reading/skipping failed.
*/
int store_id3v2( mpg123_handle *fr
,	unsigned long first4bytes, unsigned char buf[6], unsigned long length )
{
	int ret = 1;
	off_t ret2;
	unsigned long fullen = 10+length;

	free(fr->id3v2_raw); /* free(NULL) is fine */
	fr->id3v2_size = 0;
	/* Allocate one byte more for a closing zero as safety catch for strlen(). */
	fr->id3v2_raw = malloc(fullen+1);
	if(!fr->id3v2_raw)
	{
		fr->err = MPG123_OUT_OF_MEM;
		/* Report the size that was actually requested. */
		if(NOQUIET)
			error1("ID3v2: Arrg! Unable to allocate %lu bytes"
				" for ID3v2 data - trying to skip instead.", fullen+1);
		if((ret2=fr->rd->skip_bytes(fr,length)) < 0)
			ret = ret2;
		else
			ret = 0;
	}
	else
	{
		/* Rebuild the header from the pieces that were read before. */
		fr->id3v2_raw[0] = (first4bytes>>24) & 0xff;
		fr->id3v2_raw[1] = (first4bytes>>16) & 0xff;
		fr->id3v2_raw[2] = (first4bytes>>8) & 0xff;
		fr->id3v2_raw[3] = first4bytes & 0xff;
		memcpy(fr->id3v2_raw+4, buf, 6);
		if((ret2=fr->rd->read_frame_body(fr, fr->id3v2_raw+10, length)) < 0)
		{
			ret=ret2;
			free(fr->id3v2_raw);
			fr->id3v2_raw = NULL;
		}
		else
		{ /* Closing with a zero for paranoia. */
			fr->id3v2_raw[fullen] = 0;
			fr->id3v2_size = fullen;
		}
	}
	return ret;
}
837
838 /*
839 trying to parse ID3v2.3 and ID3v2.4 tags...
840
841 returns: 0: bad or just unparseable tag
842 1: good, (possibly) new tag info
843 <0: reader error (may need more data feed, try again)
844 */
parse_new_id3(mpg123_handle * fr,unsigned long first4bytes)845 int parse_new_id3(mpg123_handle *fr, unsigned long first4bytes)
846 {
847 #define UNSYNC_FLAG 128
848 #define EXTHEAD_FLAG 64 /* ID3v2.3+ */
849 #define COMPRESS_FLAG 64 /* ID3v2.2 */
850 #define EXP_FLAG 32
851 #define FOOTER_FLAG 16
852 #define EXT_UPDATE_FLAG 64 /* ID3v2.4 only: extended header update flag */
853 #define UNKNOWN_FLAGS 15 /* 00001111*/
854 unsigned char buf[6];
855 unsigned long length=0;
856 unsigned char flags = 0;
857 int ret = 1;
858 off_t ret2;
859 int storetag = 0;
860 unsigned int footlen = 0;
861 #ifndef NO_ID3V2
862 int skiptag = 0;
863 #endif
864 unsigned char major = first4bytes & 0xff;
865 debug1("ID3v2: major tag version: %i", major);
866
867 if(major == 0xff) return 0; /* Invalid... */
868 if((ret2 = fr->rd->read_frame_body(fr, buf, 6)) < 0) /* read more header information */
869 return ret2;
870
871 if(buf[0] == 0xff) return 0; /* Revision, will never be 0xff. */
872
873 if(fr->p.flags & MPG123_STORE_RAW_ID3)
874 storetag = 1;
875 /* second new byte are some nice flags, if these are invalid skip the whole thing */
876 flags = buf[1];
877 debug1("ID3v2: flags 0x%08x", flags);
878 /* use 4 bytes from buf to construct 28bit uint value and return 1; return 0 if bytes are not synchsafe */
879 #define synchsafe_to_long(buf,res) \
880 ( \
881 (((buf)[0]|(buf)[1]|(buf)[2]|(buf)[3]) & 0x80) ? 0 : \
882 (res = (((unsigned long) (buf)[0]) << 21) \
883 | (((unsigned long) (buf)[1]) << 14) \
884 | (((unsigned long) (buf)[2]) << 7) \
885 | ((unsigned long) (buf)[3]) \
886 ,1) \
887 )
888 /* id3v2.3 does not store synchsafe frame sizes, but synchsafe tag size - doh! */
889 /* Remember: bytes_to_long() can yield ULONG_MAX on 32 bit platforms! */
890 #define bytes_to_long(buf,res) \
891 ( \
892 major == 3 ? \
893 (res = (((unsigned long) (buf)[0]) << 24) \
894 | (((unsigned long) (buf)[1]) << 16) \
895 | (((unsigned long) (buf)[2]) << 8) \
896 | ((unsigned long) (buf)[3]) \
897 ,1) : synchsafe_to_long(buf,res) \
898 )
899 /* for id3v2.2 only */
900 #define threebytes_to_long(buf,res) \
901 ( \
902 res = (((unsigned long) (buf)[0]) << 16) \
903 | (((unsigned long) (buf)[1]) << 8) \
904 | ((unsigned long) (buf)[2]) \
905 )
906
907 /* length-10 or length-20 (footer present); 4 synchsafe integers == 28 bit number */
908 /* we have already read 10 bytes, so left are length or length+10 bytes belonging to tag */
909 /* Note: This is an 28 bit value in 32 bit storage, plenty of space for */
910 /* length+x for reasonable x. */
911 if(!synchsafe_to_long(buf+2,length))
912 {
913 if(NOQUIET) error4("Bad tag length (not synchsafe): 0x%02x%02x%02x%02x; You got a bad ID3 tag here.", buf[2],buf[3],buf[4],buf[5]);
914 return 0;
915 }
916 if(flags & FOOTER_FLAG)
917 footlen = 10;
918 debug1("ID3v2: tag data length %lu", length);
919 #ifndef NO_ID3V2
920 if(VERBOSE2) fprintf(stderr,"Note: ID3v2.%i rev %i tag of %lu bytes\n", major, buf[0], length);
921 /* skip if unknown version/scary flags, parse otherwise */
922 if(fr->p.flags & MPG123_SKIP_ID3V2)
923 {
924 if(VERBOSE3)
925 fprintf(stderr, "Note: Skipping ID3v2 tag per user request.\n");
926 skiptag = 1;
927 }
928 if((flags & UNKNOWN_FLAGS) || (major > 4) || (major < 2))
929 {
930 if(NOQUIET)
931 warning2( "ID3v2: Won't parse the ID3v2 tag with major version"
932 " %u and flags 0x%xu - some extra code may be needed"
933 , major, flags );
934 skiptag = 1;
935 }
936 // Standard says that compressed tags should be ignored as there isn't an agreed
937 // compressoion scheme.
938 if(major == 2 && flags & COMPRESS_FLAG)
939 {
940 if(NOQUIET)
941 warning("ID3v2: ignoring compressed ID3v2.2 tag");
942 skiptag = 1;
943 }
944 if(length < 10)
945 {
946 if(NOQUIET)
947 warning1("ID3v2: unrealistic small tag lengh %lu, skipping", length);
948 skiptag = 1;
949 }
950 if(!skiptag)
951 storetag = 1;
952 #endif
953 if(storetag)
954 {
955 /* Stores whole tag with footer and an additonal trailing zero. */
956 if((ret2 = store_id3v2(fr, first4bytes, buf, length+footlen)) <= 0)
957 return ret2;
958 }
959 #ifndef NO_ID3V2
960 if(skiptag)
961 {
962 if(VERBOSE3)
963 fprintf(stderr, "Note: skipped tag clearing possibly existing ID3v2 data");
964 reset_id3(fr); // Old data is invalid.
965 #endif
966 if(!storetag && (ret2=fr->rd->skip_bytes(fr,length+footlen))<0)
967 ret=ret2;
968 #ifndef NO_ID3V2
969 }
970 else
971 {
972 unsigned char* tagdata = fr->id3v2_raw+10;
973 /* try to interpret that beast */
974 debug("ID3v2: analysing frames...");
975 if(length > 0)
976 {
977 unsigned char extflags = 0;
978 unsigned long tagpos = 0;
979 /* bytes of frame title and of framesize value */
980 unsigned int head_part = major > 2 ? 4 : 3;
981 unsigned int flag_part = major > 2 ? 2 : 0;
982 /* The amount of bytes that are unconditionally read for each frame: */
983 /* ID, size, flags. */
984 unsigned int framebegin = head_part+head_part+flag_part;
985 debug1("ID3v2: have read at all %lu bytes for the tag now", (unsigned long)length+6);
986 if(flags & EXTHEAD_FLAG)
987 {
988 debug("ID3v2: extended header");
989 if(!bytes_to_long(tagdata, tagpos) || tagpos >= length)
990 {
991 ret = 0;
992 if(NOQUIET)
993 error4( "Bad (non-synchsafe/too large) tag offset from extended header:"
994 "0x%02x%02x%02x%02x"
995 , tagdata[0], tagdata[1], tagdata[2], tagdata[3] );
996 } else if(tagpos < 6)
997 {
998 ret = 0;
999 if(NOQUIET)
1000 merror("Extended header too small (%lu).", tagpos);
1001 }
1002 if(major == 3)
1003 {
1004 tagpos += 4; // The size itself is not included.
1005 if(tagpos >= length)
1006 {
1007 ret = 0;
1008 if(NOQUIET)
1009 error("Too much extended v2.3 header.");
1010 }
1011 } else if(ret) // v2.4 and at least got my 6 bytes of ext header
1012 {
1013 // Only v4 knows update frames, check for that.
1014 // Need to step back. Header is 4 bytes length, one byte flag size,
1015 // one byte flags. Flag size has to be 1!
1016 if(tagdata[4] == 1 && tagdata[5] & EXT_UPDATE_FLAG)
1017 {
1018 if(VERBOSE3)
1019 fprintf(stderr, "Note: ID3v2.4 update tag\n");
1020 extflags |= EXT_UPDATE_FLAG;
1021 }
1022 }
1023 }
1024 if(!(extflags & EXT_UPDATE_FLAG))
1025 {
1026 if(VERBOSE3)
1027 fprintf(stderr, "Note: non-update tag replacing existing ID3v2 data\n");
1028 reset_id3(fr);
1029 }
1030 if(ret > 0)
1031 {
1032 char id[5];
1033 unsigned long framesize;
1034 unsigned long fflags; /* need 16 bits, actually */
1035 id[4] = 0;
1036 fr->id3v2.version = major;
1037 /* Pos now advanced after ext head, now a frame has to follow. */
1038 /* Note: tagpos <= length, which is 28 bit integer, so both */
1039 /* far away from overflow for adding known small values. */
1040 /* I want to read at least one full header now. */
1041 while(length >= tagpos+framebegin)
1042 {
1043 int i = 0;
1044 unsigned long pos = tagpos;
1045 /* level 1,2,3 - 0 is info from lame/info tag! */
1046 /* rva tags with ascending significance, then general frames */
1047 enum frame_types tt = unknown;
1048 /* we may have entered the padding zone or any other strangeness: check if we have valid frame id characters */
1049 for(i=0; i< head_part; ++i)
1050 if( !( ((tagdata[tagpos+i] > 47) && (tagdata[tagpos+i] < 58))
1051 || ((tagdata[tagpos+i] > 64) && (tagdata[tagpos+i] < 91)) ) )
1052 {
1053 debug5("ID3v2: real tag data apparently ended after %lu bytes with 0x%02x%02x%02x%02x", tagpos, tagdata[tagpos], tagdata[tagpos+1], tagdata[tagpos+2], tagdata[tagpos+3]);
1054 /* This is no hard error... let's just hope that we got something meaningful already (ret==1 in that case). */
1055 goto tagparse_cleanup; /* Need to escape two loops here. */
1056 }
1057 if(ret > 0)
1058 {
1059 /* 4 or 3 bytes id */
1060 strncpy(id, (char*) tagdata+pos, head_part);
1061 id[head_part] = 0; /* terminate for 3 or 4 bytes */
1062 pos += head_part;
1063 tagpos += head_part;
1064 /* size as 32 bits or 28 bits */
1065 if(fr->id3v2.version == 2) threebytes_to_long(tagdata+pos, framesize);
1066 else
1067 if(!bytes_to_long(tagdata+pos, framesize))
1068 {
1069 /* Just assume that up to now there was some good data. */
1070 if(NOQUIET) error1("ID3v2: non-syncsafe size of %s frame, skipping the remainder of tag", id);
1071 break;
1072 }
1073 if(VERBOSE3) fprintf(stderr, "Note: ID3v2 %s frame of size %lu\n", id, framesize);
1074 tagpos += head_part;
1075 pos += head_part;
1076 if(fr->id3v2.version > 2)
1077 {
1078 fflags = (((unsigned long) tagdata[pos]) << 8) | ((unsigned long) tagdata[pos+1]);
1079 pos += 2;
1080 tagpos += 2;
1081 }
1082 else fflags = 0;
1083
1084 if(length - tagpos < framesize)
1085 {
1086 if(NOQUIET) error("Whoa! ID3v2 frame claims to be larger than the whole rest of the tag.");
1087 break;
1088 }
1089 tagpos += framesize; /* the important advancement in whole tag */
1090 /* for sanity, after full parsing tagpos should be == pos */
1091 /* debug4("ID3v2: found %s frame, size %lu (as bytes: 0x%08lx), flags 0x%016lx", id, framesize, framesize, fflags); */
1092 /* %0abc0000 %0h00kmnp */
1093 #define BAD_FFLAGS (unsigned long) 36784
1094 #define PRES_TAG_FFLAG 16384
1095 #define PRES_FILE_FFLAG 8192
1096 #define READ_ONLY_FFLAG 4096
1097 #define GROUP_FFLAG 64
1098 #define COMPR_FFLAG 8
1099 #define ENCR_FFLAG 4
1100 #define UNSYNC_FFLAG 2
1101 #define DATLEN_FFLAG 1
1102 if(head_part < 4 && promote_framename(fr, id) != 0) continue;
1103
1104 /* shall not or want not handle these */
1105 if(fflags & (BAD_FFLAGS | COMPR_FFLAG | ENCR_FFLAG))
1106 {
1107 if(NOQUIET) warning("ID3v2: skipping invalid/unsupported frame");
1108 continue;
1109 }
1110
1111 for(i = 0; i < KNOWN_FRAMES; ++i)
1112 if(!strncmp(frame_type[i], id, 4)){ tt = i; break; }
1113
1114 if(id[0] == 'T' && tt != extra) tt = text;
1115
1116 if(tt != unknown)
1117 {
1118 int rva_mode = -1; /* mix / album */
1119 unsigned long realsize = framesize;
1120 unsigned char* realdata = tagdata+pos;
1121 unsigned char* unsyncbuffer = NULL;
1122 if(((flags & UNSYNC_FLAG) || (fflags & UNSYNC_FFLAG)) && framesize > 0)
1123 {
1124 unsigned long ipos = 0;
1125 unsigned long opos = 0;
1126 debug("Id3v2: going to de-unsync the frame data");
1127 /* de-unsync: FF00 -> FF; real FF00 is simply represented as FF0000 ... */
1128 /* damn, that means I have to delete bytes from withing the data block... thus need temporal storage */
1129 /* standard mandates that de-unsync should always be safe if flag is set */
1130 realdata = unsyncbuffer = malloc(framesize+1); /* will need <= bytes, plus a safety zero */
1131 if(realdata == NULL)
1132 {
1133 if(NOQUIET) error("ID3v2: unable to allocate working buffer for de-unsync");
1134 continue;
1135 }
1136 /* now going byte per byte through the data... */
1137 realdata[0] = tagdata[pos];
1138 opos = 1;
1139 for(ipos = pos+1; ipos < pos+framesize; ++ipos)
1140 {
1141 if(!((tagdata[ipos] == 0) && (tagdata[ipos-1] == 0xff)))
1142 {
1143 realdata[opos++] = tagdata[ipos];
1144 }
1145 }
1146 realsize = opos;
1147 /* Append a zero to keep strlen() safe. */
1148 realdata[realsize] = 0;
1149 debug2("ID3v2: de-unsync made %lu out of %lu bytes", realsize, framesize);
1150 }
1151 pos = 0; /* now at the beginning again... */
1152 /* Avoid reading over boundary, even if there is a */
1153 /* zero byte of padding for safety. */
1154 if(realsize) switch(tt)
1155 {
1156 case comment:
1157 case uslt:
1158 process_comment(fr, tt, realdata, realsize, comment+1, id);
1159 break;
1160 case extra: /* perhaps foobar2000's work */
1161 process_extra(fr, realdata, realsize, extra+1, id);
1162 break;
1163 case rva2: /* "the" RVA tag */
1164 {
1165 /* starts with null-terminated identification */
1166 if(VERBOSE3) fprintf(stderr, "Note: RVA2 identification \"%s\"\n", realdata);
1167 /* default: some individual value, mix mode */
1168 rva_mode = 0;
1169 if( !strncasecmp((char*)realdata, "album", 5)
1170 || !strncasecmp((char*)realdata, "audiophile", 10)
1171 || !strncasecmp((char*)realdata, "user", 4))
1172 rva_mode = 1;
1173 if(fr->rva.level[rva_mode] <= rva2+1)
1174 {
1175 pos += strlen((char*) realdata) + 1;
1176 debug2("got my pos: %zu - %zu", realsize, pos);
1177 // channel and two bytes for RVA value
1178 // pos possibly just past the safety zero, so one more than realsize
1179 if(pos > realsize || realsize-pos < 3)
1180 {
1181 if(NOQUIET)
1182 error("bad RVA2 tag (truncated?)");
1183 }
1184 else if(realdata[pos] == 1)
1185 {
1186 ++pos;
1187 /* only handle master channel */
1188 debug("ID3v2: it is for the master channel");
1189 /* two bytes adjustment, one byte for bits representing peak - n bytes, eh bits, for peak */
1190 /* 16 bit signed integer = dB * 512. Do not shift signed integers! Multiply instead.
1191 Also no implementation-defined casting. Reinterpret the pointer to signed char, then do
1192 proper casting. */
1193 fr->rva.gain[rva_mode] = (float) (
1194 ((short)((signed char*)realdata)[pos]) * 256 + (short)realdata[pos+1] ) / 512;
1195 pos += 2;
1196 if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]);
1197 /* heh, the peak value is represented by a number of bits - but in what manner? Skipping that part */
1198 fr->rva.peak[rva_mode] = 0;
1199 fr->rva.level[rva_mode] = rva2+1;
1200 }
1201 }
1202 }
1203 break;
1204 /* non-rva metainfo, simply store... */
1205 case text:
1206 process_text(fr, realdata, realsize, id);
1207 break;
1208 case picture:
1209 if (fr->p.flags & MPG123_PICTURE)
1210 process_picture(fr, realdata, realsize);
1211
1212 break;
1213 default: if(NOQUIET) error1("ID3v2: unknown frame type %i", tt);
1214 }
1215 if(unsyncbuffer)
1216 free(unsyncbuffer);
1217 }
1218 #undef BAD_FFLAGS
1219 #undef PRES_TAG_FFLAG
1220 #undef PRES_FILE_FFLAG
1221 #undef READ_ONLY_FFLAG
1222 #undef GROUP_FFLAG
1223 #undef COMPR_FFLAG
1224 #undef ENCR_FFLAG
1225 #undef UNSYNC_FFLAG
1226 #undef DATLEN_FFLAG
1227 }
1228 else break;
1229 #undef KNOWN_FRAMES
1230 }
1231 } else
1232 {
1233 if(VERBOSE3)
1234 fprintf(stderr, "Note: faulty ID3v2 tag still clearing old data\n");
1235 reset_id3(fr);
1236 }
1237 } else // No new data, but still there was a tag that invalidates old data.
1238 {
1239 if(VERBOSE3)
1240 fprintf(stderr, "Note: empty ID3v2 clearing old data\n");
1241 reset_id3(fr);
1242 }
1243 tagparse_cleanup:
1244 /* Get rid of stored raw data that should not be kept. */
1245 if(!(fr->p.flags & MPG123_STORE_RAW_ID3))
1246 {
1247 free(fr->id3v2_raw);
1248 fr->id3v2_raw = NULL;
1249 fr->id3v2_size = 0;
1250 }
1251 }
1252 #endif /* NO_ID3V2 */
1253 return ret;
1254 #undef UNSYNC_FLAG
1255 #undef EXTHEAD_FLAG
1256 #undef COMPRESS_FLAG
1257 #undef EXP_FLAG
1258 #undef FOOTER_FLAG
1259 #undef EXT_UPDATE_FLAG
1260 #undef UNKOWN_FLAGS
1261 }
1262
1263 #ifndef NO_ID3V2 /* Disabling all the rest... */
1264
/*
	Convert ISO-8859-1 (Latin-1) text of l bytes at s into UTF-8 stored in sb.
	Bytes below 0x80 are copied verbatim, all others become a two-byte
	sequence. The result is zero-terminated and sb->fill counts that
	terminator. Nothing is touched if growing sb fails.
	The noquiet argument is not used by this converter.
*/
static void convert_latin1(mpg123_string *sb, const unsigned char* s, size_t l, const int noquiet)
{
	size_t idx;
	size_t wide = 0; /* count of input bytes that need two output bytes */
	size_t utf8_len;
	unsigned char *out;

	/* First pass: figure out the exact size of the UTF-8 result. */
	for(idx = 0; idx < l; ++idx)
	{
		if(s[idx] >= 0x80)
			++wide;
	}
	utf8_len = l + wide;

	debug1("UTF-8 length: %lu", (unsigned long)utf8_len);
	/* Room for the text and one closing zero byte. */
	if(!mpg123_grow_string(sb, utf8_len+1))
		return;

	/* Second pass: emit the bytes. */
	out = (unsigned char*) sb->p;
	for(idx = 0; idx < l; ++idx)
	{
		const unsigned char c = s[idx];
		if(c >= 0x80)
		{
			/* two-byte encoding: 110000xx 10xxxxxx */
			out[0] = 0xc0 | (c>>6);
			out[1] = 0x80 | (c & 0x3f);
			out += 2;
		}
		else
		{
			*out = c;
			++out;
		}
	}

	sb->p[utf8_len] = 0;
	sb->fill = utf8_len+1;
}
1292
/*
	Check if we have byte order mark(s) at the start of the data, return:
	-1: little endian
	 0: no BOM
	 1: big endian

	This modifies source and len to indicate the data _after_ the BOM(s).
	Note on nasty data: The last encountered BOM determines the endianness.
	I have seen data with multiple BOMS, namely from "the" id3v2 program.
	Not nice, but what should I do?

	This walks the BOMs in a loop instead of recursing once per BOM, so that
	input consisting of a long run of BOMs cannot exhaust the stack.
*/
static int check_bom(const unsigned char** source, size_t *len)
{
	int endian = 0; /* verdict of the last BOM seen so far */

	while(*len >= 2)
	{
		int this_bom = 0;

		if((*source)[0] == 0xff && (*source)[1] == 0xfe)
			this_bom = -1;
		else if((*source)[0] == 0xfe && (*source)[1] == 0xff)
			this_bom = 1;

		if(this_bom == 0)
			break; /* Real data begins here. */

		/* Skip the detected BOM. The last one wins! */
		*source += 2;
		*len    -= 2;
		endian   = this_bom;
	}
	return endian;
}
1329
1330 #define FULLPOINT(f,s) ( (((f)&0x3ff)<<10) + ((s)&0x3ff) + 0x10000 )
1331 /* Remember: There's a limit at 0x1ffff. */
1332 #define UTF8LEN(x) ( (x)<0x80 ? 1 : ((x)<0x800 ? 2 : ((x)<0x10000 ? 3 : 4)))
convert_utf16bom(mpg123_string * sb,const unsigned char * s,size_t l,const int noquiet)1333 static void convert_utf16bom(mpg123_string *sb, const unsigned char* s, size_t l, const int noquiet)
1334 {
1335 size_t i;
1336 size_t n; /* number bytes that make up full pairs */
1337 unsigned char *p;
1338 size_t length = 0; /* the resulting UTF-8 length */
1339 /* Determine real length... extreme case can be more than utf-16 length. */
1340 size_t high = 0;
1341 size_t low = 1;
1342 int bom_endian;
1343
1344 debug1("convert_utf16 with length %lu", (unsigned long)l);
1345
1346 bom_endian = check_bom(&s, &l);
1347 debug1("UTF16 endianness check: %i", bom_endian);
1348
1349 if(bom_endian == -1) /* little-endian */
1350 {
1351 high = 1; /* The second byte is the high byte. */
1352 low = 0; /* The first byte is the low byte. */
1353 }
1354
1355 n = (l/2)*2; /* number bytes that make up full pairs */
1356
1357 /* first: get length, check for errors -- stop at first one */
1358 for(i=0; i < n; i+=2)
1359 {
1360 unsigned long point = ((unsigned long) s[i+high]<<8) + s[i+low];
1361 if((point & 0xfc00) == 0xd800) /* lead surrogate */
1362 {
1363 unsigned short second = (i+3 < l) ? (s[i+2+high]<<8) + s[i+2+low] : 0;
1364 if((second & 0xfc00) == 0xdc00) /* good... */
1365 {
1366 point = FULLPOINT(point,second);
1367 length += UTF8LEN(point); /* possibly 4 bytes */
1368 i+=2; /* We overstepped one word. */
1369 }
1370 else /* if no valid pair, break here */
1371 {
1372 if(noquiet) error2("Invalid UTF16 surrogate pair at %li (0x%04lx).", (unsigned long)i, point);
1373 n = i; /* Forget the half pair, END! */
1374 break;
1375 }
1376 }
1377 else length += UTF8LEN(point); /* 1,2 or 3 bytes */
1378 }
1379
1380 if(!mpg123_grow_string(sb, length+1))
1381 return;
1382
1383 /* Now really convert, skip checks as these have been done just before. */
1384 p = (unsigned char*) sb->p; /* Signedness doesn't matter but it shows I thought about the non-issue */
1385 for(i=0; i < n; i+=2)
1386 {
1387 unsigned long codepoint = ((unsigned long) s[i+high]<<8) + s[i+low];
1388 if((codepoint & 0xfc00) == 0xd800) /* lead surrogate */
1389 {
1390 unsigned short second = (s[i+2+high]<<8) + s[i+2+low];
1391 codepoint = FULLPOINT(codepoint,second);
1392 i+=2; /* We overstepped one word. */
1393 }
1394 if(codepoint < 0x80) *p++ = (unsigned char) codepoint;
1395 else if(codepoint < 0x800)
1396 {
1397 *p++ = (unsigned char) (0xc0 | (codepoint>>6));
1398 *p++ = (unsigned char) (0x80 | (codepoint & 0x3f));
1399 }
1400 else if(codepoint < 0x10000)
1401 {
1402 *p++ = (unsigned char) (0xe0 | (codepoint>>12));
1403 *p++ = 0x80 | ((codepoint>>6) & 0x3f);
1404 *p++ = 0x80 | (codepoint & 0x3f);
1405 }
1406 else if (codepoint < 0x200000)
1407 {
1408 *p++ = (unsigned char) (0xf0 | codepoint>>18);
1409 *p++ = (unsigned char) (0x80 | ((codepoint>>12) & 0x3f));
1410 *p++ = (unsigned char) (0x80 | ((codepoint>>6) & 0x3f));
1411 *p++ = (unsigned char) (0x80 | (codepoint & 0x3f));
1412 } /* ignore bigger ones (that are not possible here anyway) */
1413 }
1414 sb->p[sb->size-1] = 0; /* paranoia... */
1415 sb->fill = sb->size;
1416 }
1417 #undef UTF8LEN
1418 #undef FULLPOINT
1419
/*
	Store len bytes of UTF-8 text from source in sb without conversion,
	followed by a closing zero byte that is counted in sb->fill.
	Nothing is touched if growing sb fails.
	The noquiet argument is not used by this converter.
*/
static void convert_utf8(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet)
{
	const size_t needed = len+1; /* text plus terminating zero */

	if(!mpg123_grow_string(sb, needed))
		return;

	memcpy(sb->p, source, len);
	sb->p[len] = 0;
	sb->fill = needed;
}
1429
1430 #endif
1431