1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: t; c-basic-offset: 2 -*- */
2 /* Copyright 2006-2008 Tim-Philipp Müller <tim centricular net>
3  * Copyright 2005 Jan Schmidt <thaytan@mad.scientist.com>
4  * Copyright 2002,2003 Scott Wheeler <wheeler@kde.org> (portions from taglib)
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Library General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Library General Public License for more details.
15  *
16  * You should have received a copy of the GNU Library General Public
17  * License along with this library; if not, write to the
18  * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
19  * Boston, MA 02110-1301, USA.
20  */
21 
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25 
26 #include <string.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <gst/tag/tag.h>
30 #include <gst/base/gsttypefindhelper.h>
31 
32 #ifdef HAVE_ZLIB
33 #include <zlib.h>
34 #endif
35 
36 #include "id3v2.h"
37 
38 #ifndef GST_DISABLE_GST_DEBUG
39 #define GST_CAT_DEFAULT id3v2_ensure_debug_category()
40 #endif
41 
42 static gboolean parse_comment_frame (ID3TagsWorking * work);
43 static gchar *parse_url_link_frame (ID3TagsWorking * work,
44     const gchar ** tag_name);
45 static GArray *parse_text_identification_frame (ID3TagsWorking * work);
46 static gchar *parse_user_text_identification_frame (ID3TagsWorking * work,
47     const gchar ** tag_name);
48 static gchar *parse_unique_file_identifier (ID3TagsWorking * work,
49     const gchar ** tag_name);
50 static gboolean parse_relative_volume_adjustment_two (ID3TagsWorking * work);
51 static void parse_obsolete_tdat_frame (ID3TagsWorking * work);
52 static gboolean id3v2_tag_to_taglist (ID3TagsWorking * work,
53     const gchar * tag_name, const gchar * tag_str);
54 /* Parse a single string into an array of gchar* */
55 static void parse_split_strings (guint8 encoding, gchar * data, gint data_size,
56     GArray ** out_fields);
57 static void free_tag_strings (GArray * fields);
58 static gboolean
59 id3v2_genre_fields_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
60     GArray * tag_fields);
61 static gboolean parse_picture_frame (ID3TagsWorking * work);
62 static gboolean parse_private_frame_data (ID3TagsWorking * work);
63 
64 #define ID3V2_ENCODING_ISO8859 0x00
65 #define ID3V2_ENCODING_UTF16   0x01
66 #define ID3V2_ENCODING_UTF16BE 0x02
67 #define ID3V2_ENCODING_UTF8    0x03
68 
69 gboolean
id3v2_parse_frame(ID3TagsWorking * work)70 id3v2_parse_frame (ID3TagsWorking * work)
71 {
72   const gchar *tag_name;
73   gboolean result = FALSE;
74   gint i;
75   guint8 *frame_data = work->hdr.frame_data;
76   guint frame_data_size = work->cur_frame_size;
77   gchar *tag_str = NULL;
78   GArray *tag_fields = NULL;
79   guint8 *uu_data = NULL;
80 
81 #ifdef HAVE_ZLIB
82   guint8 *uncompressed_data = NULL;
83 #endif
84 
85   /* Check that the frame id is valid */
86   for (i = 0; i < 5 && work->frame_id[i] != '\0'; i++) {
87     if (!g_ascii_isalnum (work->frame_id[i])) {
88       GST_DEBUG ("Encountered invalid frame_id");
89       return FALSE;
90     }
91   }
92 
93   /* Can't handle encrypted frames right now (in case we ever do, we'll have
94    * to do the decryption after the un-unsynchronisation and decompression,
95    * not here) */
96   if (work->frame_flags & ID3V2_FRAME_FORMAT_ENCRYPTION) {
97     GST_WARNING ("Encrypted frames are not supported");
98     return FALSE;
99   }
100 
101   tag_name = gst_tag_from_id3_tag (work->frame_id);
102   if (tag_name == NULL &&
103       strncmp (work->frame_id, "RVA2", 4) != 0 &&
104       strncmp (work->frame_id, "TXXX", 4) != 0 &&
105       strncmp (work->frame_id, "TDAT", 4) != 0 &&
106       strncmp (work->frame_id, "UFID", 4) != 0) {
107     return FALSE;
108   }
109 
110   if (work->frame_flags & (ID3V2_FRAME_FORMAT_COMPRESSION |
111           ID3V2_FRAME_FORMAT_DATA_LENGTH_INDICATOR)) {
112     if (work->hdr.frame_data_size <= 4)
113       return FALSE;
114     if (ID3V2_VER_MAJOR (work->hdr.version) == 3) {
115       work->parse_size = GST_READ_UINT32_BE (frame_data);
116     } else {
117       work->parse_size = id3v2_read_synch_uint (frame_data, 4);
118     }
119     frame_data += 4;
120     frame_data_size -= 4;
121     GST_LOG ("Un-unsynced data size %d (of %d)", work->parse_size,
122         frame_data_size);
123     if (work->parse_size > frame_data_size) {
124       GST_WARNING ("ID3v2 frame %s data has invalid size %d (>%d)",
125           work->frame_id, work->parse_size, frame_data_size);
126       return FALSE;
127     }
128   }
129 
130   /* in v2.3 the frame sizes are not syncsafe, so the entire tag had to be
131    * unsynced. In v2.4 the frame sizes are syncsafe so it's just the frame
132    * data that needs un-unsyncing, but not the frame headers. */
133   if (ID3V2_VER_MAJOR (work->hdr.version) == 4) {
134     if ((work->hdr.flags & ID3V2_HDR_FLAG_UNSYNC) != 0 ||
135         ((work->frame_flags & ID3V2_FRAME_FORMAT_UNSYNCHRONISATION) != 0)) {
136       GST_DEBUG ("Un-unsyncing frame %s", work->frame_id);
137       uu_data = id3v2_ununsync_data (frame_data, &frame_data_size);
138       frame_data = uu_data;
139       GST_MEMDUMP ("ID3v2 frame (un-unsyced)", frame_data, frame_data_size);
140     }
141   }
142 
143   work->parse_size = frame_data_size;
144 
145   if (work->frame_flags & ID3V2_FRAME_FORMAT_COMPRESSION) {
146 #ifdef HAVE_ZLIB
147     uLongf destSize = work->parse_size;
148     Bytef *dest, *src;
149 
150     uncompressed_data = g_malloc (work->parse_size);
151 
152     dest = (Bytef *) uncompressed_data;
153     src = (Bytef *) frame_data;
154 
155     if (uncompress (dest, &destSize, src, frame_data_size) != Z_OK) {
156       g_free (uncompressed_data);
157       g_free (uu_data);
158       return FALSE;
159     }
160     if (destSize != work->parse_size) {
161       GST_WARNING
162           ("Decompressing ID3v2 frame %s did not produce expected size %d bytes (got %lu)",
163           tag_name, work->parse_size, destSize);
164       g_free (uncompressed_data);
165       g_free (uu_data);
166       return FALSE;
167     }
168     work->parse_data = uncompressed_data;
169 #else
170     GST_WARNING ("Compressed ID3v2 tag frame could not be decompressed, because"
171         " libgsttag-" GST_API_VERSION " was compiled without zlib support");
172     g_free (uu_data);
173     return FALSE;
174 #endif
175   } else {
176     work->parse_data = frame_data;
177   }
178 
179   if (work->frame_id[0] == 'T') {
180     if (strcmp (work->frame_id, "TDAT") == 0) {
181       parse_obsolete_tdat_frame (work);
182       result = TRUE;
183     } else if (strcmp (work->frame_id, "TXXX") == 0) {
184       /* Handle user text frame */
185       tag_str = parse_user_text_identification_frame (work, &tag_name);
186     } else {
187       /* Text identification frame */
188       tag_fields = parse_text_identification_frame (work);
189     }
190   } else if (work->frame_id[0] == 'W' && strcmp (work->frame_id, "WXXX") != 0) {
191     /* URL link frame: ISO-8859-1 encoded, one frame per tag */
192     tag_str = parse_url_link_frame (work, &tag_name);
193   } else if (!strcmp (work->frame_id, "COMM")) {
194     /* Comment */
195     result = parse_comment_frame (work);
196   } else if (!strcmp (work->frame_id, "APIC")) {
197     /* Attached picture */
198     result = parse_picture_frame (work);
199   } else if (!strcmp (work->frame_id, "RVA2")) {
200     /* Relative volume */
201     result = parse_relative_volume_adjustment_two (work);
202   } else if (!strcmp (work->frame_id, "UFID")) {
203     /* Unique file identifier */
204     tag_str = parse_unique_file_identifier (work, &tag_name);
205   } else if (!strcmp (work->frame_id, "PRIV")) {
206     /* private frame */
207     result = parse_private_frame_data (work);
208   }
209 #ifdef HAVE_ZLIB
210   if (work->frame_flags & ID3V2_FRAME_FORMAT_COMPRESSION) {
211     g_free (uncompressed_data);
212     uncompressed_data = NULL;
213     work->parse_data = frame_data;
214   }
215 #endif
216 
217   if (tag_str != NULL) {
218     /* g_print ("Tag %s value %s\n", tag_name, tag_str); */
219     result = id3v2_tag_to_taglist (work, tag_name, tag_str);
220     g_free (tag_str);
221   }
222   if (tag_fields != NULL) {
223     if (strcmp (work->frame_id, "TCON") == 0) {
224       /* Genre strings need special treatment */
225       result |= id3v2_genre_fields_to_taglist (work, tag_name, tag_fields);
226     } else {
227       gint t;
228 
229       for (t = 0; t < tag_fields->len; t++) {
230         tag_str = g_array_index (tag_fields, gchar *, t);
231         if (tag_str != NULL && tag_str[0] != '\0')
232           result |= id3v2_tag_to_taglist (work, tag_name, tag_str);
233       }
234     }
235     free_tag_strings (tag_fields);
236   }
237 
238   g_free (uu_data);
239 
240   return result;
241 }
242 
243 static gboolean
parse_comment_frame(ID3TagsWorking * work)244 parse_comment_frame (ID3TagsWorking * work)
245 {
246   guint dummy;
247   guint8 encoding;
248   gchar language[4];
249   GArray *fields = NULL;
250   gchar *description, *text;
251 
252   if (work->parse_size < 6)
253     return FALSE;
254 
255   encoding = work->parse_data[0];
256   language[0] = g_ascii_tolower (work->parse_data[1]);
257   language[1] = g_ascii_tolower (work->parse_data[2]);
258   language[2] = g_ascii_tolower (work->parse_data[3]);
259   language[3] = '\0';
260 
261   parse_split_strings (encoding, (gchar *) work->parse_data + 4,
262       work->parse_size - 4, &fields);
263 
264   if (fields == NULL || fields->len < 2) {
265     GST_WARNING ("Failed to decode comment frame");
266     goto fail;
267   }
268   description = g_array_index (fields, gchar *, 0);
269   text = g_array_index (fields, gchar *, 1);
270 
271   if (!g_utf8_validate (text, -1, NULL)) {
272     GST_WARNING ("Converted string is not valid utf-8");
273     goto fail;
274   }
275 
276   /* skip our own dummy descriptions (from id3v2mux) */
277   if (strlen (description) > 0 && g_utf8_validate (description, -1, NULL) &&
278       sscanf (description, "c%u", &dummy) != 1) {
279     gchar *s;
280 
281     /* must be either an ISO-639-1 or ISO-639-2 language code */
282     if (language[0] != '\0' &&
283         g_ascii_isalpha (language[0]) &&
284         g_ascii_isalpha (language[1]) &&
285         (g_ascii_isalpha (language[2]) || language[2] == '\0')) {
286       const gchar *lang_code;
287 
288       /* prefer two-letter ISO 639-1 code if we have a mapping */
289       lang_code = gst_tag_get_language_code (language);
290       s = g_strdup_printf ("%s[%s]=%s", description,
291           (lang_code) ? lang_code : language, text);
292     } else {
293       s = g_strdup_printf ("%s=%s", description, text);
294     }
295     gst_tag_list_add (work->tags, GST_TAG_MERGE_APPEND,
296         GST_TAG_EXTENDED_COMMENT, s, NULL);
297     g_free (s);
298   } else if (text != NULL && *text != '\0') {
299     gst_tag_list_add (work->tags, GST_TAG_MERGE_APPEND,
300         GST_TAG_COMMENT, text, NULL);
301   } else {
302     goto fail;
303   }
304 
305   free_tag_strings (fields);
306   return TRUE;
307 
308 fail:
309   {
310     GST_WARNING ("failed to parse COMM frame");
311     free_tag_strings (fields);
312     return FALSE;
313   }
314 }
315 
316 static GArray *
parse_text_identification_frame(ID3TagsWorking * work)317 parse_text_identification_frame (ID3TagsWorking * work)
318 {
319   guchar encoding;
320   GArray *fields = NULL;
321 
322   if (work->parse_size < 2)
323     return NULL;
324 
325   encoding = work->parse_data[0];
326   parse_split_strings (encoding, (gchar *) work->parse_data + 1,
327       work->parse_size - 1, &fields);
328   if (fields) {
329     if (fields->len > 0) {
330       GST_LOG ("Read %d fields from Text ID frame of size %d with encoding %d"
331           ". First is '%s'", fields->len, work->parse_size - 1, encoding,
332           g_array_index (fields, gchar *, 0));
333     } else {
334       GST_LOG ("Read 0 fields from Text ID frame of size %d with encoding %d",
335           work->parse_size - 1, encoding);
336     }
337   }
338 
339   return fields;
340 }
341 
342 static gboolean
link_is_known_license(const gchar * url)343 link_is_known_license (const gchar * url)
344 {
345   return g_str_has_prefix (url, "http://creativecommons.org/licenses/");
346 }
347 
348 static gchar *
parse_url_link_frame(ID3TagsWorking * work,const gchar ** tag_name)349 parse_url_link_frame (ID3TagsWorking * work, const gchar ** tag_name)
350 {
351   gsize len;
352   gchar *nul, *data, *link;
353 
354   *tag_name = NULL;
355 
356   if (work->parse_size == 0)
357     return NULL;
358 
359   data = (gchar *) work->parse_data;
360   /* if there's more data then the string is long, we only want to parse the
361    * data up to the terminating zero to g_convert and ignore the rest, as
362    * per spec */
363   nul = memchr (data, '\0', work->parse_size);
364   if (nul != NULL) {
365     len = (gsize) (nul - data);
366   } else {
367     len = work->parse_size;
368   }
369 
370   link = g_convert (data, len, "UTF-8", "ISO-8859-1", NULL, NULL, NULL);
371 
372   if (link == NULL || !gst_uri_is_valid (link)) {
373     GST_DEBUG ("Invalid URI in %s frame: %s", work->frame_id,
374         GST_STR_NULL (link));
375     g_free (link);
376     return NULL;
377   }
378 
379   /* we don't know if it's a link to a page that explains the copyright
380    * situation, or a link that points to/represents a license, the ID3 spec
381    * does not separate those two things; for now only put known license URIs
382    * into GST_TAG_LICENSE_URI and everything else into GST_TAG_COPYRIGHT_URI */
383   if (strcmp (work->frame_id, "WCOP") == 0) {
384     if (link_is_known_license (link))
385       *tag_name = GST_TAG_LICENSE_URI;
386     else
387       *tag_name = GST_TAG_COPYRIGHT_URI;
388   } else if (strcmp (work->frame_id, "WOAF") == 0) {
389     /* can't be bothered to create a CONTACT_URI tag for this, so let's just
390      * put into into GST_TAG_CONTACT, which is where it ends up when reading
391      * the info from vorbis comments as well */
392     *tag_name = GST_TAG_CONTACT;
393   }
394 
395   return link;
396 }
397 
398 
399 static gchar *
parse_user_text_identification_frame(ID3TagsWorking * work,const gchar ** tag_name)400 parse_user_text_identification_frame (ID3TagsWorking * work,
401     const gchar ** tag_name)
402 {
403   gchar *ret;
404   guchar encoding;
405   GArray *fields = NULL;
406 
407   *tag_name = NULL;
408 
409   if (work->parse_size < 2)
410     return NULL;
411 
412   encoding = work->parse_data[0];
413 
414   parse_split_strings (encoding, (gchar *) work->parse_data + 1,
415       work->parse_size - 1, &fields);
416 
417   if (fields == NULL)
418     return NULL;
419 
420   if (fields->len != 2) {
421     GST_WARNING ("Expected 2 fields in TXXX frame, but got %d", fields->len);
422     free_tag_strings (fields);
423     return NULL;
424   }
425 
426   *tag_name =
427       gst_tag_from_id3_user_tag ("TXXX", g_array_index (fields, gchar *, 0));
428 
429   GST_LOG ("TXXX frame of size %d. Mapped descriptor '%s' to GStreamer tag %s",
430       work->parse_size - 1, g_array_index (fields, gchar *, 0),
431       GST_STR_NULL (*tag_name));
432 
433   if (*tag_name) {
434     ret = g_strdup (g_array_index (fields, gchar *, 1));
435     /* GST_LOG ("%s = %s", *tag_name, GST_STR_NULL (ret)); */
436   } else {
437     ret = NULL;
438   }
439 
440   free_tag_strings (fields);
441   return ret;
442 }
443 
444 static gboolean
parse_id_string(ID3TagsWorking * work,gchar ** p_str,gint * p_len,gint * p_datalen)445 parse_id_string (ID3TagsWorking * work, gchar ** p_str, gint * p_len,
446     gint * p_datalen)
447 {
448   gint len, datalen;
449 
450   if (work->parse_size < 2)
451     return FALSE;
452 
453   for (len = 0; len < work->parse_size - 1; ++len) {
454     if (work->parse_data[len] == '\0')
455       break;
456   }
457 
458   datalen = work->parse_size - (len + 1);
459   if (len == 0 || datalen <= 0)
460     return FALSE;
461 
462   *p_str = g_strndup ((gchar *) work->parse_data, len);
463   *p_len = len;
464   *p_datalen = datalen;
465 
466   return TRUE;
467 }
468 
469 static gboolean
parse_private_frame_data(ID3TagsWorking * work)470 parse_private_frame_data (ID3TagsWorking * work)
471 {
472   GstBuffer *binary_data = NULL;
473   GstStructure *owner_info = NULL;
474   guint8 *owner_str = NULL;
475   gsize owner_len;
476   GstSample *priv_frame = NULL;
477 
478   if (work->parse_size == 0) {
479     /* private frame data not available */
480     return FALSE;
481   }
482 
483   owner_str =
484       (guint8 *) memchr ((guint8 *) work->parse_data, 0, work->parse_size);
485 
486   if (owner_str == NULL) {
487     GST_WARNING ("Invalid PRIV frame received");
488     return FALSE;
489   }
490 
491   owner_len = (gsize) (owner_str - work->parse_data) + 1;
492 
493   owner_info =
494       gst_structure_new ("ID3PrivateFrame", "owner", G_TYPE_STRING,
495       work->parse_data, NULL);
496 
497   binary_data = gst_buffer_new_and_alloc (work->parse_size - owner_len);
498   gst_buffer_fill (binary_data, 0, work->parse_data + owner_len,
499       work->parse_size - owner_len);
500 
501   priv_frame = gst_sample_new (binary_data, NULL, NULL, owner_info);
502 
503   gst_tag_list_add (work->tags, GST_TAG_MERGE_APPEND,
504       GST_TAG_PRIVATE_DATA, priv_frame, NULL);
505 
506   gst_sample_unref (priv_frame);
507   gst_buffer_unref (binary_data);
508 
509   return TRUE;
510 }
511 
512 static gchar *
parse_unique_file_identifier(ID3TagsWorking * work,const gchar ** tag_name)513 parse_unique_file_identifier (ID3TagsWorking * work, const gchar ** tag_name)
514 {
515   gint len, datalen;
516   gchar *owner_id, *data, *ret = NULL;
517 
518   GST_LOG ("parsing UFID frame of size %d", work->parse_size);
519 
520   if (!parse_id_string (work, &owner_id, &len, &datalen))
521     return NULL;
522 
523   data = (gchar *) work->parse_data + len + 1;
524   GST_LOG ("UFID owner ID: %s (+ %d bytes of data)", owner_id, datalen);
525 
526   if (strcmp (owner_id, "http://musicbrainz.org") == 0 &&
527       g_utf8_validate (data, datalen, NULL)) {
528     *tag_name = GST_TAG_MUSICBRAINZ_TRACKID;
529     ret = g_strndup (data, datalen);
530   } else {
531     GST_INFO ("Unknown UFID owner ID: %s", owner_id);
532   }
533   g_free (owner_id);
534 
535   return ret;
536 }
537 
538 /* parse data and return length of the next string in the given encoding,
539  * including the NUL terminator */
540 static gint
scan_encoded_string(guint8 encoding,gchar * data,gint data_size)541 scan_encoded_string (guint8 encoding, gchar * data, gint data_size)
542 {
543   gint i;
544 
545   switch (encoding) {
546     case ID3V2_ENCODING_ISO8859:
547     case ID3V2_ENCODING_UTF8:
548       for (i = 0; i < data_size; ++i) {
549         if (data[i] == '\0')
550           return i + 1;
551       }
552       break;
553     case ID3V2_ENCODING_UTF16:
554     case ID3V2_ENCODING_UTF16BE:
555       /* we don't care about BOMs here and treat them as part of the string */
556       /* Find '\0\0' terminator */
557       for (i = 0; i < data_size - 1; i += 2) {
558         if (data[i] == '\0' && data[i + 1] == '\0')
559           return i + 2;
560       }
561       break;
562     default:
563       break;
564   }
565 
566   return 0;
567 }
568 
569 static gboolean
parse_picture_frame(ID3TagsWorking * work)570 parse_picture_frame (ID3TagsWorking * work)
571 {
572   guint8 txt_encoding, pic_type;
573   gchar *mime_str = NULL;
574   gint len, datalen;
575 
576   GST_LOG ("APIC frame (ID3v2.%u)", ID3V2_VER_MAJOR (work->hdr.version));
577 
578   if (work->parse_size < 1 + 1 + 1 + 1 + 1)
579     goto not_enough_data;
580 
581   txt_encoding = work->parse_data[0];
582   ++work->parse_data;
583   --work->parse_size;
584 
585   /* Read image format; in early ID3v2 versions this is a fixed-length
586    * 3-character string without terminator; in later versions (>= 2.3.0)
587    * this is a NUL-terminated string of variable length */
588   if (ID3V2_VER_MAJOR (work->hdr.version) < 3) {
589     if (work->parse_size < 3)
590       goto not_enough_data;
591 
592     mime_str = g_strndup ((gchar *) work->parse_data, 3);
593     len = 3;
594   } else {
595     if (!parse_id_string (work, &mime_str, &len, &datalen))
596       return FALSE;
597     ++len;                      /* for string terminator */
598   }
599 
600   if (work->parse_size < len + 1 + 1 + 1)
601     goto not_enough_data;
602 
603   work->parse_data += len;
604   work->parse_size -= len;
605 
606   /* Read image type */
607   pic_type = work->parse_data[0];
608   ++work->parse_data;
609   --work->parse_size;
610 
611   GST_LOG ("APIC frame mime type    : %s", GST_STR_NULL (mime_str));
612   GST_LOG ("APIC frame picture type : 0x%02x", (guint) pic_type);
613 
614   if (work->parse_size < 1 + 1)
615     goto not_enough_data;
616 
617   len = scan_encoded_string (txt_encoding, (gchar *) work->parse_data,
618       work->parse_size);
619 
620   if (len < 1)
621     goto error;
622 
623   /* just skip the description string ... */
624   GST_LOG ("Skipping description string (%d bytes in original coding)", len);
625 
626   if (work->parse_size < len + 1)
627     goto not_enough_data;
628 
629   work->parse_data += len;
630   work->parse_size -= len;
631 
632   GST_DEBUG ("image data is %u bytes", work->parse_size);
633 
634   if (work->parse_size <= 0)
635     goto not_enough_data;
636 
637   if (!gst_tag_list_add_id3_image (work->tags, (guint8 *) work->parse_data,
638           work->parse_size, pic_type)) {
639     goto error;
640   }
641 
642   g_free (mime_str);
643   return TRUE;
644 
645 not_enough_data:
646   {
647     GST_DEBUG ("not enough data, skipping APIC frame");
648     /* fall through to error */
649   }
650 error:
651   {
652     GST_DEBUG ("problem parsing APIC frame, skipping");
653     g_free (mime_str);
654     return FALSE;
655   }
656 }
657 
658 #define ID3V2_RVA2_CHANNEL_MASTER  1
659 
660 static gboolean
parse_relative_volume_adjustment_two(ID3TagsWorking * work)661 parse_relative_volume_adjustment_two (ID3TagsWorking * work)
662 {
663   const gchar *gain_tag_name = NULL;
664   const gchar *peak_tag_name = NULL;
665   gdouble gain_dB, peak_val;
666   guint64 peak;
667   guint8 *data, chan, peak_bits;
668   gchar *id;
669   gint len, datalen, i;
670 
671   if (!parse_id_string (work, &id, &len, &datalen))
672     return FALSE;
673 
674   if (datalen < (1 + 2 + 1)) {
675     GST_WARNING ("broken RVA2 frame, data size only %d bytes", datalen);
676     g_free (id);
677     return FALSE;
678   }
679 
680   data = work->parse_data + len + 1;
681   chan = GST_READ_UINT8 (data);
682   gain_dB = (gdouble) ((gint16) GST_READ_UINT16_BE (data + 1)) / 512.0;
683   /* The meaning of the peak value is not defined in the ID3v2 spec. However,
684    * the first/only implementation of this seems to have been in XMMS, and
685    * other libs (like mutagen) seem to follow that implementation as well:
686    * see http://bugs.xmms.org/attachment.cgi?id=113&action=view */
687   peak_bits = GST_READ_UINT8 (data + 1 + 2);
688   if (peak_bits > 64) {
689     GST_WARNING ("silly peak precision of %d bits, ignoring", (gint) peak_bits);
690     peak_bits = 0;
691   }
692   data += 1 + 2 + 1;
693   datalen -= 1 + 2 + 1;
694   if (peak_bits == 16) {
695     peak = GST_READ_UINT16_BE (data);
696   } else {
697     peak = 0;
698     for (i = 0; i < (GST_ROUND_UP_8 (peak_bits) / 8) && datalen > 0; ++i) {
699       peak = peak << 8;
700       peak |= GST_READ_UINT8 (data);
701       ++data;
702       --datalen;
703     }
704   }
705 
706   if (peak_bits > 0) {
707     peak = peak << (64 - GST_ROUND_UP_8 (peak_bits));
708     peak_val =
709         gst_guint64_to_gdouble (peak) /
710         gst_util_guint64_to_gdouble (G_MAXINT64);
711     GST_LOG ("RVA2 frame: id=%s, chan=%u, adj=%.2fdB, peak_bits=%u, peak=%.2f",
712         id, chan, gain_dB, (guint) peak_bits, peak_val);
713   } else {
714     peak_val = 0;
715   }
716 
717   if (chan == ID3V2_RVA2_CHANNEL_MASTER && strcmp (id, "track") == 0) {
718     gain_tag_name = GST_TAG_TRACK_GAIN;
719     peak_tag_name = GST_TAG_TRACK_PEAK;
720   } else if (chan == ID3V2_RVA2_CHANNEL_MASTER && strcmp (id, "album") == 0) {
721     gain_tag_name = GST_TAG_ALBUM_GAIN;
722     peak_tag_name = GST_TAG_ALBUM_PEAK;
723   } else {
724     GST_INFO ("Unhandled RVA2 frame id '%s' for channel %d", id, chan);
725   }
726 
727   if (gain_tag_name) {
728     gst_tag_list_add (work->tags, GST_TAG_MERGE_APPEND,
729         gain_tag_name, gain_dB, NULL);
730   }
731   if (peak_tag_name && peak_bits > 0) {
732     gst_tag_list_add (work->tags, GST_TAG_MERGE_APPEND,
733         peak_tag_name, peak_val, NULL);
734   }
735 
736   g_free (id);
737 
738   return (gain_tag_name != NULL || peak_tag_name != NULL);
739 }
740 
741 static void
parse_obsolete_tdat_frame(ID3TagsWorking * work)742 parse_obsolete_tdat_frame (ID3TagsWorking * work)
743 {
744   if (work->parse_size >= 5 &&
745       work->parse_data[0] == ID3V2_ENCODING_ISO8859 &&
746       g_ascii_isdigit (work->parse_data[1]) &&
747       g_ascii_isdigit (work->parse_data[2]) &&
748       g_ascii_isdigit (work->parse_data[3]) &&
749       g_ascii_isdigit (work->parse_data[4])) {
750 
751     guint pending_day = (10 * g_ascii_digit_value (work->parse_data[1])) +
752         g_ascii_digit_value (work->parse_data[2]);
753     guint pending_month = (10 * g_ascii_digit_value (work->parse_data[3])) +
754         g_ascii_digit_value (work->parse_data[4]);
755 
756     if (pending_day >= 1 && pending_day <= 31 && pending_month >= 1
757         && pending_month <= 12) {
758       GST_LOG ("date (dd/mm) %02u/%02u", pending_day, pending_month);
759       work->pending_day = pending_day;
760       work->pending_month = pending_month;
761     } else {
762       GST_WARNING ("Ignoring invalid ID3v2 TDAT frame (dd/mm) %02u/%02u",
763           pending_day, pending_month);
764     }
765   }
766 }
767 
768 static gboolean
id3v2_tag_to_taglist(ID3TagsWorking * work,const gchar * tag_name,const gchar * tag_str)769 id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
770     const gchar * tag_str)
771 {
772   GType tag_type = gst_tag_get_type (tag_name);
773   GstTagList *tag_list = work->tags;
774 
775   if (tag_str == NULL)
776     return FALSE;
777 
778   switch (tag_type) {
779     case G_TYPE_UINT:
780     {
781       gint current, total;
782 
783       if (sscanf (tag_str, "%d/%d", &current, &total) == 2) {
784         if (total <= 0) {
785           GST_WARNING ("Ignoring invalid value for total %d in tag %s",
786               total, tag_name);
787         } else {
788           if (strcmp (tag_name, GST_TAG_TRACK_NUMBER) == 0) {
789             gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND,
790                 GST_TAG_TRACK_COUNT, total, NULL);
791           } else if (strcmp (tag_name, GST_TAG_ALBUM_VOLUME_NUMBER) == 0) {
792             gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND,
793                 GST_TAG_ALBUM_VOLUME_COUNT, total, NULL);
794           }
795         }
796       } else if (sscanf (tag_str, "%d", &current) != 1) {
797         /* Not an integer in the string */
798         GST_WARNING ("Tag string for tag %s does not contain an integer - "
799             "ignoring", tag_name);
800         break;
801       }
802 
803       if (current <= 0) {
804         GST_WARNING ("Ignoring invalid value %d in tag %s", current, tag_name);
805       } else {
806         gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND, tag_name, current,
807             NULL);
808       }
809       break;
810     }
811     case G_TYPE_UINT64:
812     {
813       guint64 tmp;
814 
815       g_assert (strcmp (tag_name, GST_TAG_DURATION) == 0);
816       tmp = strtoul (tag_str, NULL, 10);
817       if (tmp == 0) {
818         break;
819       }
820       gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND,
821           GST_TAG_DURATION, tmp * 1000 * 1000, NULL);
822       break;
823     }
824     case G_TYPE_STRING:{
825       const GValue *val;
826       guint i, num;
827 
828       /* make sure we add each unique string only once per tag, we don't want
829        * to have the same genre in the genre list multiple times, for example,
830        * or the same DiscID in there twice just because it's contained in the
831        * tag multiple times under different TXXX user tags */
832       num = gst_tag_list_get_tag_size (tag_list, tag_name);
833       for (i = 0; i < num; ++i) {
834         val = gst_tag_list_get_value_index (tag_list, tag_name, i);
835         if (val != NULL && strcmp (g_value_get_string (val), tag_str) == 0)
836           break;
837       }
838       if (i == num) {
839         gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND,
840             tag_name, tag_str, NULL);
841       }
842       break;
843     }
844 
845     default:{
846       if (tag_type == GST_TYPE_DATE_TIME) {
847         GstDateTime *dt;
848 
849         /* Dates can be yyyy-MM-dd, yyyy-MM or yyyy */
850         dt = gst_date_time_new_from_iso8601_string (tag_str);
851         if (dt != NULL) {
852           gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND, tag_name, dt, NULL);
853           gst_date_time_unref (dt);
854         } else {
855           GST_WARNING ("Could not transform '%s' into date", tag_str);
856         }
857       } else {
858         GValue src = { 0, };
859         GValue dest = { 0, };
860 
861         /* handles anything else */
862         g_value_init (&src, G_TYPE_STRING);
863         g_value_set_string (&src, (const gchar *) tag_str);
864         g_value_init (&dest, tag_type);
865 
866         if (g_value_transform (&src, &dest)) {
867           gst_tag_list_add_values (tag_list, GST_TAG_MERGE_APPEND,
868               tag_name, &dest, NULL);
869         } else if (tag_type == G_TYPE_DOUBLE) {
870           /* replaygain tags in TXXX frames ... */
871           g_value_set_double (&dest, g_strtod (tag_str, NULL));
872           gst_tag_list_add_values (tag_list, GST_TAG_MERGE_KEEP,
873               tag_name, &dest, NULL);
874           GST_LOG ("Converted string '%s' to double %f", tag_str,
875               g_value_get_double (&dest));
876         } else {
877           GST_WARNING ("Failed to transform tag from string '%s' to type '%s'",
878               tag_str, g_type_name (tag_type));
879         }
880 
881         g_value_unset (&src);
882         g_value_unset (&dest);
883       }
884       break;
885     }
886   }
887 
888   return TRUE;
889 }
890 
891 /* Check that an array of characters contains only digits */
892 static gboolean
id3v2_are_digits(const gchar * chars,gint size)893 id3v2_are_digits (const gchar * chars, gint size)
894 {
895   gint i;
896 
897   for (i = 0; i < size; i++) {
898     if (!g_ascii_isdigit (chars[i]))
899       return FALSE;
900   }
901   return TRUE;
902 }
903 
904 static gboolean
id3v2_genre_string_to_taglist(ID3TagsWorking * work,const gchar * tag_name,const gchar * tag_str,gint len)905 id3v2_genre_string_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
906     const gchar * tag_str, gint len)
907 {
908   g_return_val_if_fail (tag_str != NULL, FALSE);
909 
910   /* If it's a number, it might be a defined genre */
911   if (id3v2_are_digits (tag_str, len)) {
912     tag_str = gst_tag_id3_genre_get (strtol (tag_str, NULL, 10));
913     return id3v2_tag_to_taglist (work, tag_name, tag_str);
914   }
915   /* Otherwise it might be "RX" or "CR" */
916   if (len == 2) {
917     if (g_ascii_strncasecmp ("rx", tag_str, len) == 0)
918       return id3v2_tag_to_taglist (work, tag_name, "Remix");
919 
920     if (g_ascii_strncasecmp ("cr", tag_str, len) == 0)
921       return id3v2_tag_to_taglist (work, tag_name, "Cover");
922   }
923 
924   /* Otherwise it's a string */
925   return id3v2_tag_to_taglist (work, tag_name, tag_str);
926 }
927 
928 static gboolean
id3v2_genre_fields_to_taglist(ID3TagsWorking * work,const gchar * tag_name,GArray * tag_fields)929 id3v2_genre_fields_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
930     GArray * tag_fields)
931 {
932   gchar *tag_str = NULL;
933   gboolean result = FALSE;
934   gint i;
935 
936   for (i = 0; i < tag_fields->len; i++) {
937     gint len;
938 
939     tag_str = g_array_index (tag_fields, gchar *, i);
940     if (tag_str == NULL)
941       continue;
942 
943     len = strlen (tag_str);
944     /* Only supposed to see '(n)' type numeric genre strings in ID3 <= 2.3.0
945      * but apparently we see them in 2.4.0 sometimes too */
946     if (TRUE || work->hdr.version <= 0x300) {   /* <= 2.3.0 */
947       /* Check for genre numbers wrapped in parentheses, possibly
948        * followed by a string */
949       while (len >= 2) {
950         gint pos;
951         gboolean found = FALSE;
952 
953         /* Double parenthesis ends the numeric genres, but we need
954          * to swallow the first one so we actually output '(' */
955         if (tag_str[0] == '(' && tag_str[1] == '(') {
956           tag_str++;
957           len--;
958           break;
959         }
960 
961         /* If the first char is not a parenthesis, then stop
962          * looking for parenthesised genre strings */
963         if (tag_str[0] != '(')
964           break;
965 
966         for (pos = 1; pos < len; pos++) {
967           if (tag_str[pos] == ')') {
968             gchar *tmp_str;
969 
970             tmp_str = g_strndup (tag_str + 1, pos - 1);
971             result |=
972                 id3v2_genre_string_to_taglist (work, tag_name, tmp_str,
973                 pos - 1);
974             g_free (tmp_str);
975             tag_str += pos + 1;
976             len -= pos + 1;
977             found = TRUE;
978             break;
979           }
980 
981           /* If we encounter a non-digit while searching for a closing
982            * parenthesis, we should not try and interpret this as a
983            * numeric genre string */
984           if (!g_ascii_isdigit (tag_str[pos]))
985             break;
986         }
987         if (!found)
988           break;                /* There was no closing parenthesis */
989       }
990     }
991 
992     if (len > 0 && tag_str != NULL)
993       result |= id3v2_genre_string_to_taglist (work, tag_name, tag_str, len);
994   }
995   return result;
996 }
997 
998 static gboolean
find_utf16_bom(gchar * data,gint * p_data_endianness)999 find_utf16_bom (gchar * data, gint * p_data_endianness)
1000 {
1001   guint16 marker = (GST_READ_UINT8 (data) << 8) | GST_READ_UINT8 (data + 1);
1002 
1003   switch (marker) {
1004     case 0xFFFE:
1005       *p_data_endianness = G_LITTLE_ENDIAN;
1006       return TRUE;
1007     case 0xFEFF:
1008       *p_data_endianness = G_BIG_ENDIAN;
1009       return TRUE;
1010     default:
1011       break;
1012   }
1013   return FALSE;
1014 }
1015 
1016 static void *
string_utf8_dup(const gchar * start,const guint size)1017 string_utf8_dup (const gchar * start, const guint size)
1018 {
1019   const gchar *env;
1020   gsize bytes_read;
1021   gchar *utf8;
1022 
1023   /* Should we try the charsets specified
1024    * via environment variables FIRST ? */
1025   if (g_utf8_validate (start, size, NULL)) {
1026     utf8 = g_strndup (start, size);
1027     goto beach;
1028   }
1029 
1030   env = g_getenv ("GST_ID3V1_TAG_ENCODING");
1031   if (!env || *env == '\0')
1032     env = g_getenv ("GST_ID3_TAG_ENCODING");
1033   if (!env || *env == '\0')
1034     env = g_getenv ("GST_TAG_ENCODING");
1035 
1036   /* Try charsets specified via the environment */
1037   if (env && *env != '\0') {
1038     gchar **c, **csets;
1039 
1040     csets = g_strsplit (env, G_SEARCHPATH_SEPARATOR_S, -1);
1041 
1042     for (c = csets; c && *c; ++c) {
1043       if ((utf8 =
1044               g_convert (start, size, "UTF-8", *c, &bytes_read, NULL, NULL))) {
1045         if (bytes_read == size) {
1046           GST_DEBUG ("Using charset %s to interpret id3 tags", *c);
1047           g_strfreev (csets);
1048           goto beach;
1049         }
1050         g_free (utf8);
1051         utf8 = NULL;
1052       }
1053     }
1054   }
1055   /* Try current locale (if not UTF-8) */
1056   if (!g_get_charset (&env)) {
1057     if ((utf8 = g_locale_to_utf8 (start, size, &bytes_read, NULL, NULL))) {
1058       if (bytes_read == size) {
1059         goto beach;
1060       }
1061       g_free (utf8);
1062       utf8 = NULL;
1063     }
1064   }
1065 
1066   /* Try ISO-8859-1 */
1067   utf8 =
1068       g_convert (start, size, "UTF-8", "ISO-8859-1", &bytes_read, NULL, NULL);
1069   if (utf8 != NULL && bytes_read == size) {
1070     goto beach;
1071   }
1072 
1073   g_free (utf8);
1074   return NULL;
1075 
1076 beach:
1077 
1078   g_strchomp (utf8);
1079 
1080   return (utf8);
1081 }
1082 
1083 static void
parse_insert_string_field(guint8 encoding,gchar * data,gint data_size,GArray * fields)1084 parse_insert_string_field (guint8 encoding, gchar * data, gint data_size,
1085     GArray * fields)
1086 {
1087   gchar *field = NULL;
1088 
1089   switch (encoding) {
1090     case ID3V2_ENCODING_UTF16:
1091     case ID3V2_ENCODING_UTF16BE:
1092     {
1093       gunichar2 *utf16;
1094       gint data_endianness;
1095       glong n_read = 0, size = 0;
1096       guint len, i;
1097 
1098       if (encoding == ID3V2_ENCODING_UTF16)
1099         data_endianness = G_BYTE_ORDER;
1100       else
1101         data_endianness = G_BIG_ENDIAN;
1102 
1103       /* Sometimes we see strings with multiple BOM markers at the start.
1104        * In that case, we assume the innermost one is correct. If that fails
1105        * to produce valid UTF-8, we try the other endianness anyway */
1106       while (data_size >= 2 && find_utf16_bom (data, &data_endianness)) {
1107         data += 2;              /* skip BOM */
1108         data_size -= 2;
1109       }
1110 
1111       if (data_size < 2) {
1112         field = g_strdup ("");
1113         break;
1114       }
1115 
1116       /* alloc needed to ensure correct alignment which is required by GLib */
1117       len = data_size / 2;
1118       utf16 = g_try_new (gunichar2, len + 1);
1119       if (utf16 == NULL)
1120         break;
1121 
1122       memcpy (utf16, data, 2 * len);
1123 
1124       GST_LOG ("Trying interpreting data as UTF-16-%s first",
1125           (data_endianness == G_LITTLE_ENDIAN) ? "LE" : "BE");
1126 
1127       if (data_endianness != G_BYTE_ORDER) {
1128         /* convert to native endian UTF-16 */
1129         for (i = 0; i < len; ++i)
1130           utf16[i] = GUINT16_SWAP_LE_BE (utf16[i]);
1131       }
1132 
1133       /* convert to UTF-8 */
1134       field = g_utf16_to_utf8 (utf16, len, &n_read, &size, NULL);
1135       if (field != NULL && n_read > 0 && g_utf8_validate (field, -1, NULL)) {
1136         g_free (utf16);
1137         break;
1138       }
1139 
1140       GST_DEBUG ("Trying interpreting data as UTF-16-%s now as fallback",
1141           (data_endianness == G_LITTLE_ENDIAN) ? "BE" : "LE");
1142 
1143       for (i = 0; i < len; ++i)
1144         utf16[i] = GUINT16_SWAP_LE_BE (utf16[i]);
1145 
1146       g_free (field);
1147       n_read = size = 0;
1148 
1149       /* try again */
1150       field = g_utf16_to_utf8 (utf16, len, &n_read, &size, NULL);
1151       g_free (utf16);
1152 
1153       if (field != NULL && n_read > 0 && g_utf8_validate (field, -1, NULL))
1154         break;
1155 
1156       GST_DEBUG ("Could not convert UTF-16 string to UTF-8");
1157       g_free (field);
1158       field = NULL;
1159       break;
1160     }
1161     case ID3V2_ENCODING_ISO8859:
1162       if (g_utf8_validate (data, data_size, NULL))
1163         field = g_strndup (data, data_size);
1164       else
1165         /* field = g_convert (data, data_size, "UTF-8", "ISO-8859-1",
1166            NULL, NULL, NULL); */
1167         field = string_utf8_dup (data, data_size);
1168       break;
1169     default:
1170       field = g_strndup (data, data_size);
1171       break;
1172   }
1173 
1174   if (field) {
1175     if (g_utf8_validate (field, -1, NULL)) {
1176       g_array_append_val (fields, field);
1177       return;
1178     }
1179 
1180     GST_DEBUG ("%s was bad UTF-8 after conversion from encoding %d. Ignoring",
1181         field, encoding);
1182     g_free (field);
1183   }
1184 }
1185 
1186 static void
parse_split_strings(guint8 encoding,gchar * data,gint data_size,GArray ** out_fields)1187 parse_split_strings (guint8 encoding, gchar * data, gint data_size,
1188     GArray ** out_fields)
1189 {
1190   GArray *fields = g_array_new (FALSE, TRUE, sizeof (gchar *));
1191   gint text_pos;
1192   gint prev = 0;
1193 
1194   g_return_if_fail (out_fields != NULL);
1195 
1196   switch (encoding) {
1197     case ID3V2_ENCODING_ISO8859:
1198       for (text_pos = 0; text_pos < data_size; text_pos++) {
1199         if (data[text_pos] == 0) {
1200           parse_insert_string_field (encoding, data + prev,
1201               text_pos - prev, fields);
1202           prev = text_pos + 1;
1203         }
1204       }
1205       if (data_size - prev > 0 && data[prev] != 0x00) {
1206         parse_insert_string_field (encoding, data + prev,
1207             data_size - prev, fields);
1208       }
1209 
1210       break;
1211     case ID3V2_ENCODING_UTF8:
1212       for (prev = 0, text_pos = 0; text_pos < data_size; text_pos++) {
1213         if (data[text_pos] == '\0') {
1214           parse_insert_string_field (encoding, data + prev,
1215               text_pos - prev, fields);
1216           prev = text_pos + 1;
1217         }
1218       }
1219       if (data_size - prev > 0 && data[prev] != 0x00) {
1220         parse_insert_string_field (encoding, data + prev,
1221             data_size - prev, fields);
1222       }
1223       break;
1224     case ID3V2_ENCODING_UTF16:
1225     case ID3V2_ENCODING_UTF16BE:
1226     {
1227       /* Find '\0\0' terminator */
1228       for (text_pos = 0; text_pos < data_size - 1; text_pos += 2) {
1229         if (data[text_pos] == '\0' && data[text_pos + 1] == '\0') {
1230           /* found a delimiter */
1231           parse_insert_string_field (encoding, data + prev,
1232               text_pos - prev, fields);
1233           prev = text_pos + 2;
1234         }
1235       }
1236       if (data_size - prev > 1 &&
1237           (data[prev] != 0x00 || data[prev + 1] != 0x00)) {
1238         /* There were 2 or more non-null chars left, convert those too */
1239         parse_insert_string_field (encoding, data + prev,
1240             data_size - prev, fields);
1241       }
1242       break;
1243     }
1244   }
1245   if (fields->len > 0)
1246     *out_fields = fields;
1247   else
1248     g_array_free (fields, TRUE);
1249 }
1250 
1251 static void
free_tag_strings(GArray * fields)1252 free_tag_strings (GArray * fields)
1253 {
1254   if (fields) {
1255     gint i;
1256     gchar *c;
1257 
1258     for (i = 0; i < fields->len; i++) {
1259       c = g_array_index (fields, gchar *, i);
1260       g_free (c);
1261     }
1262     g_array_free (fields, TRUE);
1263   }
1264 }
1265