1 ////////////////////////////////////////////////////////////////////////////
2 //                           **** WAVPACK ****                            //
3 //                  Hybrid Lossless Wavefile Compressor                   //
4 //                Copyright (c) 1998 - 2017 David Bryant                  //
5 //                          All Rights Reserved.                          //
6 //      Distributed under the BSD Software License (see license.txt)      //
7 ////////////////////////////////////////////////////////////////////////////
8 
9 // import_id3.c
10 
11 // This module provides limited support for importing existing ID3 tags
12 // (from DSF files, for example) into WavPack files
13 
14 #include <sys/stat.h>
15 #include <stdlib.h>
16 #include <stdarg.h>
17 #include <string.h>
18 #include <stdio.h>
19 #include <ctype.h>
20 
21 #include "wavpack.h"
22 
// Lookup table pairing four-character ID3v2 text frame identifiers with the
// APEv2 item name each one is stored under. The table is never modified at
// runtime, so both the table and the strings it points to are const.

static const struct {
    const char *id3_item, *ape_item;
} text_tag_table [] = {
    { "TALB", "Album" },
    { "TPE1", "Artist" },
    { "TPE2", "AlbumArtist" },
    { "TPE3", "Conductor" },
    { "TIT1", "Grouping" },
    { "TIT2", "Title" },
    { "TIT3", "Subtitle" },
    { "TSST", "DiscSubtitle" },
    { "TSOA", "AlbumSort" },
    { "TSOT", "TitleSort" },
    { "TSO2", "AlbumArtistSort" },
    { "TSOP", "ArtistSort" },
    { "TPOS", "Disc" },
    { "TRCK", "Track" },
    { "TCON", "Genre" },
    { "TYER", "Year" },
    { "TCOM", "Composer" },
    { "TPUB", "Publisher" },
    { "TCMP", "Compilation" },
    { "TENC", "EncodedBy" },
    { "TEXT", "Lyricist" },
    { "TCOP", "Copyright" },
    { "TLAN", "Language" },
    { "TSRC", "ISRC" },
    { "TMED", "Media" },
    { "TMOO", "Mood" },
    { "TBPM", "BPM" }
};

// Number of entries in text_tag_table

#define NUM_TEXT_TAG_ITEMS (sizeof (text_tag_table) / sizeof (text_tag_table [0]))
56 
57 static int WideCharToUTF8 (const uint16_t *Wide, unsigned char *pUTF8, int len);
58 static void Latin1ToUTF8 (void *string, int len);
59 
60 // Import specified ID3v2.3 tag. The WavPack context accepts the tag items, and can be
61 // NULL for doing a dry-run through the tag. If errors occur then a description will be
62 // written to "error" (which must be 80 characters) and -1 will be returned. If no
63 // errors occur then the number of tag items successfully written will be returned, or
64 // zero in the case of no applicable tags. An optional integer pointer can be provided
65 // to accept the total number of bytes consumed by the tag (name and value).
66 
ImportID3v2_syncsafe(WavpackContext * wpc,unsigned char * tag_data,int tag_size,char * error,int32_t * bytes_used,int syncsafe)67 static int ImportID3v2_syncsafe (WavpackContext *wpc, unsigned char *tag_data, int tag_size, char *error, int32_t *bytes_used, int syncsafe)
68 {
69     int tag_size_from_header, items_imported = 0, done_cover = 0;
70     unsigned char id3_header [10];
71 
72     if (bytes_used)
73         *bytes_used = 0;
74 
75     if (tag_size < sizeof (id3_header)) {
76         strcpy (error, "can't read tag header");
77         return -1;
78     }
79 
80     memcpy (id3_header, tag_data, sizeof (id3_header));
81     tag_size -= sizeof (id3_header);
82     tag_data += sizeof (id3_header);
83 
84     if (strncmp ((char *) id3_header, "ID3", 3)) {
85         strcpy (error, "no ID3v2 tag found");
86         return -1;
87     }
88 
89     if (id3_header [3] != 3 || id3_header [4] == 0xFF || (id3_header [5] & 0x1F)) {
90         strcpy (error, "not valid ID3v2.3");
91         return -1;
92     }
93 
94     if (id3_header [5] & 0x80) {
95         strcpy (error, "unsynchonization detected");
96         return -1;
97     }
98 
99     if (id3_header [5] & 0x40) {
100         strcpy (error, "extended header detected");
101         return -1;
102     }
103 
104     if (id3_header [5] & 0x20) {
105         strcpy (error, "experimental indicator detected");
106         return -1;
107     }
108 
109     if ((id3_header [6] | id3_header [7] | id3_header [8] | id3_header [9]) & 0x80) {
110         strcpy (error, "not valid ID3v2.3 (bad size)");
111         return -1;
112     }
113 
114     tag_size_from_header = id3_header [9] + (id3_header [8] << 7) + (id3_header [7] << 14) + (id3_header [6] << 21);
115 
116     if (tag_size_from_header > tag_size) {
117         strcpy (error, "tag is truncated");
118         return -1;
119     }
120 
121     while (1) {
122         unsigned char frame_header [10], *frame_body;
123         int frame_size, i;
124 
125         if (tag_size < sizeof (frame_header))
126             break;
127 
128         memcpy (frame_header, tag_data, sizeof (frame_header));
129         tag_size -= sizeof (frame_header);
130         tag_data += sizeof (frame_header);
131 
132         if (!frame_header [0] && !frame_header [1] && !frame_header [2] && !frame_header [3])
133             break;
134 
135         for (i = 0; i < 4; ++i)
136             if (frame_header [i] < '0' ||
137                 (frame_header [i] > '9' && frame_header [i] < 'A') ||
138                 frame_header [i] > 'Z') {
139                     strcpy (error, "bad frame identity");
140                     return -1;
141             }
142 
143         if (frame_header [9]) {
144             strcpy (error, "unknown frame_header flag set");
145             return -1;
146         }
147 
148         if (syncsafe)
149             frame_size = frame_header [7] + (frame_header [6] << 7) + (frame_header [5] << 14) + (frame_header [4] << 21);
150         else
151             frame_size = frame_header [7] + (frame_header [6] << 8) + (frame_header [5] << 16) + (frame_header [4] << 24);
152 
153         if (!frame_size) {
154             strcpy (error, "empty frame not allowed");
155             return -1;
156         }
157 
158         if (frame_size > tag_size) {
159             strcpy (error, "can't read frame body");
160             return -1;
161         }
162 
163         frame_body = malloc (frame_size + 4);
164 
165         memcpy (frame_body, tag_data, frame_size);
166         tag_size -= frame_size;
167         tag_data += frame_size;
168 
169         if (frame_header [0] == 'T') {
170             int txxx_mode = !strncmp ((char *) frame_header, "TXXX", 4), si = 0;
171             unsigned char *utf8_strings [2];
172 
173             if (frame_body [0] == 0) {
174                 unsigned char *fp = frame_body + 1, *fe = frame_body + frame_size;
175 
176                 while (si < 2 && fp < fe && *fp) {
177                     utf8_strings [si] = malloc (frame_size * 3);
178 
179                     for (i = 0; fp < fe; ++i)
180                         if (!(utf8_strings [si] [i] = *fp++))
181                             break;
182 
183                     if (fp == fe)
184                         utf8_strings [si] [i] = 0;
185 
186                     Latin1ToUTF8 (utf8_strings [si++], frame_size * 3);
187                 }
188             }
189             else if (frame_body [0] == 1) {
190                 unsigned char *fp = frame_body + 1, *fe = frame_body + frame_size;
191                 uint16_t *wide_string = malloc (frame_size);
192 
193                 while (si < 2 && fp <= fe - 4 && fp [0] == 0xFF && fp [1] == 0xFE && (fp [2] | fp [3])) {
194                     utf8_strings [si] = malloc (frame_size * 2);
195                     fp += 2;
196 
197                     for (i = 0; fp <= fe - 2; ++i, fp += 2)
198                         if (!(wide_string [i] = fp [0] | (fp [1] << 8))) {
199                             fp += 2;
200                             break;
201                         }
202 
203                     wide_string [i] = 0;
204                     WideCharToUTF8 (wide_string, utf8_strings [si++], frame_size * 2);
205                 }
206 
207                 free (wide_string);
208             }
209             else {
210                 strcpy (error, "unknown character encoding");
211                 return -1;
212             }
213 
214             // if we got a text string (or a TXXX and two text strings) store them here
215 
216             if (si) {
217                 if (txxx_mode && si == 2) {
218                     unsigned char *cptr = utf8_strings [0];
219 
220                     // if all single-byte UTF8, format TXXX description to match case of regular APEv2 descriptions (e.g., Performer)
221 
222                     while (*cptr)
223                         if (*cptr & 0x80)
224                             break;
225                         else
226                             cptr++;
227 
228                     if (!*cptr && isupper (*utf8_strings [0])) {
229                         cptr = utf8_strings [0];
230 
231                         while (*++cptr)
232                             if (isupper (*cptr))
233                                 *cptr = tolower (*cptr);
234                     }
235 
236                     if (wpc && !WavpackAppendTagItem (wpc, (char *) utf8_strings [0], (char *) utf8_strings [1], (int) strlen ((char *) utf8_strings [1]))) {
237                         strcpy (error, WavpackGetErrorMessage (wpc));
238                         return -1;
239                     }
240 
241                     items_imported++;
242                     if (bytes_used) *bytes_used += (int) (strlen ((char *) utf8_strings [0]) + strlen ((char *) utf8_strings [1]) + 1);
243                 }
244                 else if (!txxx_mode && si == 1)    // if not TXXX, look up item in the table to find APEv2 item name
245                     for (i = 0; i < NUM_TEXT_TAG_ITEMS; ++i)
246                         if (!strncmp ((char *) frame_header, text_tag_table [i].id3_item, 4)) {
247                             if (wpc && !WavpackAppendTagItem (wpc, text_tag_table [i].ape_item, (char *) utf8_strings [0], (int) strlen ((char *) utf8_strings [0]))) {
248                                 strcpy (error, WavpackGetErrorMessage (wpc));
249                                 return -1;
250                             }
251 
252                             items_imported++;
253                             if (bytes_used) *bytes_used += (int) (strlen ((char *) utf8_strings [0]) + strlen (text_tag_table [i].ape_item) + 1);
254                         }
255 
256                 do
257                     free (utf8_strings [--si]);
258                 while (si);
259             }
260         }
261         else if (!strncmp ((char *) frame_header, "APIC", 4)) {
262             if (frame_body [0] == 0) {
263                 char *mime_type, *extension, *item = NULL;
264                 unsigned char *frame_ptr = frame_body + 1;
265                 int frame_bytes = frame_size - 1;
266                 unsigned char picture_type;
267 
268                 mime_type = (char *) frame_ptr;
269 
270                 while (frame_bytes-- && *frame_ptr++);
271 
272                 if (frame_bytes < 0) {
273                     strcpy (error, "unterminated picture mime type");
274                     return -1;
275                 }
276 
277                 if (frame_bytes == 0) {
278                     strcpy (error, "no picture type");
279                     return -1;
280                 }
281 
282                 picture_type = *frame_ptr++;
283                 frame_bytes--;
284 
285                 while (frame_bytes-- && *frame_ptr++);
286 
287                 if (frame_bytes < 0) {
288                     strcpy (error, "unterminated picture description");
289                     return -1;
290                 }
291 
292                 if (frame_bytes < 2) {
293                     strcpy (error, "no picture data");
294                     return -1;
295                 }
296 
297                 if (strstr (mime_type, "jpeg") || strstr (mime_type, "JPEG"))
298                     extension = ".jpg";
299                 else if (strstr (mime_type, "png") || strstr (mime_type, "PNG"))
300                     extension = ".png";
301                 else if (frame_ptr [0] == 0xFF && frame_ptr [1] == 0xD8)
302                     extension = ".jpg";
303                 else if (frame_ptr [0] == 0x89 && frame_ptr [1] == 0x50)
304                     extension = ".png";
305                 else
306                     extension = "";
307 
308                 if (picture_type == 3) {
309                     item = "Cover Art (Front)";
310                     done_cover = 1;
311                 }
312                 else if (picture_type == 4)
313                     item = "Cover Art (Back)";
314                 else if (picture_type != 1 && picture_type != 2 && !done_cover) {
315                     item = "Cover Art (Front)";
316                     done_cover = 1;
317                 }
318 
319                 if (item) {
320                     int binary_tag_size = (int) strlen (item) + (int) strlen (extension) + 1 + frame_bytes;
321                     char *binary_tag_image = malloc (binary_tag_size);
322 
323                     strcpy (binary_tag_image, item);
324                     strcat (binary_tag_image, extension);
325                     memcpy (binary_tag_image + binary_tag_size - frame_bytes, frame_ptr, frame_bytes);
326 
327                     if (wpc && !WavpackAppendBinaryTagItem (wpc, item, binary_tag_image, binary_tag_size)) {
328                         strcpy (error, WavpackGetErrorMessage (wpc));
329                         return -1;
330                     }
331 
332                     items_imported++;
333                     if (bytes_used) *bytes_used += (int) strlen (item) + 1 + binary_tag_size;
334                     free (binary_tag_image);
335                 }
336             }
337             else {
338                 strcpy (error, "unhandled APIC character encoding");
339                 return -1;
340             }
341         }
342 
343         free (frame_body);
344     }
345 
346     return items_imported;
347 }
348 
ImportID3v2(WavpackContext * wpc,unsigned char * tag_data,int tag_size,char * error,int32_t * bytes_used)349 int ImportID3v2 (WavpackContext *wpc, unsigned char *tag_data, int tag_size, char *error, int32_t *bytes_used)
350 {
351     int res, res_ss;
352 
353     if (bytes_used)
354         *bytes_used = 0;
355 
356     // look for the ID3 tag in case it's not first thing in the wrapper (like in WAV or DSDIFF files)
357 
358     if (tag_size >= 10) {
359         unsigned char *cp = tag_data, *ce = cp + tag_size;
360 
361         while (cp < ce - 10)
362             if (cp [0] == 'I' && cp [1] == 'D' && cp [2] == '3' && cp [3] == 3) {
363                 tag_size = (int)(ce - cp);
364                 tag_data = cp;
365                 break;
366             }
367             else
368                 cp++;
369 
370         if (cp == ce - 10)      // no tag found is NOT an error
371             return 0;
372     }
373 
374     res = ImportID3v2_syncsafe (NULL, tag_data, tag_size, error, bytes_used, 0);
375 
376     if (res > 0)
377         return wpc ? ImportID3v2_syncsafe (wpc, tag_data, tag_size, error, bytes_used, 0) : res;
378 
379     res_ss = ImportID3v2_syncsafe (NULL, tag_data, tag_size, error, bytes_used, 1);
380 
381     if (res_ss > 0)
382         return wpc ? ImportID3v2_syncsafe (wpc, tag_data, tag_size, error, bytes_used, 1) : res_ss;
383 
384     return res;
385 }
386 
// Convert the UTF-16 wide-format string into a UTF-8 string using no more
// than the specified buffer length (which includes the terminator). The
// wide-format string must be NULL terminated and the resulting string will be
// NULL terminated. A valid surrogate pair is combined into a single 4-byte
// UTF-8 sequence; an unpaired surrogate is replaced with U+FFFD because it has
// no valid UTF-8 encoding. The number of 16-bit units consumed from the wide
// string (not counting terminator) is returned, which may be less than the
// length of the wide string if the buffer length is exceeded. If "len" is
// not positive, nothing is written and zero is returned.

static int WideCharToUTF8 (const uint16_t *Wide, unsigned char *pUTF8, int len)
{
    const uint16_t *pWide = Wide;
    int outndx = 0;

    if (len <= 0)
        return 0;

    while (*pWide) {
        uint32_t codepoint = *pWide;
        int units = 1, bytes;

        // reading pWide [1] is safe: pWide [0] is non-zero, so at worst pWide [1] is the terminator

        if (codepoint >= 0xD800 && codepoint <= 0xDBFF && pWide [1] >= 0xDC00 && pWide [1] <= 0xDFFF) {
            codepoint = 0x10000 + ((codepoint - 0xD800) << 10) + (pWide [1] - 0xDC00);
            units = 2;
        }
        else if (codepoint >= 0xD800 && codepoint <= 0xDFFF)
            codepoint = 0xFFFD;

        if (codepoint < 0x80)
            bytes = 1;
        else if (codepoint < 0x800)
            bytes = 2;
        else if (codepoint < 0x10000)
            bytes = 3;
        else
            bytes = 4;

        // always leave room for the terminator

        if (outndx + bytes >= len)
            break;

        if (bytes == 1)
            pUTF8 [outndx++] = (unsigned char) codepoint;
        else if (bytes == 2) {
            pUTF8 [outndx++] = (unsigned char) (0xc0 | ((codepoint >> 6) & 0x1f));
            pUTF8 [outndx++] = (unsigned char) (0x80 | (codepoint & 0x3f));
        }
        else if (bytes == 3) {
            pUTF8 [outndx++] = (unsigned char) (0xe0 | ((codepoint >> 12) & 0xf));
            pUTF8 [outndx++] = (unsigned char) (0x80 | ((codepoint >> 6) & 0x3f));
            pUTF8 [outndx++] = (unsigned char) (0x80 | (codepoint & 0x3f));
        }
        else {
            pUTF8 [outndx++] = (unsigned char) (0xf0 | ((codepoint >> 18) & 0x7));
            pUTF8 [outndx++] = (unsigned char) (0x80 | ((codepoint >> 12) & 0x3f));
            pUTF8 [outndx++] = (unsigned char) (0x80 | ((codepoint >> 6) & 0x3f));
            pUTF8 [outndx++] = (unsigned char) (0x80 | (codepoint & 0x3f));
        }

        pWide += units;
    }

    pUTF8 [outndx] = 0;
    return (int)(pWide - Wide);
}
418 
419 // Convert a Latin1 string into its Unicode UTF-8 format equivalent. The
420 // conversion is done in-place so the maximum length of the string buffer must
421 // be specified because the string may become longer or shorter. If the
422 // resulting string will not fit in the specified buffer size then it is
423 // truncated.
424 
425 #ifdef _WIN32
426 
427 #include <windows.h>
428 
// Windows implementation: widen the Latin1 string with MultiByteToWideChar()
// and then write it back over the original as UTF-8, using no more than "len"
// bytes. If the temporary buffer cannot be allocated or the widening fails,
// the string is left unchanged.

static void Latin1ToUTF8 (void *string, int len)
{
    int max_chars;
    uint16_t *temp;

    if (!string || len <= 0)
        return;

    max_chars = (int) strlen (string);
    temp = malloc ((max_chars + 1) * sizeof (uint16_t));

    if (!temp)
        return;

    // 28591 is the Windows code page identifier for ISO 8859-1; a zero return
    // means the conversion failed and "temp" does not hold a usable string

    if (MultiByteToWideChar (28591, 0, string, -1, temp, max_chars + 1))
        WideCharToUTF8 (temp, (unsigned char *) string, len);

    free (temp);
}
438 
439 #else
440 
441 #include <iconv.h>
442 
// iconv implementation: convert the Latin1 string into a zeroed temporary
// buffer of "len" bytes (keeping one byte back for the terminator) and, if the
// conversion succeeds, copy the whole buffer back over the original. If the
// temporary buffer cannot be allocated, the converter cannot be opened, or
// iconv() reports an error, the string is left unchanged.

static void Latin1ToUTF8 (void *string, int len)
{
    size_t insize, outsize, converted = (size_t) -1;
    char *temp, *outp, *inp = string;
    iconv_t converter;

    // a non-positive length would wrap around when converted to size_t below

    if (!string || len <= 0)
        return;

    temp = calloc (len, 1);

    if (!temp)
        return;

    outp = temp;
    insize = strlen (string);
    outsize = len - 1;
    converter = iconv_open ("UTF-8", "ISO-8859-1");

    if (converter != (iconv_t) -1) {
        converted = iconv (converter, &inp, &insize, &outp, &outsize);
        iconv_close (converter);
    }

    // iconv() signals failure with (size_t) -1

    if (converted != (size_t) -1)
        memcpy (string, temp, len);

    free (temp);
}
473 
474 #endif
475 
476