1 ////////////////////////////////////////////////////////////////////////////
2 // **** WAVPACK **** //
3 // Hybrid Lossless Wavefile Compressor //
4 // Copyright (c) 1998 - 2017 David Bryant //
5 // All Rights Reserved. //
6 // Distributed under the BSD Software License (see license.txt) //
7 ////////////////////////////////////////////////////////////////////////////
8
9 // import_id3.c
10
11 // This module provides limited support for importing existing ID3 tags
12 // (from DSF files, for example) into WavPack files
13
#include <sys/stat.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <ctype.h>

#include "wavpack.h"
22
// Lookup table used to translate ID3v2.3 text frame identifiers into the item
// names used when the text is appended to the WavPack tag. The table is only
// ever read, so both the table and the strings it points to are const.

static const struct {
    const char *id3_item, *ape_item;
} text_tag_table [] = {
    { "TALB", "Album" },
    { "TPE1", "Artist" },
    { "TPE2", "AlbumArtist" },
    { "TPE3", "Conductor" },
    { "TIT1", "Grouping" },
    { "TIT2", "Title" },
    { "TIT3", "Subtitle" },
    { "TSST", "DiscSubtitle" },
    { "TSOA", "AlbumSort" },
    { "TSOT", "TitleSort" },
    { "TSO2", "AlbumArtistSort" },
    { "TSOP", "ArtistSort" },
    { "TPOS", "Disc" },
    { "TRCK", "Track" },
    { "TCON", "Genre" },
    { "TYER", "Year" },
    { "TCOM", "Composer" },
    { "TPUB", "Publisher" },
    { "TCMP", "Compilation" },
    { "TENC", "EncodedBy" },
    { "TEXT", "Lyricist" },
    { "TCOP", "Copyright" },
    { "TLAN", "Language" },
    { "TSRC", "ISRC" },
    { "TMED", "Media" },
    { "TMOO", "Mood" },
    { "TBPM", "BPM" }
};

// number of entries in text_tag_table (evaluates to a size_t)

#define NUM_TEXT_TAG_ITEMS (sizeof (text_tag_table) / sizeof (text_tag_table [0]))
56
57 static int WideCharToUTF8 (const uint16_t *Wide, unsigned char *pUTF8, int len);
58 static void Latin1ToUTF8 (void *string, int len);
59
60 // Import specified ID3v2.3 tag. The WavPack context accepts the tag items, and can be
61 // NULL for doing a dry-run through the tag. If errors occur then a description will be
62 // written to "error" (which must be 80 characters) and -1 will be returned. If no
63 // errors occur then the number of tag items successfully written will be returned, or
64 // zero in the case of no applicable tags. An optional integer pointer can be provided
65 // to accept the total number of bytes consumed by the tag (name and value).
66
ImportID3v2_syncsafe(WavpackContext * wpc,unsigned char * tag_data,int tag_size,char * error,int32_t * bytes_used,int syncsafe)67 static int ImportID3v2_syncsafe (WavpackContext *wpc, unsigned char *tag_data, int tag_size, char *error, int32_t *bytes_used, int syncsafe)
68 {
69 int tag_size_from_header, items_imported = 0, done_cover = 0;
70 unsigned char id3_header [10];
71
72 if (bytes_used)
73 *bytes_used = 0;
74
75 if (tag_size < sizeof (id3_header)) {
76 strcpy (error, "can't read tag header");
77 return -1;
78 }
79
80 memcpy (id3_header, tag_data, sizeof (id3_header));
81 tag_size -= sizeof (id3_header);
82 tag_data += sizeof (id3_header);
83
84 if (strncmp ((char *) id3_header, "ID3", 3)) {
85 strcpy (error, "no ID3v2 tag found");
86 return -1;
87 }
88
89 if (id3_header [3] != 3 || id3_header [4] == 0xFF || (id3_header [5] & 0x1F)) {
90 strcpy (error, "not valid ID3v2.3");
91 return -1;
92 }
93
94 if (id3_header [5] & 0x80) {
95 strcpy (error, "unsynchonization detected");
96 return -1;
97 }
98
99 if (id3_header [5] & 0x40) {
100 strcpy (error, "extended header detected");
101 return -1;
102 }
103
104 if (id3_header [5] & 0x20) {
105 strcpy (error, "experimental indicator detected");
106 return -1;
107 }
108
109 if ((id3_header [6] | id3_header [7] | id3_header [8] | id3_header [9]) & 0x80) {
110 strcpy (error, "not valid ID3v2.3 (bad size)");
111 return -1;
112 }
113
114 tag_size_from_header = id3_header [9] + (id3_header [8] << 7) + (id3_header [7] << 14) + (id3_header [6] << 21);
115
116 if (tag_size_from_header > tag_size) {
117 strcpy (error, "tag is truncated");
118 return -1;
119 }
120
121 while (1) {
122 unsigned char frame_header [10], *frame_body;
123 int frame_size, i;
124
125 if (tag_size < sizeof (frame_header))
126 break;
127
128 memcpy (frame_header, tag_data, sizeof (frame_header));
129 tag_size -= sizeof (frame_header);
130 tag_data += sizeof (frame_header);
131
132 if (!frame_header [0] && !frame_header [1] && !frame_header [2] && !frame_header [3])
133 break;
134
135 for (i = 0; i < 4; ++i)
136 if (frame_header [i] < '0' ||
137 (frame_header [i] > '9' && frame_header [i] < 'A') ||
138 frame_header [i] > 'Z') {
139 strcpy (error, "bad frame identity");
140 return -1;
141 }
142
143 if (frame_header [9]) {
144 strcpy (error, "unknown frame_header flag set");
145 return -1;
146 }
147
148 if (syncsafe)
149 frame_size = frame_header [7] + (frame_header [6] << 7) + (frame_header [5] << 14) + (frame_header [4] << 21);
150 else
151 frame_size = frame_header [7] + (frame_header [6] << 8) + (frame_header [5] << 16) + (frame_header [4] << 24);
152
153 if (!frame_size) {
154 strcpy (error, "empty frame not allowed");
155 return -1;
156 }
157
158 if (frame_size > tag_size) {
159 strcpy (error, "can't read frame body");
160 return -1;
161 }
162
163 frame_body = malloc (frame_size + 4);
164
165 memcpy (frame_body, tag_data, frame_size);
166 tag_size -= frame_size;
167 tag_data += frame_size;
168
169 if (frame_header [0] == 'T') {
170 int txxx_mode = !strncmp ((char *) frame_header, "TXXX", 4), si = 0;
171 unsigned char *utf8_strings [2];
172
173 if (frame_body [0] == 0) {
174 unsigned char *fp = frame_body + 1, *fe = frame_body + frame_size;
175
176 while (si < 2 && fp < fe && *fp) {
177 utf8_strings [si] = malloc (frame_size * 3);
178
179 for (i = 0; fp < fe; ++i)
180 if (!(utf8_strings [si] [i] = *fp++))
181 break;
182
183 if (fp == fe)
184 utf8_strings [si] [i] = 0;
185
186 Latin1ToUTF8 (utf8_strings [si++], frame_size * 3);
187 }
188 }
189 else if (frame_body [0] == 1) {
190 unsigned char *fp = frame_body + 1, *fe = frame_body + frame_size;
191 uint16_t *wide_string = malloc (frame_size);
192
193 while (si < 2 && fp <= fe - 4 && fp [0] == 0xFF && fp [1] == 0xFE && (fp [2] | fp [3])) {
194 utf8_strings [si] = malloc (frame_size * 2);
195 fp += 2;
196
197 for (i = 0; fp <= fe - 2; ++i, fp += 2)
198 if (!(wide_string [i] = fp [0] | (fp [1] << 8))) {
199 fp += 2;
200 break;
201 }
202
203 wide_string [i] = 0;
204 WideCharToUTF8 (wide_string, utf8_strings [si++], frame_size * 2);
205 }
206
207 free (wide_string);
208 }
209 else {
210 strcpy (error, "unknown character encoding");
211 return -1;
212 }
213
214 // if we got a text string (or a TXXX and two text strings) store them here
215
216 if (si) {
217 if (txxx_mode && si == 2) {
218 unsigned char *cptr = utf8_strings [0];
219
220 // if all single-byte UTF8, format TXXX description to match case of regular APEv2 descriptions (e.g., Performer)
221
222 while (*cptr)
223 if (*cptr & 0x80)
224 break;
225 else
226 cptr++;
227
228 if (!*cptr && isupper (*utf8_strings [0])) {
229 cptr = utf8_strings [0];
230
231 while (*++cptr)
232 if (isupper (*cptr))
233 *cptr = tolower (*cptr);
234 }
235
236 if (wpc && !WavpackAppendTagItem (wpc, (char *) utf8_strings [0], (char *) utf8_strings [1], (int) strlen ((char *) utf8_strings [1]))) {
237 strcpy (error, WavpackGetErrorMessage (wpc));
238 return -1;
239 }
240
241 items_imported++;
242 if (bytes_used) *bytes_used += (int) (strlen ((char *) utf8_strings [0]) + strlen ((char *) utf8_strings [1]) + 1);
243 }
244 else if (!txxx_mode && si == 1) // if not TXXX, look up item in the table to find APEv2 item name
245 for (i = 0; i < NUM_TEXT_TAG_ITEMS; ++i)
246 if (!strncmp ((char *) frame_header, text_tag_table [i].id3_item, 4)) {
247 if (wpc && !WavpackAppendTagItem (wpc, text_tag_table [i].ape_item, (char *) utf8_strings [0], (int) strlen ((char *) utf8_strings [0]))) {
248 strcpy (error, WavpackGetErrorMessage (wpc));
249 return -1;
250 }
251
252 items_imported++;
253 if (bytes_used) *bytes_used += (int) (strlen ((char *) utf8_strings [0]) + strlen (text_tag_table [i].ape_item) + 1);
254 }
255
256 do
257 free (utf8_strings [--si]);
258 while (si);
259 }
260 }
261 else if (!strncmp ((char *) frame_header, "APIC", 4)) {
262 if (frame_body [0] == 0) {
263 char *mime_type, *extension, *item = NULL;
264 unsigned char *frame_ptr = frame_body + 1;
265 int frame_bytes = frame_size - 1;
266 unsigned char picture_type;
267
268 mime_type = (char *) frame_ptr;
269
270 while (frame_bytes-- && *frame_ptr++);
271
272 if (frame_bytes < 0) {
273 strcpy (error, "unterminated picture mime type");
274 return -1;
275 }
276
277 if (frame_bytes == 0) {
278 strcpy (error, "no picture type");
279 return -1;
280 }
281
282 picture_type = *frame_ptr++;
283 frame_bytes--;
284
285 while (frame_bytes-- && *frame_ptr++);
286
287 if (frame_bytes < 0) {
288 strcpy (error, "unterminated picture description");
289 return -1;
290 }
291
292 if (frame_bytes < 2) {
293 strcpy (error, "no picture data");
294 return -1;
295 }
296
297 if (strstr (mime_type, "jpeg") || strstr (mime_type, "JPEG"))
298 extension = ".jpg";
299 else if (strstr (mime_type, "png") || strstr (mime_type, "PNG"))
300 extension = ".png";
301 else if (frame_ptr [0] == 0xFF && frame_ptr [1] == 0xD8)
302 extension = ".jpg";
303 else if (frame_ptr [0] == 0x89 && frame_ptr [1] == 0x50)
304 extension = ".png";
305 else
306 extension = "";
307
308 if (picture_type == 3) {
309 item = "Cover Art (Front)";
310 done_cover = 1;
311 }
312 else if (picture_type == 4)
313 item = "Cover Art (Back)";
314 else if (picture_type != 1 && picture_type != 2 && !done_cover) {
315 item = "Cover Art (Front)";
316 done_cover = 1;
317 }
318
319 if (item) {
320 int binary_tag_size = (int) strlen (item) + (int) strlen (extension) + 1 + frame_bytes;
321 char *binary_tag_image = malloc (binary_tag_size);
322
323 strcpy (binary_tag_image, item);
324 strcat (binary_tag_image, extension);
325 memcpy (binary_tag_image + binary_tag_size - frame_bytes, frame_ptr, frame_bytes);
326
327 if (wpc && !WavpackAppendBinaryTagItem (wpc, item, binary_tag_image, binary_tag_size)) {
328 strcpy (error, WavpackGetErrorMessage (wpc));
329 return -1;
330 }
331
332 items_imported++;
333 if (bytes_used) *bytes_used += (int) strlen (item) + 1 + binary_tag_size;
334 free (binary_tag_image);
335 }
336 }
337 else {
338 strcpy (error, "unhandled APIC character encoding");
339 return -1;
340 }
341 }
342
343 free (frame_body);
344 }
345
346 return items_imported;
347 }
348
ImportID3v2(WavpackContext * wpc,unsigned char * tag_data,int tag_size,char * error,int32_t * bytes_used)349 int ImportID3v2 (WavpackContext *wpc, unsigned char *tag_data, int tag_size, char *error, int32_t *bytes_used)
350 {
351 int res, res_ss;
352
353 if (bytes_used)
354 *bytes_used = 0;
355
356 // look for the ID3 tag in case it's not first thing in the wrapper (like in WAV or DSDIFF files)
357
358 if (tag_size >= 10) {
359 unsigned char *cp = tag_data, *ce = cp + tag_size;
360
361 while (cp < ce - 10)
362 if (cp [0] == 'I' && cp [1] == 'D' && cp [2] == '3' && cp [3] == 3) {
363 tag_size = (int)(ce - cp);
364 tag_data = cp;
365 break;
366 }
367 else
368 cp++;
369
370 if (cp == ce - 10) // no tag found is NOT an error
371 return 0;
372 }
373
374 res = ImportID3v2_syncsafe (NULL, tag_data, tag_size, error, bytes_used, 0);
375
376 if (res > 0)
377 return wpc ? ImportID3v2_syncsafe (wpc, tag_data, tag_size, error, bytes_used, 0) : res;
378
379 res_ss = ImportID3v2_syncsafe (NULL, tag_data, tag_size, error, bytes_used, 1);
380
381 if (res_ss > 0)
382 return wpc ? ImportID3v2_syncsafe (wpc, tag_data, tag_size, error, bytes_used, 1) : res_ss;
383
384 return res;
385 }
386
// Convert the UTF-16 wide-format string into a UTF-8 string using no more
// than the specified buffer length (terminator included). The wide-format
// string must be NULL terminated and the resulting string will be NULL
// terminated. A valid surrogate pair is combined into a single 4-byte UTF-8
// sequence, and an unpaired surrogate is replaced with U+FFFD, so the output
// is always well-formed UTF-8. The number of 16-bit units consumed (not
// counting terminator) is returned, which may be less than the number of
// units in the wide string if the buffer length is exceeded. Nothing is
// written, and zero is returned, if the buffer length is not positive.

static int WideCharToUTF8 (const uint16_t *Wide, unsigned char *pUTF8, int len)
{
    const uint16_t *pWide = Wide;
    int outndx = 0;

    if (len <= 0)
        return 0;

    while (*pWide) {
        uint32_t code = *pWide;
        int units = 1, bytes;

        if (code >= 0xD800 && code <= 0xDBFF && pWide [1] >= 0xDC00 && pWide [1] <= 0xDFFF) {
            // high surrogate followed by low surrogate (pWide [1] is at worst the terminator)
            code = 0x10000 + ((code - 0xD800) << 10) + (pWide [1] - 0xDC00);
            units = 2;
        }
        else if (code >= 0xD800 && code <= 0xDFFF)
            code = 0xFFFD;      // unpaired surrogate cannot be represented in UTF-8

        bytes = code < 0x80 ? 1 : code < 0x800 ? 2 : code < 0x10000 ? 3 : 4;

        // always leave room for the terminator

        if (outndx + bytes >= len)
            break;

        if (bytes == 1)
            pUTF8 [outndx++] = (unsigned char) code;
        else if (bytes == 2) {
            pUTF8 [outndx++] = (unsigned char) (0xc0 | ((code >> 6) & 0x1f));
            pUTF8 [outndx++] = (unsigned char) (0x80 | (code & 0x3f));
        }
        else if (bytes == 3) {
            pUTF8 [outndx++] = (unsigned char) (0xe0 | ((code >> 12) & 0xf));
            pUTF8 [outndx++] = (unsigned char) (0x80 | ((code >> 6) & 0x3f));
            pUTF8 [outndx++] = (unsigned char) (0x80 | (code & 0x3f));
        }
        else {
            pUTF8 [outndx++] = (unsigned char) (0xf0 | ((code >> 18) & 0x7));
            pUTF8 [outndx++] = (unsigned char) (0x80 | ((code >> 12) & 0x3f));
            pUTF8 [outndx++] = (unsigned char) (0x80 | ((code >> 6) & 0x3f));
            pUTF8 [outndx++] = (unsigned char) (0x80 | (code & 0x3f));
        }

        pWide += units;
    }

    pUTF8 [outndx] = 0;
    return (int)(pWide - Wide);
}
418
419 // Convert a Latin1 string into its Unicode UTF-8 format equivalent. The
420 // conversion is done in-place so the maximum length of the string buffer must
421 // be specified because the string may become longer or shorter. If the
422 // resulting string will not fit in the specified buffer size then it is
423 // truncated.
424
425 #ifdef _WIN32
426
427 #include <windows.h>
428
// Windows version: widen the string with MultiByteToWideChar() and then
// re-encode it in place as UTF-8. The string is left untouched if the
// temporary buffer cannot be allocated or the widening step fails.

static void Latin1ToUTF8 (void *string, int len)
{
    int max_chars = (int) strlen (string);
    uint16_t *temp = malloc (((size_t) max_chars + 1) * sizeof (uint16_t));

    if (!temp)
        return;

    // 28591 is the Windows code page identifier for ISO-8859-1; a zero return means
    // the conversion failed and "temp" does not hold a usable string

    if (MultiByteToWideChar (28591, 0, string, -1, temp, max_chars + 1) > 0)
        WideCharToUTF8 (temp, (unsigned char *) string, len);

    free (temp);
}
438
439 #else
440
441 #include <iconv.h>
442
// iconv version: convert into a zeroed temporary buffer of "len" bytes and
// copy the result back over the original string. One byte is always held
// back so the result is NUL terminated. If the converted text does not fit,
// the characters that did fit are kept (truncation). The string is left
// untouched if "len" is not positive, if memory or the converter is
// unavailable, or if the conversion fails for any other reason.

static void Latin1ToUTF8 (void *string, int len)
{
    char *inp = string, *temp, *outp;
    size_t insize, outsize, status;
    iconv_t converter;

    if (!string || len <= 0)
        return;

    temp = calloc ((size_t) len, 1);

    if (!temp)
        return;

    converter = iconv_open ("UTF-8", "ISO-8859-1");

    if (converter != (iconv_t) -1) {
        insize = strlen (inp);
        outp = temp;
        outsize = (size_t) len - 1;
        status = iconv (converter, &inp, &insize, &outp, &outsize);

        // E2BIG only means the output buffer filled up; what was converted so far is
        // still valid, terminated UTF-8 (errno is checked before iconv_close() can alter it)

        if (status != (size_t) -1 || errno == E2BIG)
            memcpy (string, temp, (size_t) len);

        iconv_close (converter);
    }

    free (temp);
}
473
474 #endif
475
476