1 /*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15 */
16
17 #include "id3.h"
18 #include "id3_genre.dat"
19 #include "id3_compat.c"
20 #include "id3_frametype.c"
21
22 #define NGENRES (sizeof(genre_table) / sizeof(genre_table[0]))
23
/*
 * Decode a big-endian unsigned integer stored in `length` bytes at `buf`.
 *
 * buf:    first byte of the encoded value; may be NULL.
 * length: number of bytes to decode; zero or negative decodes nothing.
 *
 * Returns the decoded value, or 0 when buf is NULL or length is not
 * positive.
 *
 * The value is accumulated by shifting an unsigned int left 8 bits per byte,
 * so no shift count can ever reach the width of the type (the previous
 * per-byte `<< (8 * position)` form was undefined for length > 4).  When the
 * input is longer than an unsigned int, the leading bytes are shifted out
 * and the low-order bytes are returned.
 */
static int
_varint(unsigned char *buf, int length)
{
  unsigned int number = 0;
  int i;

  if (buf == NULL) {
    return 0;
  }

  for (i = 0; i < length; i++) {
    number = (number << 8) | buf[i];
  }

  return (int)number;
}
41
42 int
parse_id3(PerlIO * infile,char * file,HV * info,HV * tags,off_t seek,off_t file_size)43 parse_id3(PerlIO *infile, char *file, HV *info, HV *tags, off_t seek, off_t file_size)
44 {
45 int err = 0;
46 unsigned char *bptr;
47
48 id3info *id3;
49 Newz(0, id3, sizeof(id3info), id3info);
50 Newz(0, id3->buf, sizeof(Buffer), Buffer);
51 Newz(0, id3->utf8, sizeof(Buffer), Buffer);
52
53 id3->infile = infile;
54 id3->file = file;
55 id3->info = info;
56 id3->tags = tags;
57 id3->offset = seek;
58
59 buffer_init(id3->buf, ID3_BLOCK_SIZE);
60
61 if ( !seek ) {
62 // Check for ID3v1 tag first
63 PerlIO_seek(infile, file_size - 128, SEEK_SET);
64 if ( !_check_buf(infile, id3->buf, 128, 128) ) {
65 err = -1;
66 goto out;
67 }
68
69 bptr = buffer_ptr(id3->buf);
70 if (bptr[0] == 'T' && bptr[1] == 'A' && bptr[2] == 'G') {
71 _id3_parse_v1(id3);
72 }
73 }
74
75 // Check for ID3v2 tag
76 PerlIO_seek(infile, seek, SEEK_SET);
77 buffer_clear(id3->buf);
78
79 // Read enough for header (10) + extended header size (4)
80 if ( !_check_buf(infile, id3->buf, 14, ID3_BLOCK_SIZE) ) {
81 err = -1;
82 goto out;
83 }
84
85 bptr = buffer_ptr(id3->buf);
86 if (bptr[0] == 'I' && bptr[1] == 'D' && bptr[2] == '3') {
87 _id3_parse_v2(id3);
88 }
89
90 out:
91 buffer_free(id3->buf);
92 Safefree(id3->buf);
93
94 if (id3->utf8->alloc)
95 buffer_free(id3->utf8);
96 Safefree(id3->utf8);
97
98 Safefree(id3);
99
100 return err;
101 }
102
103 int
_id3_parse_v1(id3info * id3)104 _id3_parse_v1(id3info *id3)
105 {
106 SV *tmp = NULL;
107 uint8_t read = 0;
108 unsigned char *bptr;
109 uint8_t comment_len;
110 uint8_t genre;
111
112 buffer_consume(id3->buf, 3); // TAG
113
114 read = _id3_get_v1_utf8_string(id3, &tmp, 30);
115 if (tmp && SvPOK(tmp) && sv_len(tmp)) {
116 DEBUG_TRACE("ID3v1 title: %s\n", SvPVX(tmp));
117 my_hv_store( id3->tags, ID3_FRAME_TITLE, tmp );
118 }
119 else {
120 if (tmp) SvREFCNT_dec(tmp);
121 }
122 if (read < 30) {
123 buffer_consume(id3->buf, 30 - read);
124 }
125
126 tmp = NULL;
127 read = _id3_get_v1_utf8_string(id3, &tmp, 30);
128 if (tmp && SvPOK(tmp) && sv_len(tmp)) {
129 DEBUG_TRACE("ID3v1 artist: %s\n", SvPVX(tmp));
130 my_hv_store( id3->tags, ID3_FRAME_ARTIST, tmp );
131 tmp = NULL;
132 }
133 else {
134 if (tmp) SvREFCNT_dec(tmp);
135 }
136 if (read < 30) {
137 buffer_consume(id3->buf, 30 - read);
138 }
139
140 tmp = NULL;
141 read = _id3_get_v1_utf8_string(id3, &tmp, 30);
142 if (tmp && SvPOK(tmp) && sv_len(tmp)) {
143 DEBUG_TRACE("ID3v1 album: %s\n", SvPVX(tmp));
144 my_hv_store( id3->tags, ID3_FRAME_ALBUM, tmp );
145 tmp = NULL;
146 }
147 else {
148 if (tmp) SvREFCNT_dec(tmp);
149 }
150 if (read < 30) {
151 buffer_consume(id3->buf, 30 - read);
152 }
153
154 tmp = NULL;
155 read = _id3_get_v1_utf8_string(id3, &tmp, 4);
156 if (tmp && SvPOK(tmp) && sv_len(tmp)) {
157 DEBUG_TRACE("ID3v1 year: %s\n", SvPVX(tmp));
158 my_hv_store( id3->tags, ID3_FRAME_YEAR, tmp );
159 tmp = NULL;
160 }
161 else {
162 if (tmp) SvREFCNT_dec(tmp);
163 }
164 if (read < 4) {
165 buffer_consume(id3->buf, 4 - read);
166 }
167
168 bptr = buffer_ptr(id3->buf);
169 if (bptr[28] == 0 && bptr[29] != 0) {
170 // ID3v1.1 track number is present
171 comment_len = 28;
172 my_hv_store( id3->tags, ID3_FRAME_TRACK, newSVuv(bptr[29]) );
173 my_hv_store( id3->info, "id3_version", newSVpv( "ID3v1.1", 0 ) );
174 }
175 else {
176 comment_len = 30;
177 my_hv_store( id3->info, "id3_version", newSVpv( "ID3v1", 0 ) );
178 }
179
180 tmp = NULL;
181 read = _id3_get_v1_utf8_string(id3, &tmp, comment_len);
182 if (tmp && SvPOK(tmp) && sv_len(tmp)) {
183 AV *comment_array = newAV();
184 av_push( comment_array, newSVpvn("XXX", 3) );
185 av_push( comment_array, newSVpvn("", 0) );
186 av_push( comment_array, tmp );
187 DEBUG_TRACE("ID3v1 comment: %s\n", SvPVX(tmp));
188 my_hv_store( id3->tags, ID3_FRAME_COMMENT, newRV_noinc( (SV *)comment_array ) );
189 tmp = NULL;
190 }
191 else {
192 if (tmp) SvREFCNT_dec(tmp);
193 }
194 if (read < 30) {
195 buffer_consume(id3->buf, 30 - read);
196 }
197
198 genre = buffer_get_char(id3->buf);
199 if (genre < NGENRES) {
200 char const *genre_string = _id3_genre_index(genre);
201 my_hv_store( id3->tags, ID3_FRAME_GENRE, newSVpv(genre_string, 0) );
202 }
203 else if (genre < 255) {
204 my_hv_store( id3->tags, ID3_FRAME_GENRE, newSVpvf("Unknown/%d", genre) );
205 }
206
207 return 1;
208 }
209
210 int
_id3_parse_v2(id3info * id3)211 _id3_parse_v2(id3info *id3)
212 {
213 int ret = 1;
214 unsigned char *bptr;
215
216 // Verify we have a valid tag
217 bptr = buffer_ptr(id3->buf);
218 if ( !(
219 bptr[3] < 0xff && bptr[4] < 0xff &&
220 bptr[6] < 0x80 && bptr[7] < 0x80 && bptr[8] < 0x80 && bptr[9] < 0x80
221 ) ) {
222 PerlIO_printf(PerlIO_stderr(), "Invalid ID3v2 tag in %s\n", id3->file);
223 return 0;
224 }
225
226 buffer_consume(id3->buf, 3); // ID3
227
228 id3->version_major = buffer_get_char(id3->buf);
229 id3->version_minor = buffer_get_char(id3->buf);
230 id3->flags = buffer_get_char(id3->buf);
231 id3->size = 10 + buffer_get_syncsafe(id3->buf, 4);
232
233 id3->size_remain = id3->size - 10;
234
235 if (id3->flags & ID3_TAG_FLAG_FOOTERPRESENT) {
236 id3->size += 10;
237 }
238
239 DEBUG_TRACE("Parsing ID3v2.%d.%d tag, flags %x, size %d\n", id3->version_major, id3->version_minor, id3->flags, id3->size);
240
241 if (id3->flags & ID3_TAG_FLAG_UNSYNCHRONISATION) {
242 if (id3->version_major < 4) {
243 // It's unclear but the v2.4.0-changes document seems to say that v2.4 should
244 // ignore the tag-level unsync flag and only worry about frame-level unsync
245
246 // For v2.2/v2.3, unsync the entire tag. This is unfortunate due to
247 // increased memory usage but the only way to do it, as frame size values only
248 // indicate the post-unsync size, so it's not possible to unsync each frame individually
249 // tested with v2.3-unsync.mp3
250 if ( !_check_buf(id3->infile, id3->buf, id3->size, id3->size) ) {
251 ret = 0;
252 goto out;
253 }
254
255 id3->size_remain = _id3_deunsync( buffer_ptr(id3->buf), id3->size );
256
257 DEBUG_TRACE(" Un-synchronized tag, new_size %d\n", id3->size_remain);
258
259 my_hv_store( id3->info, "id3_was_unsynced", newSVuv(1) );
260 }
261 else {
262 DEBUG_TRACE(" Ignoring v2.4 tag un-synchronize flag\n");
263 }
264 }
265
266 if (id3->flags & ID3_TAG_FLAG_EXTENDEDHEADER) {
267 uint32_t ehsize;
268
269 // If the tag is v2.2, this bit is actually the compression bit and the tag should be ignored
270 if (id3->version_major == 2) {
271 ret = 0;
272 goto out;
273 }
274
275 // tested with v2.3-ext-header.mp3
276
277 // We don't care about the value of the extended flags or CRC, so just read the size and skip it
278 ehsize = buffer_get_int(id3->buf);
279
280 // ehsize may be invalid, tested with v2.3-ext-header-invalid.mp3
281 if (ehsize > id3->size_remain - 4) {
282 warn("Error: Invalid ID3 extended header size (%s)\n", id3->file);
283 ret = 0;
284 goto out;
285 }
286
287 DEBUG_TRACE(" Skipping extended header, size %d\n", ehsize);
288
289 if ( !_check_buf(id3->infile, id3->buf, ehsize, ID3_BLOCK_SIZE) ) {
290 ret = 0;
291 goto out;
292 }
293 buffer_consume(id3->buf, ehsize);
294
295 id3->size_remain -= ehsize + 4;
296 }
297
298 // Parse frames
299 while (id3->size_remain > 0) {
300 //DEBUG_TRACE(" remain: %d\n", id3->size_remain);
301 if ( !_id3_parse_v2_frame(id3) ) {
302 break;
303 }
304 }
305
306 if (id3->version_major < 4) {
307 // map old year/date/time (TYER/TDAT/TIME) frames to TDRC
308 // tested in v2.3-xsop.mp3
309 _id3_convert_tdrc(id3);
310 }
311
312 // Set id3_version info element, which contains all tag versions found
313 {
314 SV *version = newSVpvf( "ID3v2.%d.%d", id3->version_major, id3->version_minor );
315
316 if ( my_hv_exists(id3->info, "id3_version") ) {
317 SV **entry = my_hv_fetch(id3->info, "id3_version");
318 if (entry != NULL) {
319 sv_catpv( version, ", " );
320 sv_catsv( version, *entry );
321 }
322 }
323
324 my_hv_store( id3->info, "id3_version", version );
325 }
326
327 out:
328 return ret;
329 }
330
331 int
_id3_parse_v2_frame(id3info * id3)332 _id3_parse_v2_frame(id3info *id3)
333 {
334 int ret = 1;
335 char id[5];
336 uint16_t flags = 0;
337 uint32_t size = 0;
338 uint32_t decoded_size = 0;
339 uint32_t unsync_extra = 0;
340 id3_frametype const *frametype;
341 Buffer *tmp_buf = 0;
342
343 // If the frame is compressed, it will be decompressed here
344 Buffer *decompressed = 0;
345
346 // tag_data_safe flag is used if skipping artwork and artwork is not raw image data (needs unsync)
347 id3->tag_data_safe = 1;
348
349 if ( !_check_buf(id3->infile, id3->buf, 10, ID3_BLOCK_SIZE) ) {
350 ret = 0;
351 goto out;
352 }
353
354 if (id3->version_major == 2) {
355 // v2.2
356 id3_compat const *compat;
357
358 // Read 3-letter id
359 buffer_get(id3->buf, &id, 3);
360 id[3] = 0;
361
362 if (id[0] == 0) {
363 // padding
364 DEBUG_TRACE(" Found start of padding, aborting\n");
365 ret = 0;
366 goto out;
367 }
368
369 size = buffer_get_int24(id3->buf);
370
371 DEBUG_TRACE(" %s, size %d\n", id, size);
372
373 // map 3-char id to 4-char id
374 compat = _id3_compat_lookup((char *)&id, 3);
375 if (compat && compat->equiv) {
376 strncpy(id, compat->equiv, 4);
377 id[4] = 0;
378
379 DEBUG_TRACE(" compat -> %s\n", id);
380 }
381 else {
382 // no compat mapping (obsolete), prepend 'Y' to id
383 id[4] = 0;
384 id[3] = id[2];
385 id[2] = id[1];
386 id[1] = id[0];
387 id[0] = 'Y';
388
389 DEBUG_TRACE(" obsolete/unknown -> %s\n", id);
390 }
391
392 id3->size_remain -= 6;
393
394 if (size > id3->size_remain) {
395 DEBUG_TRACE(" frame size too big, aborting\n");
396 ret = 0;
397 goto out;
398 }
399 }
400 else {
401 // Read 4-letter id
402 buffer_get(id3->buf, &id, 4);
403 id[4] = 0;
404
405 if (id[0] == 0) {
406 // padding
407 DEBUG_TRACE(" Found start of padding, aborting\n");
408 ret = 0;
409 goto out;
410 }
411
412 id3->size_remain -= 4;
413
414 if (id3->version_major == 3) {
415 // v2.3
416 id3_compat const *compat;
417
418 size = buffer_get_int(id3->buf);
419 flags = buffer_get_short(id3->buf);
420
421 DEBUG_TRACE(" %s, frame flags %x, size %d\n", id, flags, size);
422
423 // map to v2.4 id
424 if (id[3] == ' ') {
425 // iTunes writes bad frame IDs such as 'TSA ', these should be run through compat
426 // as 3-char frames
427 compat = _id3_compat_lookup((char *)&id, 3);
428 }
429 else {
430 compat = _id3_compat_lookup((char *)&id, 4);
431 }
432 if (compat && compat->equiv) {
433 strncpy(id, compat->equiv, 4);
434 id[4] = 0;
435
436 DEBUG_TRACE(" compat -> %s\n", id);
437 }
438
439 id3->size_remain -= 6;
440
441 if (size > id3->size_remain) {
442 DEBUG_TRACE(" frame size too big, aborting\n");
443 ret = 0;
444 goto out;
445 }
446
447 if (flags & ID3_FRAME_FLAG_V23_COMPRESSION) {
448 // tested with v2.3-compressed-frame.mp3
449 decoded_size = buffer_get_int(id3->buf);
450 id3->size_remain -= 4;
451 size -= 4;
452 }
453
454 if (flags & ID3_FRAME_FLAG_V23_ENCRYPTION) {
455 // tested with v2.3-encrypted-frame.mp3
456 #ifdef AUDIO_SCAN_DEBUG
457 DEBUG_TRACE(" encrypted, method %d\n", buffer_get_char(id3->buf));
458 #else
459 buffer_consume(id3->buf, 1);
460 #endif
461
462 id3->size_remain--;
463 size--;
464
465 DEBUG_TRACE(" skipping encrypted frame\n");
466 _id3_skip(id3, size);
467 id3->size_remain -= size;
468 goto out;
469 }
470
471 if (flags & ID3_FRAME_FLAG_V23_GROUPINGIDENTITY) {
472 // tested with v2.3-group-id.mp3
473 #ifdef AUDIO_SCAN_DEBUG
474 DEBUG_TRACE(" group_id %d\n", buffer_get_char(id3->buf));
475 #else
476 buffer_consume(id3->buf, 1);
477 #endif
478
479 id3->size_remain--;
480 size--;
481 }
482
483 // Perform decompression if necessary after all optional extra bytes have been read
484 // XXX need test for compressed + unsync
485 if (flags & ID3_FRAME_FLAG_V23_COMPRESSION && decoded_size) {
486 unsigned long tmp_size;
487
488 if ( !_check_buf(id3->infile, id3->buf, size, ID3_BLOCK_SIZE) ) {
489 ret = 0;
490 goto out;
491 }
492
493 DEBUG_TRACE(" decompressing, decoded_size %d\n", decoded_size);
494
495 Newz(0, decompressed, sizeof(Buffer), Buffer);
496 buffer_init(decompressed, decoded_size);
497
498 tmp_size = decoded_size;
499 if (
500 uncompress(buffer_ptr(decompressed), &tmp_size, buffer_ptr(id3->buf), size) != Z_OK
501 ||
502 tmp_size != decoded_size
503 ) {
504 DEBUG_TRACE(" unable to decompress frame\n");
505 buffer_free(decompressed);
506 Safefree(decompressed);
507 decompressed = 0;
508 }
509 else {
510 // Hack buffer so it knows we've added data directly
511 decompressed->end = decoded_size;
512 }
513 }
514 }
515 else {
516 // v2.4
517
518 // iTunes writes non-syncsafe length integers, check for this here
519 if ( _varint(buffer_ptr(id3->buf), 4) & 0x80 ) {
520 size = buffer_get_int(id3->buf);
521 DEBUG_TRACE(" found non-syncsafe iTunes size for %s, size adjusted to %d\n", id, size);
522 }
523 else {
524 size = buffer_get_syncsafe(id3->buf, 4);
525 }
526
527 flags = buffer_get_short(id3->buf);
528
529 id3->size_remain -= 6;
530
531 DEBUG_TRACE(" %s, frame flags %x, size %d\n", id, flags, size);
532
533 if (size > id3->size_remain) {
534 DEBUG_TRACE(" frame size too big, aborting\n");
535 ret = 0;
536 goto out;
537 }
538
539 // iTunes writes bad frame IDs such as 'TSA ', these should be run through compat
540 // as 3-char frames
541 if (id[3] == ' ') {
542 id3_compat const *compat;
543 compat = _id3_compat_lookup((char *)&id, 3);
544 if (compat && compat->equiv) {
545 strncpy(id, compat->equiv, 4);
546 id[4] = 0;
547
548 DEBUG_TRACE(" bad iTunes v2.4 tag, compat -> %s\n", id);
549 }
550 }
551
552 if (flags & ID3_FRAME_FLAG_V24_GROUPINGIDENTITY) {
553 // tested with v2.4-group-id.mp3
554 #ifdef AUDIO_SCAN_DEBUG
555 DEBUG_TRACE(" group_id %d\n", buffer_get_char(id3->buf));
556 #else
557 buffer_consume(id3->buf, 1);
558 #endif
559 id3->size_remain--;
560 size--;
561 }
562
563 if (flags & ID3_FRAME_FLAG_V24_ENCRYPTION) {
564 // tested with v2.4-encrypted-frame.mp3
565 #ifdef AUDIO_SCAN_DEBUG
566 DEBUG_TRACE(" encrypted, method %d\n", buffer_get_char(id3->buf));
567 #else
568 buffer_consume(id3->buf, 1);
569 #endif
570
571 id3->size_remain--;
572 size--;
573
574 DEBUG_TRACE(" skipping encrypted frame\n");
575 _id3_skip(id3, size);
576 id3->size_remain -= size;
577 goto out;
578 }
579
580 if (flags & ID3_FRAME_FLAG_V24_DATALENGTHINDICATOR) {
581 decoded_size = buffer_get_syncsafe(id3->buf, 4);
582 id3->size_remain -= 4;
583 size -= 4;
584
585 DEBUG_TRACE(" data length indicator, size %d\n", decoded_size);
586 }
587
588 if (flags & ID3_FRAME_FLAG_V24_UNSYNCHRONISATION) {
589 // Special case, do not unsync an APIC frame if not reading artwork,
590 // FF's are not likely to appear in the part we care about anyway
591 if ( !strcmp(id, "APIC") && _env_true("AUDIO_SCAN_NO_ARTWORK") ) {
592 DEBUG_TRACE(" Would un-synchronize APIC frame, but ignoring because of AUDIO_SCAN_NO_ARTWORK\n");
593
594 // Reset decoded_size to 0 since we aren't actually decoding.
595 // XXX this would break if we have a compressed + unsync APIC frame but not very likely in the real world
596 decoded_size = 0;
597
598 id3->tag_data_safe = 0;
599 }
600 else {
601 // tested with v2.4-unsync.mp3
602 if ( !_check_buf(id3->infile, id3->buf, size, ID3_BLOCK_SIZE) ) {
603 ret = 0;
604 goto out;
605 }
606
607 decoded_size = _id3_deunsync( buffer_ptr(id3->buf), size );
608
609 unsync_extra = size - decoded_size;
610
611 DEBUG_TRACE(" Un-synchronized frame, new_size %d\n", decoded_size);
612 }
613 }
614
615 if (flags & ID3_FRAME_FLAG_V24_COMPRESSION) {
616 // tested with v2.4-compressed-frame.mp3
617 // XXX need test for compressed + unsync
618 unsigned long tmp_size;
619
620 if ( !_check_buf(id3->infile, id3->buf, size, ID3_BLOCK_SIZE) ) {
621 ret = 0;
622 goto out;
623 }
624
625 DEBUG_TRACE(" decompressing\n");
626
627 Newz(0, decompressed, sizeof(Buffer), Buffer);
628 buffer_init(decompressed, decoded_size);
629
630 tmp_size = decoded_size;
631 if (
632 uncompress(buffer_ptr(decompressed), &tmp_size, buffer_ptr(id3->buf), size) != Z_OK
633 ||
634 tmp_size != decoded_size
635 ) {
636 DEBUG_TRACE(" unable to decompress frame\n");
637 buffer_free(decompressed);
638 Safefree(decompressed);
639 decompressed = 0;
640 }
641 else {
642 // Hack buffer so it knows we've added data directly
643 decompressed->end = decoded_size;
644 }
645 }
646 }
647 }
648
649 // Special case, completely skip XHD3 frame (mp3HD) as it will be large
650 // Also skip NCON, a large tag written by MusicMatch
651 if ( !strcmp(id, "XHD3") || !strcmp(id, "NCON") ) {
652 DEBUG_TRACE(" skipping large binary %s frame\n", id);
653 _id3_skip(id3, size);
654 id3->size_remain -= size;
655 goto out;
656 }
657
658 frametype = _id3_frametype_lookup(id, 4);
659 if (frametype == 0) {
660 switch ( id[0] ) {
661 case 'T':
662 frametype = &id3_frametype_text;
663 break;
664
665 case 'W':
666 frametype = &id3_frametype_url;
667 break;
668
669 case 'X':
670 case 'Y':
671 case 'Z':
672 frametype = &id3_frametype_experimental;
673 break;
674
675 default:
676 frametype = &id3_frametype_unknown;
677 break;
678 }
679 }
680
681 #ifdef AUDIO_SCAN_DEBUG
682 {
683 int i;
684 DEBUG_TRACE(" nfields %d:", frametype->nfields);
685 for (i = 0; i < frametype->nfields; ++i) {
686 DEBUG_TRACE(" %d", frametype->fields[i]);
687 }
688 DEBUG_TRACE("\n");
689 }
690 #endif
691
692 // If frame was compressed, temporarily set the id3 buffer to use the decompressed buffer
693 if (decompressed) {
694 tmp_buf = id3->buf;
695 id3->buf = decompressed;
696 }
697
698 if ( !_id3_parse_v2_frame_data(id3, (char *)&id, decoded_size ? decoded_size : size, frametype) ) {
699 DEBUG_TRACE(" error parsing frame, aborting\n");
700 ret = 0;
701 goto out;
702 }
703
704 if (id3->size_remain > size) {
705 id3->size_remain -= size;
706 }
707 else {
708 id3->size_remain = 0;
709 }
710
711 // Consume extra bytes if we had to unsync this frame
712 if (unsync_extra) {
713 DEBUG_TRACE(" consuming extra bytes after unsync: %d\n", unsync_extra);
714 buffer_consume(id3->buf, unsync_extra);
715 }
716
717 out:
718 if (decompressed) {
719 // Reset id3 buffer and consume rest of compressed frame
720 id3->buf = tmp_buf;
721 buffer_consume(id3->buf, size);
722
723 buffer_free(decompressed);
724 Safefree(decompressed);
725 }
726
727 return ret;
728 }
729
730 int
_id3_parse_v2_frame_data(id3info * id3,char const * id,uint32_t size,id3_frametype const * frametype)731 _id3_parse_v2_frame_data(id3info *id3, char const *id, uint32_t size, id3_frametype const *frametype)
732 {
733 int ret = 1;
734 uint32_t read = 0;
735 int8_t encoding = -1;
736
737 uint8_t buffer_art = ( !strcmp(id, "APIC") ) ? 1 : 0;
738 uint8_t skip_art = ( buffer_art && _env_true("AUDIO_SCAN_NO_ARTWORK") ) ? 1 : 0;
739
740 // Bug 16703, a completely empty frame is against the rules, skip it
741 if (!size)
742 return 1;
743
744 if (skip_art) {
745 // Only buffer enough for the APIC header fields, this is only a rough guess
746 // because the description could technically be very long
747 if ( !_check_buf(id3->infile, id3->buf, 128, ID3_BLOCK_SIZE) ) {
748 return 0;
749 }
750 DEBUG_TRACE(" partial read due to AUDIO_SCAN_NO_ARTWORK\n");
751 }
752 else {
753 // Use a special buffering mode for binary artwork, to avoid
754 // using 2x the memory of the APIC frame (once for buffer, once for SV)
755 if (buffer_art) {
756 // Buffer enough for encoding/MIME/picture type/description
757 if ( !_check_buf(id3->infile, id3->buf, 128, ID3_BLOCK_SIZE) ) {
758 return 0;
759 }
760 }
761 else {
762 // Buffer the entire frame
763 if ( !_check_buf(id3->infile, id3->buf, size, ID3_BLOCK_SIZE) ) {
764 return 0;
765 }
766 }
767 }
768
769 if ( frametype->fields[0] == ID3_FIELD_TYPE_TEXTENCODING ) {
770 // many frames have an encoding byte, read it here
771 encoding = buffer_get_char(id3->buf);
772 read++;
773 DEBUG_TRACE(" encoding: %d\n", encoding);
774
775 if (encoding < 0 || encoding > 3) {
776 DEBUG_TRACE(" invalid encoding, skipping frame\n");
777 goto out;
778 }
779 }
780
781 // Special handling for TXXX/WXXX frames
782 if ( !strcmp(id, "TXXX") || !strcmp(id, "WXXX") ) {
783 // Read key and uppercase it
784 SV *key = NULL;
785 SV *value = NULL;
786
787 read += _id3_get_utf8_string(id3, &key, size - read, encoding);
788
789 if (key != NULL && SvPOK(key) && sv_len(key)) {
790 upcase(SvPVX(key));
791
792 // Read value
793 if (frametype->fields[2] == ID3_FIELD_TYPE_LATIN1) {
794 // WXXX frames have a latin1 value field regardless of encoding byte
795 encoding = ISO_8859_1;
796 }
797
798 read += _id3_get_utf8_string(id3, &value, size - read, encoding);
799
800 // (T|W)XXX frames don't support multiple strings separated by nulls, even in v2.4
801
802 // Only one tag per unique key value is allowed, that's why there is no array support here
803 if (value != NULL && SvPOK(value) && sv_len(value)) {
804 my_hv_store_ent( id3->tags, key, value );
805 }
806 else {
807 my_hv_store_ent( id3->tags, key, &PL_sv_undef );
808 if (value) SvREFCNT_dec(value);
809 }
810 }
811 else {
812 DEBUG_TRACE(" invalid/empty (T|W)XXX key, skipping frame\n");
813 }
814
815 if (key) SvREFCNT_dec(key);
816 }
817
818 // Special handling for TCON genre frame
819 else if ( !strcmp(id, "TCON") ) {
820 AV *genres = newAV();
821 char *sptr, *end, *tmp;
822
823 while (read < size) {
824 SV *value = NULL;
825
826 // v2.4 handles multiple genres using null char separators (or $00 $00 in UTF-16),
827 // this is handled by _id3_get_utf8_string
828 read += _id3_get_utf8_string(id3, &value, size - read, encoding);
829 if (value != NULL && SvPOK(value)) {
830 sptr = SvPVX(value);
831
832 // Test if the string contains only a number,
833 // strtol will set tmp to end in this case
834 end = sptr + sv_len(value);
835 strtol(sptr, &tmp, 0);
836
837 if ( tmp == end ) {
838 // Convert raw number to genre string
839 av_push( genres, newSVpv( _id3_genre_name((char *)sptr), 0 ) );
840
841 // value as an SV won't be used, must drop refcnt
842 SvREFCNT_dec(value);
843 }
844 else if ( *sptr == '(' ) {
845 // Handle (26), (26)Ambient, etc, only the number portion will be read
846
847 if (id3->version_major < 4) {
848 // v2.2/v2.3 handle multiple genres using parens for some reason, i.e. (51)(39) or (55)(Text)
849 char *ptr = sptr;
850 char *end = sptr + sv_len(value);
851
852 while (end - ptr > 0) {
853 if ( *ptr++ == '(' ) {
854 char *paren = strchr(ptr, ')');
855 if (paren == NULL)
856 paren = end;
857
858 if ( isdigit(*ptr) || !strncmp((char *)ptr, "RX", 2) || !strncmp((char *)ptr, "CR", 2) ) {
859 av_push( genres, newSVpv( _id3_genre_name((char *)ptr), 0 ) );
860 }
861 else {
862 // Handle text within parens
863 av_push( genres, newSVpvn(ptr, paren - ptr) );
864 }
865 ptr = paren;
866 }
867 }
868 }
869 else {
870 // v2.4, the (51) method is no longer valid but we will support it anyway
871 sptr++;
872 if ( isdigit(*sptr) || !strncmp(sptr, "RX", 2) || !strncmp(sptr, "CR", 2) ) {
873 av_push( genres, newSVpv( _id3_genre_name((char *)sptr), 0 ) );
874 }
875 else {
876 av_push( genres, newSVpv( (char *)sptr, 0 ) );
877 }
878 }
879
880 // value as an SV won't be used, must drop refcnt
881 SvREFCNT_dec(value);
882 }
883 else {
884 // Support raw RX/CR value
885 if ( !strncmp(sptr, "RX", 2) || !strncmp(sptr, "CR", 2) ) {
886 av_push( genres, newSVpv( _id3_genre_name((char *)sptr), 0 ) );
887
888 // value as an SV won't be used, must drop refcnt
889 SvREFCNT_dec(value);
890 }
891 else {
892 // Store plain text genre
893 av_push( genres, value );
894 }
895 }
896 }
897 }
898
899 if (av_len(genres) > 0) {
900 my_hv_store( id3->tags, id, newRV_noinc( (SV *)genres ) );
901 }
902 else if (av_len(genres) == 0) {
903 my_hv_store( id3->tags, id, av_shift(genres) );
904 SvREFCNT_dec(genres);
905 }
906 else {
907 SvREFCNT_dec(genres);
908 }
909 }
910
911 // 1-field frames: MCDI, PCNT, SEEK (unsupported), T* (text), W* (url), unknown
912 // and 2-field frames where the first field is encoding
913 // are mapped to plain hash entries
914 else if (
915 frametype->nfields == 1 ||
916 (frametype->nfields == 2 && frametype->fields[0] == ID3_FIELD_TYPE_TEXTENCODING)
917 ) {
918 int i = frametype->nfields - 1;
919 AV *array = NULL;
920 SV *value = NULL;
921 int count = 0;
922
923 switch ( frametype->fields[i] ) {
924 case ID3_FIELD_TYPE_LATIN1: // W* frames
925 read += _id3_get_utf8_string(id3, &value, size - read, ISO_8859_1);
926 if (value != NULL && SvPOK(value))
927 my_hv_store( id3->tags, id, value );
928 break;
929
930 case ID3_FIELD_TYPE_STRINGLIST: // T* frames
931 // XXX technically in v2.2/v2.3 we should ignore multiple strings separated by nulls, but
932 // allowing it is fine I think
933 while (read < size) {
934 if (count++ == 1 && value != NULL) {
935 // we're reading the second string in the list, move first value to new array
936 array = newAV();
937 av_push(array, value);
938 }
939 value = NULL;
940
941 read += _id3_get_utf8_string(id3, &value, size - read, encoding);
942
943 if (array != NULL && value != NULL && SvPOK(value)) {
944 // second+ string, add to array
945 // Bug 16452, do not add a null string
946 if (sv_len(value) > 0)
947 av_push(array, value);
948 }
949 }
950
951 if (array != NULL) {
952 if (av_len(array) == 0) {
953 // Handle the case where we have multiple empty strings leaving an array of 1
954 my_hv_store( id3->tags, id, av_shift(array) );
955 SvREFCNT_dec(array);
956 }
957 else {
958 my_hv_store( id3->tags, id, newRV_noinc( (SV *)array ) );
959 }
960 }
961 else if (value != NULL && SvPOK(value)) {
962 my_hv_store( id3->tags, id, value );
963 }
964 break;
965
966 case ID3_FIELD_TYPE_INT32: // SEEK (unsupported, XXX need test)
967 my_hv_store( id3->tags, id, newSViv( buffer_get_int(id3->buf) ) );
968 read += 4;
969 break;
970
971 case ID3_FIELD_TYPE_INT32PLUS: // PCNT
972 my_hv_store( id3->tags, id, newSViv( _varint( buffer_ptr(id3->buf), size - read ) ) );
973 buffer_consume(id3->buf, size - read);
974 read = size;
975 break;
976
977 case ID3_FIELD_TYPE_BINARYDATA: // unknown/obsolete frames
978 // Special handling for RVA(D), tested in v2.2-itunes81.mp3, v2.3-itunes81.mp3
979 if ( !strcmp(id, "RVAD") ) {
980 read += _id3_parse_rvad(id3, id, size - read);
981 }
982
983 // Special handling for RGAD (non-standard replaygain frame), tested in v2.3-rgad.mp3
984 // Based on some code found at http://getid3.sourceforge.net/source/module.tag.id3v2.phps
985 else if ( !strcmp(id, "RGAD") ) {
986 read += _id3_parse_rgad(id3);
987 }
988
989 // Other unknown binary data
990 else {
991 // Y* obsolete frames
992 my_hv_store( id3->tags, id, newSVpvn( buffer_ptr(id3->buf), size - read ) );
993 buffer_consume(id3->buf, size - read);
994 read = size;
995 }
996 break;
997
998 default:
999 // XXX
1000 warn(" !!! unhandled field type %d\n", frametype->fields[i]);
1001 buffer_consume(id3->buf, size - read);
1002 read += size - read;
1003 break;
1004 }
1005 }
1006
1007 // 2+ field frames are mapped to arrayrefs:
1008 // The following frames have tests:
1009 // ETCO, UFID, USLT, SYLT, COMM, RVA2, APIC, GEOB, POPM, LINK, PRIV
1010 //
1011 // XXX The following frames need tests:
1012 // MLLT, SYTC, EQU2, RVRB, AENC, POSS, USER, OWNE,
1013 // COMR, ENCR, GRID, SIGN, ASPI, LINK (v2.4)
1014 else {
1015 int i = 0;
1016 AV *framedata = newAV();
1017
1018 // If we read an initial encoding byte, start at field 2
1019 if (encoding >= 0)
1020 i = 1;
1021
1022 for (; i < frametype->nfields; i++) {
1023 SV *value = NULL;
1024
1025 switch ( frametype->fields[i] ) {
1026 case ID3_FIELD_TYPE_LATIN1:
1027 // Special case, fix v2.2 PIC frame fields as they don't match APIC
1028 // This is a rather hackish place to put this, but there's not really any other place
1029 if ( id3->version_major == 2 && !strcmp(id, "APIC") ) {
1030 av_push( framedata, newSVpvn( buffer_ptr(id3->buf), 3 ) );
1031 buffer_consume(id3->buf, 3);
1032 read += 3;
1033 DEBUG_TRACE(" PIC image format, read %d\n", read);
1034 }
1035 else {
1036 read += _id3_get_utf8_string(id3, &value, size - read, ISO_8859_1);
1037 if (value != NULL && SvPOK(value))
1038 av_push( framedata, value );
1039 }
1040 break;
1041
1042 // ID3_FIELD_TYPE_LATIN1FULL - not used
1043
1044 case ID3_FIELD_TYPE_LATIN1LIST: // LINK
1045 while (read < size) {
1046 read += _id3_get_utf8_string(id3, &value, size - read, ISO_8859_1);
1047 if (value != NULL && SvPOK(value))
1048 av_push( framedata, value );
1049 value = NULL;
1050 DEBUG_TRACE(" latin1list, read %d\n", read);
1051 }
1052 break;
1053
1054 case ID3_FIELD_TYPE_STRING:
1055 read += _id3_get_utf8_string(id3, &value, size - read, encoding);
1056 if (value != NULL && SvPOK(value)) {
1057 av_push( framedata, value );
1058 DEBUG_TRACE(" string, read %d: %s\n", read, SvPVX(value));
1059 }
1060 else {
1061 av_push( framedata, &PL_sv_undef );
1062 if (value) SvREFCNT_dec(value);
1063 }
1064 break;
1065
1066 case ID3_FIELD_TYPE_STRINGFULL: // USLT, COMM, read entire string until end of frame
1067 {
1068 SV *tmp = newSVpvn( "", 0 );
1069 while (read < size) {
1070 read += _id3_get_utf8_string(id3, &value, size - read, encoding);
1071 if (value != NULL && SvPOK(value)) {
1072 sv_catsv( tmp, value );
1073 SvREFCNT_dec(value);
1074 }
1075 value = NULL;
1076 }
1077 av_push( framedata, tmp );
1078 DEBUG_TRACE(" stringfull, read %d: %s\n", read, SvPVX(tmp));
1079 break;
1080 }
1081
1082 // ID3_FIELD_TYPE_STRINGLIST - only used for text frames, handled above
1083
1084 case ID3_FIELD_TYPE_LANGUAGE: // USLT, SYLT, COMM, USER, 3-byte language code
1085 if (size - read >= 3) {
1086 av_push( framedata, newSVpvn( buffer_ptr(id3->buf), 3 ) );
1087 buffer_consume(id3->buf, 3);
1088 read += 3;
1089 DEBUG_TRACE(" language, read %d\n", read);
1090 }
1091 break;
1092
1093 case ID3_FIELD_TYPE_FRAMEID: // LINK, 3-byte frame id (v2.3, must be a bug in the spec?),
1094 // 4-byte frame id (v2.4) XXX need test
1095 {
1096 uint8_t len = (id3->version_major == 3) ? 3 : 4;
1097 if (size - read >= len) {
1098 av_push( framedata, newSVpvn( buffer_ptr(id3->buf), len ) );
1099 buffer_consume(id3->buf, len);
1100 read += len;
1101 DEBUG_TRACE(" frameid, read %d\n", read);
1102 }
1103 break;
1104 }
1105
1106 case ID3_FIELD_TYPE_DATE: // OWNE, COMR, XXX need test, YYYYMMDD
1107 if (size - read >= 8) {
1108 av_push( framedata, newSVpvn( buffer_ptr(id3->buf), 8 ) );
1109 buffer_consume(id3->buf, 8);
1110 read += 8;
1111 DEBUG_TRACE(" date, read %d\n", read);
1112 }
1113 break;
1114
1115 case ID3_FIELD_TYPE_INT8: // ETCO, MLLT, SYTC, SYLT, EQU2, RVRB, APIC,
1116 // POPM, RBUF, POSS, COMR, ENCR, GRID, SIGN, ASPI
1117 if (size - read >= 1) {
1118 av_push( framedata, newSViv( buffer_get_char(id3->buf) ) );
1119 read += 1;
1120 DEBUG_TRACE(" int8, read %d\n", read);
1121 }
1122 break;
1123
1124 case ID3_FIELD_TYPE_INT16: // MLLT, RVRB, AENC, ASPI
1125 if (size - read >= 2) {
1126 av_push( framedata, newSViv( buffer_get_short(id3->buf) ) );
1127 read += 2;
1128 DEBUG_TRACE(" int16, read %d\n", read);
1129 }
1130 break;
1131
1132 case ID3_FIELD_TYPE_INT24: // MLLT, RBUF
1133 if (size - read >= 3) {
1134 av_push( framedata, newSViv( buffer_get_int24(id3->buf) ) );
1135 read += 3;
1136 DEBUG_TRACE(" int24, read %d\n", read);
1137 }
1138 break;
1139
1140 case ID3_FIELD_TYPE_INT32: // RBUF, SEEK, ASPI
1141 if (size - read >= 4) {
1142 av_push( framedata, newSViv( buffer_get_int(id3->buf) ) );
1143 read += 4;
1144 DEBUG_TRACE(" int32, read %d\n", read);
1145 }
1146 break;
1147
1148 case ID3_FIELD_TYPE_INT32PLUS: // POPM
1149 if (size - read >= 4) {
1150 av_push( framedata, newSViv( _varint( buffer_ptr(id3->buf), size - read ) ) );
1151 buffer_consume(id3->buf, size - read);
1152 read = size;
1153 DEBUG_TRACE(" int32plus, read %d\n", read);
1154 }
1155 break;
1156
1157 case ID3_FIELD_TYPE_BINARYDATA: // ETCO, MLLT, SYTC, SYLT, RVA2, EQU2, APIC,
1158 // GEOB, AENC, POSS, COMR, ENCR, GRID, PRIV, SIGN, ASPI
1159 // Special handling for APIC tags when in skip_art mode
1160 if (skip_art) {
1161 av_push( framedata, newSVuv(size - read) );
1162
1163 // I don't think it's possible to obtain an APIC offset when a tag has been unsync'ed,
1164 // so we can't support skip_art mode in this case. See v2.3-unsync-apic-bad-offset.mp3
1165 if (id3->flags & ID3_TAG_FLAG_UNSYNCHRONISATION && id3->version_major < 4) {
1166 DEBUG_TRACE(" cannot obtain APIC offset due to v2.3 unsync tag\n");
1167 }
1168 else {
1169 // Record offset of APIC image data too, unless the data needs to be unsynchronized or is empty
1170 if (id3->tag_data_safe && (size - read) > 0)
1171 av_push( framedata, newSVuv(id3->offset + (id3->size - id3->size_remain) + read) );
1172 }
1173
1174 _id3_skip(id3, size - read);
1175 read = size;
1176 }
1177
1178 // Special buffering mode for APIC data, avoids a large buffer allocation
1179 else if (buffer_art) {
1180 uint32_t remain = size - read;
1181 uint32_t chunk_size;
1182 SV *artwork = newSVpv("", 0);
1183
1184 while (read < size) {
1185 if ( !_check_buf(id3->infile, id3->buf, 1, ID3_BLOCK_SIZE) ) {
1186 return 0;
1187 }
1188
1189 chunk_size = remain < buffer_len(id3->buf) ? remain : buffer_len(id3->buf);
1190
1191 read += chunk_size;
1192 remain -= chunk_size;
1193
1194 sv_catpvn( artwork, buffer_ptr(id3->buf), chunk_size );
1195 buffer_consume(id3->buf, chunk_size);
1196
1197 DEBUG_TRACE(" buffered %d bytes of APIC data (remaining %d)\n", chunk_size, remain);
1198 }
1199
1200 av_push( framedata, artwork );
1201 }
1202
1203 // Special handling for RVA2 tags
1204 else if ( !strcmp(id, "RVA2") ) {
1205 read += _id3_parse_rva2(id3, size, framedata);
1206 }
1207
1208 // Special handling for SYLT tags
1209 else if ( !strcmp(id, "SYLT") ) {
1210 read += _id3_parse_sylt(id3, encoding, size - read, framedata);
1211 }
1212
1213 // Special handling for ETCO tags
1214 else if ( !strcmp(id, "ETCO") ) {
1215 read += _id3_parse_etco(id3, size - read, framedata);
1216 }
1217
1218 // All other binary frames, copy as-is
1219 else {
1220 if (size - read > 1) {
1221 av_push( framedata, newSVpvn( buffer_ptr(id3->buf), size - read ) );
1222 buffer_consume(id3->buf, size - read);
1223 read = size;
1224 DEBUG_TRACE(" binarydata, read %d\n", read);
1225 }
1226 }
1227 break;
1228
1229 default:
1230 break;
1231 }
1232 }
1233
1234 _id3_set_array_tag(id3, id, framedata);
1235 }
1236
1237 out:
1238 if (read < size) {
1239 buffer_consume(id3->buf, size - read);
1240 DEBUG_TRACE(" !!! consuming extra bytes in frame: %d\n", size - read);
1241 }
1242
1243 return ret;
1244 }
1245
1246 void
_id3_set_array_tag(id3info * id3,char const * id,AV * framedata)1247 _id3_set_array_tag(id3info *id3, char const *id, AV *framedata)
1248 {
1249 if ( av_len(framedata) != -1 ) {
1250 if ( my_hv_exists( id3->tags, id ) ) {
1251 // If tag already exists, move it to an arrayref
1252 SV **entry = my_hv_fetch( id3->tags, id );
1253 if (entry != NULL) {
1254 if ( SvTYPE( SvRV(*entry) ) == SVt_PV ) {
1255 // A normal string entry, convert to array
1256 AV *ref = newAV();
1257
1258 // XXX need test, this may be illegal because you can't have multiple duplicate frames?
1259 DEBUG_TRACE(" !!! converting normal string tag to array\n");
1260
1261 av_push( ref, *entry );
1262 av_push( ref, newRV_noinc( (SV *)framedata ) );
1263 my_hv_store( id3->tags, id, newRV_noinc( (SV *)ref ) );
1264 }
1265 else if ( SvTYPE( SvRV(*entry) ) == SVt_PVAV ) {
1266 // If type of first item is array, add new item to entry
1267 SV **first = av_fetch( (AV *)SvRV(*entry), 0, 0 );
1268 if ( first == NULL || ( SvROK(*first) && SvTYPE( SvRV(*first) ) == SVt_PVAV ) ) {
1269 av_push( (AV *)SvRV(*entry), newRV_noinc( (SV *)framedata ) );
1270 }
1271 else {
1272 AV *ref = newAV();
1273 av_push( ref, SvREFCNT_inc(*entry) );
1274 av_push( ref, newRV_noinc( (SV *)framedata) );
1275 my_hv_store( id3->tags, id, newRV_noinc( (SV *)ref ) );
1276 }
1277 }
1278 }
1279 }
1280 else {
1281 my_hv_store( id3->tags, id, newRV_noinc( (SV *)framedata ) );
1282 }
1283 }
1284 else {
1285 SvREFCNT_dec(framedata);
1286 }
1287 }
1288
1289 // Read a latin1 or UTF-8 string from an ID3v1 tag
1290 // This function handles trimming spaces off the end
1291 uint32_t
_id3_get_v1_utf8_string(id3info * id3,SV ** string,uint32_t len)1292 _id3_get_v1_utf8_string(id3info *id3, SV **string, uint32_t len)
1293 {
1294 uint32_t read = 0;
1295 char *ptr;
1296 char *str;
1297
1298 read = _id3_get_utf8_string(id3, string, len, ISO_8859_1);
1299
1300 if (read) {
1301 // Trim spaces from end
1302 if (*string != NULL) {
1303 str = SvPVX(*string);
1304 ptr = str + sv_len(*string);
1305
1306 while (ptr > str && ptr[-1] == ' ')
1307 --ptr;
1308
1309 *ptr = 0;
1310 SvCUR_set(*string, ptr - str);
1311 }
1312 }
1313
1314 return read;
1315 }
1316
1317 uint32_t
_id3_get_utf8_string(id3info * id3,SV ** string,uint32_t len,uint8_t encoding)1318 _id3_get_utf8_string(id3info *id3, SV **string, uint32_t len, uint8_t encoding)
1319 {
1320 uint8_t byteorder = UTF16_BYTEORDER_ANY;
1321 uint32_t read = 0;
1322 unsigned char *bptr;
1323
1324 // Init scratch buffer if necessary
1325 if ( !id3->utf8->alloc ) {
1326 // Use a larger initial buffer if reading ISO-8859-1 to avoid
1327 // always having to allocate a second time
1328 buffer_init( id3->utf8, encoding == ISO_8859_1 ? len * 2 : len );
1329 }
1330 else {
1331 // Reset scratch buffer
1332 buffer_clear(id3->utf8);
1333 }
1334
1335 if ( *string != NULL ) {
1336 warn(" !!! string SV is not null: %s\n", SvPVX(*string));
1337 }
1338
1339 switch (encoding) {
1340 case ISO_8859_1:
1341 read += buffer_get_latin1_as_utf8(id3->buf, id3->utf8, len);
1342 break;
1343
1344 case UTF_16BE:
1345 byteorder = UTF16_BYTEORDER_BE;
1346
1347 case UTF_16:
1348 bptr = buffer_ptr(id3->buf);
1349
1350 switch ( (bptr[0] << 8) | bptr[1] ) {
1351 case 0xfeff:
1352 DEBUG_TRACE(" UTF-16 BOM is big-endian\n");
1353 byteorder = UTF16_BYTEORDER_BE;
1354 buffer_consume(id3->buf, 2);
1355 read += 2;
1356 break;
1357
1358 case 0xfffe:
1359 DEBUG_TRACE(" UTF-16 BOM is little-endian\n");
1360 byteorder = UTF16_BYTEORDER_LE;
1361 buffer_consume(id3->buf, 2);
1362 read += 2;
1363 break;
1364 }
1365
1366 /* Bug 14728
1367 If there is no BOM, assume LE, this is what appears in the wild -andy
1368 */
1369 if (byteorder == UTF16_BYTEORDER_ANY) {
1370 DEBUG_TRACE(" UTF-16 byte order defaulting to little-endian, no BOM\n");
1371 byteorder = UTF16_BYTEORDER_LE;
1372 }
1373
1374 read += buffer_get_utf16_as_utf8(id3->buf, id3->utf8, len - read, byteorder);
1375 break;
1376
1377 case UTF_8:
1378 read += buffer_get_utf8(id3->buf, id3->utf8, len);
1379 break;
1380
1381 default:
1382 break;
1383 }
1384
1385 if (read) {
1386 if ( buffer_len(id3->utf8) ) {
1387 *string = newSVpv( buffer_ptr(id3->utf8), 0 );
1388 sv_utf8_decode(*string);
1389 DEBUG_TRACE(" read utf8 string of %d bytes: %s\n", buffer_len(id3->utf8), SvPVX(*string));
1390 }
1391 else {
1392 DEBUG_TRACE(" empty string\n");
1393 }
1394 }
1395
1396 return read;
1397 }
1398
1399 uint32_t
_id3_parse_rvad(id3info * id3,char const * id,uint32_t size)1400 _id3_parse_rvad(id3info *id3, char const *id, uint32_t size)
1401 {
1402 unsigned char *rva = buffer_ptr(id3->buf);
1403 int sign_r = rva[0] & 0x01 ? 1 : -1;
1404 int sign_l = rva[0] & 0x02 ? 1 : -1;
1405 int bytes = rva[1] / 8;
1406 float vol[2];
1407 float peak[2];
1408 int i;
1409 AV *framedata = newAV();
1410
1411 // Sanity check, first byte must be either 0 or 1, second byte > 0
1412 if (rva[0] & 0xFE || rva[1] == 0) {
1413 return 0;
1414 }
1415
1416 // Calculated size must match the actual size
1417 if (size != 2 + (bytes * 4)) {
1418 return 0;
1419 }
1420
1421 rva += 2;
1422
1423 vol[0] = _varint( rva, bytes ) * sign_r / 256.;
1424 vol[1] = _varint( rva + bytes, bytes ) * sign_l / 256.;
1425
1426 peak[0] = _varint( rva + (bytes * 2), bytes );
1427 peak[1] = _varint( rva + (bytes * 3), bytes );
1428
1429 // iTunes uses a range of -255 to 255
1430 // to be -100% (silent) to 100% (+6dB)
1431 for (i = 0; i < 2; i++) {
1432 if ( vol[i] == -255 ) {
1433 vol[i] = -96.0;
1434 }
1435 else {
1436 vol[i] = 20.0 * log( ( vol[i] + 255 ) / 255 ) / log(10);
1437 }
1438
1439 av_push( framedata, newSVpvf( "%f dB", vol[i] ) );
1440 av_push( framedata, newSVpvf( "%f", peak[i] ) );
1441 }
1442
1443 my_hv_store( id3->tags, id, newRV_noinc( (SV *)framedata ) );
1444
1445 buffer_consume(id3->buf, 2 + (bytes * 4));
1446
1447 return 2 + (bytes * 4);
1448 }
1449
1450 uint32_t
_id3_parse_rgad(id3info * id3)1451 _id3_parse_rgad(id3info *id3)
1452 {
1453 float radio = 0.0;
1454 float audiophile = 0.0;
1455 uint8_t sign = 0;
1456 HV *framedata = newHV();
1457 uint32_t read = 0;
1458
1459 // Peak (32-bit float)
1460 my_hv_store( framedata, "peak", newSVpvf( "%f", (float)buffer_get_float32(id3->buf) ) );
1461 read += 4;
1462
1463 // Radio (16 bits)
1464
1465 // Radio Name code (3 bits, should always be 1)
1466 buffer_get_bits(id3->buf, 3);
1467
1468 my_hv_store( framedata, "track_originator", newSVuv( buffer_get_bits(id3->buf, 3) ) );
1469
1470 // Sign bit (1 bit)
1471 sign = buffer_get_bits(id3->buf, 1);
1472
1473 // Gain value (9 bits)
1474 radio = (float)buffer_get_bits(id3->buf, 9);
1475 radio /= 10.0;
1476 if (sign == 1) radio *= -1.0;
1477 my_hv_store( framedata, "track_gain", newSVpvf( "%f dB", radio ) );
1478
1479 read += 2;
1480
1481 // Audiophile (16 bits)
1482
1483 // Audiophile Name code (3 bits, should always be 2)
1484 buffer_get_bits(id3->buf, 3);
1485
1486 // Audiophile Originator code (3 bits)
1487 my_hv_store( framedata, "album_originator", newSVuv( buffer_get_bits(id3->buf, 3) ) );
1488
1489 // Sign bit (1 bit)
1490 sign = buffer_get_bits(id3->buf, 1);
1491
1492 // Gain value (9 bits)
1493 audiophile = (float)buffer_get_bits(id3->buf, 9);
1494 audiophile /= 10.0;
1495 if (sign == 1) audiophile *= -1.0;
1496 my_hv_store( framedata, "album_gain", newSVpvf( "%f dB", audiophile ) );
1497
1498 read += 2;
1499
1500 my_hv_store( id3->tags, "RGAD", newRV_noinc( (SV *)framedata ) );
1501
1502 return read;
1503 }
1504
1505 uint32_t
_id3_parse_rva2(id3info * id3,uint32_t len,AV * framedata)1506 _id3_parse_rva2(id3info *id3, uint32_t len, AV *framedata)
1507 {
1508 float adj = 0.0;
1509 int adj_fp;
1510 uint8_t peakbits;
1511 float peak = 0.0;
1512 uint32_t read = 0;
1513 unsigned char *bptr;
1514
1515 // Channel
1516 av_push( framedata, newSViv( buffer_get_char(id3->buf) ) );
1517
1518 // Adjustment
1519 bptr = buffer_ptr(id3->buf);
1520 adj_fp = *(signed char *)(bptr) << 8;
1521 adj_fp |= *(unsigned char *)(bptr+1);
1522 adj = adj_fp / 512.0;
1523 av_push( framedata, newSVpvf( "%f dB", adj ) );
1524 buffer_consume(id3->buf, 2);
1525
1526 // Peak
1527 // Based on code from mp3gain
1528 peakbits = buffer_get_char(id3->buf);
1529
1530 read += 4;
1531
1532 if (4 + (peakbits + 7) / 8 <= len) {
1533 DEBUG_TRACE(" peakbits: %d\n", peakbits);
1534 if (peakbits > 0) {
1535 peak += (float)buffer_get_char(id3->buf);
1536 read++;
1537 }
1538 if (peakbits > 8) {
1539 peak += (float)buffer_get_char(id3->buf) / 256.0;
1540 read++;
1541 }
1542 if (peakbits > 16) {
1543 peak += (float)buffer_get_char(id3->buf) / 65536.0;
1544 read++;
1545 }
1546
1547 if (peakbits > 0)
1548 peak /= (float)(1 << ((peakbits - 1) & 7));
1549 }
1550
1551 av_push( framedata, newSVpvf( "%f dB", peak ) );
1552
1553 return read;
1554 }
1555
1556 uint32_t
_id3_parse_sylt(id3info * id3,uint8_t encoding,uint32_t len,AV * framedata)1557 _id3_parse_sylt(id3info *id3, uint8_t encoding, uint32_t len, AV *framedata)
1558 {
1559 uint32_t read = 0;
1560 AV *content = newAV();
1561 unsigned char *bptr;
1562
1563 while (read < len) {
1564 SV *value = NULL;
1565 HV *lyric = newHV();
1566
1567 read += _id3_get_utf8_string(id3, &value, len - read, encoding);
1568 if (value != NULL && SvPOK(value) && sv_len(value)) {
1569 my_hv_store( lyric, "text", value );
1570 }
1571 else {
1572 my_hv_store( lyric, "text", &PL_sv_undef );
1573 if (value) SvREFCNT(value);
1574 }
1575
1576 my_hv_store( lyric, "timestamp", newSVuv( buffer_get_int(id3->buf) ) );
1577 read += 4;
1578
1579 // A $0A newline byte may follow, for some odd reason
1580 bptr = buffer_ptr(id3->buf);
1581 if ( len - read > 0 && bptr[0] == 0x0a ) {
1582 buffer_consume(id3->buf, 1);
1583 read++;
1584 }
1585
1586 av_push( content, newRV_noinc( (SV *)lyric ) );
1587 }
1588
1589 av_push( framedata, newRV_noinc( (SV *)content ) );
1590
1591 return read;
1592 }
1593
1594 uint32_t
_id3_parse_etco(id3info * id3,uint32_t len,AV * framedata)1595 _id3_parse_etco(id3info *id3, uint32_t len, AV *framedata)
1596 {
1597 uint32_t read = 0;
1598 AV *content = newAV();
1599
1600 while (read < len) {
1601 HV *event = newHV();
1602
1603 my_hv_store( event, "type", newSVuv( buffer_get_char(id3->buf) ) );
1604 my_hv_store( event, "timestamp", newSVuv( buffer_get_int(id3->buf) ) );
1605 read += 5;
1606
1607 av_push( content, newRV_noinc( (SV *)event ) );
1608 }
1609
1610 av_push( framedata, newRV_noinc( (SV *)content ) );
1611
1612 return read;
1613 }
1614
1615 void
_id3_convert_tdrc(id3info * id3)1616 _id3_convert_tdrc(id3info *id3)
1617 {
1618 char timestamp[17] = { 0 };
1619
1620 if ( my_hv_exists(id3->tags, "TYER") ) {
1621 SV *tyer = my_hv_delete(id3->tags, "TYER");
1622 if (SvPOK(tyer) && sv_len(tyer) == 4) {
1623 char *ptr = SvPVX(tyer);
1624 timestamp[0] = ptr[0];
1625 timestamp[1] = ptr[1];
1626 timestamp[2] = ptr[2];
1627 timestamp[3] = ptr[3];
1628 DEBUG_TRACE(" Converted TYER (%s) to TDRC (%s)\n", SvPVX(tyer), timestamp);
1629 }
1630 }
1631
1632 if ( my_hv_exists(id3->tags, "TDAT") ) {
1633 SV *tdat = my_hv_delete(id3->tags, "TDAT");
1634 if (SvPOK(tdat) && sv_len(tdat) == 4) {
1635 char *ptr = SvPVX(tdat);
1636 timestamp[4] = '-';
1637 timestamp[5] = ptr[2];
1638 timestamp[6] = ptr[3];
1639 timestamp[7] = '-';
1640 timestamp[8] = ptr[0];
1641 timestamp[9] = ptr[1];
1642 DEBUG_TRACE(" Converted TDAT (%s) to TDRC (%s)\n", SvPVX(tdat), timestamp);
1643 }
1644 }
1645
1646 if ( my_hv_exists(id3->tags, "TIME") ) {
1647 SV *time = my_hv_delete(id3->tags, "TIME");
1648 if (SvPOK(time) && sv_len(time) == 4) {
1649 char *ptr = SvPVX(time);
1650 timestamp[10] = 'T';
1651 timestamp[11] = ptr[0];
1652 timestamp[12] = ptr[1];
1653 timestamp[13] = ':';
1654 timestamp[14] = ptr[2];
1655 timestamp[15] = ptr[3];
1656 DEBUG_TRACE(" Converted TIME (%s) to TDRC (%s)\n", SvPVX(time), timestamp);
1657 }
1658 }
1659
1660 if (timestamp[0]) {
1661 my_hv_store( id3->tags, "TDRC", newSVpv(timestamp, 0) );
1662 }
1663 }
1664
// deunsync in-place, from libid3tag
//
// Reverses ID3v2 unsynchronisation: every 0x00 byte that directly follows a
// 0xFF byte is removed, all other bytes are kept in order.
//
//   data   - buffer to rewrite in place
//   length - number of valid bytes in data
//
// Returns the number of bytes remaining at the start of data. Bytes beyond
// that count are left as they were.
uint32_t
_id3_deunsync(unsigned char *data, uint32_t length)
{
  unsigned char *src;
  unsigned char *dst;
  unsigned char *end = data + length;

  if (length == 0)
    return 0;

  for (src = dst = data; src < end - 1; ++src) {
    *dst++ = *src;
    // Skip the stuffed zero byte after 0xFF
    if (src[0] == 0xff && src[1] == 0x00)
      ++src;
  }

  // Copy the last byte unless it was the stuffed zero of a trailing FF 00
  // pair; in that case src already sits at end and must not be dereferenced
  if (src < end)
    *dst++ = *src;

  return (uint32_t)(dst - data);
}
1686
1687 void
_id3_skip(id3info * id3,uint32_t size)1688 _id3_skip(id3info *id3, uint32_t size)
1689 {
1690 if ( buffer_len(id3->buf) >= size ) {
1691 buffer_consume(id3->buf, size);
1692
1693 DEBUG_TRACE(" skipped buffer data size %d\n", size);
1694 }
1695 else {
1696 PerlIO_seek(id3->infile, size - buffer_len(id3->buf), SEEK_CUR);
1697 buffer_clear(id3->buf);
1698
1699 DEBUG_TRACE(" seeked past %d bytes to %d\n", size, (int)PerlIO_tell(id3->infile));
1700 }
1701 }
1702
1703 // return an ID3v1 genre string indexed by number
1704 char const *
_id3_genre_index(unsigned int index)1705 _id3_genre_index(unsigned int index)
1706 {
1707 return (index < NGENRES) ? genre_table[index] : 0;
1708 }
1709
1710 // translate an ID3v2 genre number/keyword to its full name
1711 char const *
_id3_genre_name(char const * string)1712 _id3_genre_name(char const *string)
1713 {
1714 static char const genre_remix[] = { 'R', 'e', 'm', 'i', 'x', 0 };
1715 static char const genre_cover[] = { 'C', 'o', 'v', 'e', 'r', 0 };
1716 unsigned long number;
1717
1718 if (string == 0 || *string == 0)
1719 return 0;
1720
1721 if (string[0] == 'R' && string[1] == 'X')
1722 return genre_remix;
1723 if (string[0] == 'C' && string[1] == 'R')
1724 return genre_cover;
1725
1726 number = strtol(string, NULL, 0);
1727
1728 return (number < NGENRES) ? genre_table[number] : string;
1729 }
1730