1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 2 of the License, or
5  * (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software
14  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
15  */
16 
17 #include "id3.h"
18 #include "id3_genre.dat"
19 #include "id3_compat.c"
20 #include "id3_frametype.c"
21 
22 #define NGENRES (sizeof(genre_table) / sizeof(genre_table[0]))
23 
// Read a big-endian integer from a variable number of bytes.
//
// buf:    first (most significant) byte of the value, may be NULL
// length: number of bytes to read
//
// Returns 0 when buf is NULL or length is not positive.  When more bytes are
// supplied than fit in an unsigned int (e.g. an oversized PCNT counter), the
// excess high-order bytes are discarded and the low-order bits are returned.
static int
_varint(unsigned char *buf, int length)
{
  unsigned int number = 0;
  int i;

  if (buf == NULL) {
    return 0;
  }

  for (i = 0; i < length; i++) {
    // Shift the accumulator rather than each byte: shifting a byte by
    // 8 * (length - 1 - i) is undefined once length exceeds sizeof(int)
    number = (number << 8) | (unsigned int)(buf[i] & 0xff);
  }

  return (int)number;
}
41 
42 int
parse_id3(PerlIO * infile,char * file,HV * info,HV * tags,off_t seek,off_t file_size)43 parse_id3(PerlIO *infile, char *file, HV *info, HV *tags, off_t seek, off_t file_size)
44 {
45   int err = 0;
46   unsigned char *bptr;
47 
48   id3info *id3;
49   Newz(0, id3, sizeof(id3info), id3info);
50   Newz(0, id3->buf, sizeof(Buffer), Buffer);
51   Newz(0, id3->utf8, sizeof(Buffer), Buffer);
52 
53   id3->infile = infile;
54   id3->file   = file;
55   id3->info   = info;
56   id3->tags   = tags;
57   id3->offset = seek;
58 
59   buffer_init(id3->buf, ID3_BLOCK_SIZE);
60 
61   if ( !seek ) {
62     // Check for ID3v1 tag first
63     PerlIO_seek(infile, file_size - 128, SEEK_SET);
64     if ( !_check_buf(infile, id3->buf, 128, 128) ) {
65       err = -1;
66       goto out;
67     }
68 
69     bptr = buffer_ptr(id3->buf);
70     if (bptr[0] == 'T' && bptr[1] == 'A' && bptr[2] == 'G') {
71       _id3_parse_v1(id3);
72     }
73   }
74 
75   // Check for ID3v2 tag
76   PerlIO_seek(infile, seek, SEEK_SET);
77   buffer_clear(id3->buf);
78 
79   // Read enough for header (10) + extended header size (4)
80   if ( !_check_buf(infile, id3->buf, 14, ID3_BLOCK_SIZE) ) {
81     err = -1;
82     goto out;
83   }
84 
85   bptr = buffer_ptr(id3->buf);
86   if (bptr[0] == 'I' && bptr[1] == 'D' && bptr[2] == '3') {
87     _id3_parse_v2(id3);
88   }
89 
90 out:
91   buffer_free(id3->buf);
92   Safefree(id3->buf);
93 
94   if (id3->utf8->alloc)
95     buffer_free(id3->utf8);
96   Safefree(id3->utf8);
97 
98   Safefree(id3);
99 
100   return err;
101 }
102 
103 int
_id3_parse_v1(id3info * id3)104 _id3_parse_v1(id3info *id3)
105 {
106   SV *tmp = NULL;
107   uint8_t read = 0;
108   unsigned char *bptr;
109   uint8_t comment_len;
110   uint8_t genre;
111 
112   buffer_consume(id3->buf, 3); // TAG
113 
114   read = _id3_get_v1_utf8_string(id3, &tmp, 30);
115   if (tmp && SvPOK(tmp) && sv_len(tmp)) {
116     DEBUG_TRACE("ID3v1 title: %s\n", SvPVX(tmp));
117     my_hv_store( id3->tags, ID3_FRAME_TITLE, tmp );
118   }
119   else {
120     if (tmp) SvREFCNT_dec(tmp);
121   }
122   if (read < 30) {
123     buffer_consume(id3->buf, 30 - read);
124   }
125 
126   tmp = NULL;
127   read = _id3_get_v1_utf8_string(id3, &tmp, 30);
128   if (tmp && SvPOK(tmp) && sv_len(tmp)) {
129     DEBUG_TRACE("ID3v1 artist: %s\n", SvPVX(tmp));
130     my_hv_store( id3->tags, ID3_FRAME_ARTIST, tmp );
131     tmp = NULL;
132   }
133   else {
134     if (tmp) SvREFCNT_dec(tmp);
135   }
136   if (read < 30) {
137     buffer_consume(id3->buf, 30 - read);
138   }
139 
140   tmp = NULL;
141   read = _id3_get_v1_utf8_string(id3, &tmp, 30);
142   if (tmp && SvPOK(tmp) && sv_len(tmp)) {
143     DEBUG_TRACE("ID3v1 album: %s\n", SvPVX(tmp));
144     my_hv_store( id3->tags, ID3_FRAME_ALBUM, tmp );
145     tmp = NULL;
146   }
147   else {
148     if (tmp) SvREFCNT_dec(tmp);
149   }
150   if (read < 30) {
151     buffer_consume(id3->buf, 30 - read);
152   }
153 
154   tmp = NULL;
155   read = _id3_get_v1_utf8_string(id3, &tmp, 4);
156   if (tmp && SvPOK(tmp) && sv_len(tmp)) {
157     DEBUG_TRACE("ID3v1 year: %s\n", SvPVX(tmp));
158     my_hv_store( id3->tags, ID3_FRAME_YEAR, tmp );
159     tmp = NULL;
160   }
161   else {
162     if (tmp) SvREFCNT_dec(tmp);
163   }
164   if (read < 4) {
165     buffer_consume(id3->buf, 4 - read);
166   }
167 
168   bptr = buffer_ptr(id3->buf);
169   if (bptr[28] == 0 && bptr[29] != 0) {
170     // ID3v1.1 track number is present
171     comment_len = 28;
172     my_hv_store( id3->tags, ID3_FRAME_TRACK, newSVuv(bptr[29]) );
173     my_hv_store( id3->info, "id3_version", newSVpv( "ID3v1.1", 0 ) );
174   }
175   else {
176     comment_len = 30;
177     my_hv_store( id3->info, "id3_version", newSVpv( "ID3v1", 0 ) );
178   }
179 
180   tmp = NULL;
181   read = _id3_get_v1_utf8_string(id3, &tmp, comment_len);
182   if (tmp && SvPOK(tmp) && sv_len(tmp)) {
183     AV *comment_array = newAV();
184     av_push( comment_array, newSVpvn("XXX", 3) );
185     av_push( comment_array, newSVpvn("", 0) );
186     av_push( comment_array, tmp );
187     DEBUG_TRACE("ID3v1 comment: %s\n", SvPVX(tmp));
188     my_hv_store( id3->tags, ID3_FRAME_COMMENT, newRV_noinc( (SV *)comment_array ) );
189     tmp = NULL;
190   }
191   else {
192     if (tmp) SvREFCNT_dec(tmp);
193   }
194   if (read < 30) {
195     buffer_consume(id3->buf, 30 - read);
196   }
197 
198   genre = buffer_get_char(id3->buf);
199   if (genre < NGENRES) {
200     char const *genre_string = _id3_genre_index(genre);
201     my_hv_store( id3->tags, ID3_FRAME_GENRE, newSVpv(genre_string, 0) );
202   }
203   else if (genre < 255) {
204     my_hv_store( id3->tags, ID3_FRAME_GENRE, newSVpvf("Unknown/%d", genre) );
205   }
206 
207   return 1;
208 }
209 
210 int
_id3_parse_v2(id3info * id3)211 _id3_parse_v2(id3info *id3)
212 {
213   int ret = 1;
214   unsigned char *bptr;
215 
216   // Verify we have a valid tag
217   bptr = buffer_ptr(id3->buf);
218   if ( !(
219     bptr[3] < 0xff && bptr[4] < 0xff &&
220     bptr[6] < 0x80 && bptr[7] < 0x80 && bptr[8] < 0x80 && bptr[9] < 0x80
221   ) ) {
222     PerlIO_printf(PerlIO_stderr(), "Invalid ID3v2 tag in %s\n", id3->file);
223     return 0;
224   }
225 
226   buffer_consume(id3->buf, 3); // ID3
227 
228   id3->version_major = buffer_get_char(id3->buf);
229   id3->version_minor = buffer_get_char(id3->buf);
230   id3->flags         = buffer_get_char(id3->buf);
231   id3->size          = 10 + buffer_get_syncsafe(id3->buf, 4);
232 
233   id3->size_remain = id3->size - 10;
234 
235   if (id3->flags & ID3_TAG_FLAG_FOOTERPRESENT) {
236     id3->size += 10;
237   }
238 
239   DEBUG_TRACE("Parsing ID3v2.%d.%d tag, flags %x, size %d\n", id3->version_major, id3->version_minor, id3->flags, id3->size);
240 
241   if (id3->flags & ID3_TAG_FLAG_UNSYNCHRONISATION) {
242     if (id3->version_major < 4) {
243       // It's unclear but the v2.4.0-changes document seems to say that v2.4 should
244       // ignore the tag-level unsync flag and only worry about frame-level unsync
245 
246       // For v2.2/v2.3, unsync the entire tag.  This is unfortunate due to
247       // increased memory usage but the only way to do it, as frame size values only
248       // indicate the post-unsync size, so it's not possible to unsync each frame individually
249       // tested with v2.3-unsync.mp3
250       if ( !_check_buf(id3->infile, id3->buf, id3->size, id3->size) ) {
251         ret = 0;
252         goto out;
253       }
254 
255       id3->size_remain = _id3_deunsync( buffer_ptr(id3->buf), id3->size );
256 
257       DEBUG_TRACE("    Un-synchronized tag, new_size %d\n", id3->size_remain);
258 
259       my_hv_store( id3->info, "id3_was_unsynced", newSVuv(1) );
260     }
261     else {
262       DEBUG_TRACE("  Ignoring v2.4 tag un-synchronize flag\n");
263     }
264   }
265 
266   if (id3->flags & ID3_TAG_FLAG_EXTENDEDHEADER) {
267     uint32_t ehsize;
268 
269     // If the tag is v2.2, this bit is actually the compression bit and the tag should be ignored
270     if (id3->version_major == 2) {
271       ret = 0;
272       goto out;
273     }
274 
275     // tested with v2.3-ext-header.mp3
276 
277     // We don't care about the value of the extended flags or CRC, so just read the size and skip it
278     ehsize = buffer_get_int(id3->buf);
279 
280     // ehsize may be invalid, tested with v2.3-ext-header-invalid.mp3
281     if (ehsize > id3->size_remain - 4) {
282       warn("Error: Invalid ID3 extended header size (%s)\n", id3->file);
283       ret = 0;
284       goto out;
285     }
286 
287     DEBUG_TRACE("  Skipping extended header, size %d\n", ehsize);
288 
289     if ( !_check_buf(id3->infile, id3->buf, ehsize, ID3_BLOCK_SIZE) ) {
290       ret = 0;
291       goto out;
292     }
293     buffer_consume(id3->buf, ehsize);
294 
295     id3->size_remain -= ehsize + 4;
296   }
297 
298   // Parse frames
299   while (id3->size_remain > 0) {
300     //DEBUG_TRACE("    remain: %d\n", id3->size_remain);
301     if ( !_id3_parse_v2_frame(id3) ) {
302       break;
303     }
304   }
305 
306   if (id3->version_major < 4) {
307     // map old year/date/time (TYER/TDAT/TIME) frames to TDRC
308     // tested in v2.3-xsop.mp3
309     _id3_convert_tdrc(id3);
310   }
311 
312   // Set id3_version info element, which contains all tag versions found
313   {
314     SV *version = newSVpvf( "ID3v2.%d.%d", id3->version_major, id3->version_minor );
315 
316     if ( my_hv_exists(id3->info, "id3_version") ) {
317       SV **entry = my_hv_fetch(id3->info, "id3_version");
318       if (entry != NULL) {
319         sv_catpv( version, ", " );
320         sv_catsv( version, *entry );
321       }
322     }
323 
324     my_hv_store( id3->info, "id3_version", version );
325   }
326 
327 out:
328   return ret;
329 }
330 
331 int
_id3_parse_v2_frame(id3info * id3)332 _id3_parse_v2_frame(id3info *id3)
333 {
334   int ret = 1;
335   char id[5];
336   uint16_t flags = 0;
337   uint32_t size  = 0;
338   uint32_t decoded_size = 0;
339   uint32_t unsync_extra = 0;
340   id3_frametype const *frametype;
341   Buffer *tmp_buf = 0;
342 
343   // If the frame is compressed, it will be decompressed here
344   Buffer *decompressed = 0;
345 
346   // tag_data_safe flag is used if skipping artwork and artwork is not raw image data (needs unsync)
347   id3->tag_data_safe = 1;
348 
349   if ( !_check_buf(id3->infile, id3->buf, 10, ID3_BLOCK_SIZE) ) {
350     ret = 0;
351     goto out;
352   }
353 
354   if (id3->version_major == 2) {
355     // v2.2
356     id3_compat const *compat;
357 
358     // Read 3-letter id
359     buffer_get(id3->buf, &id, 3);
360     id[3] = 0;
361 
362     if (id[0] == 0) {
363       // padding
364       DEBUG_TRACE("  Found start of padding, aborting\n");
365       ret = 0;
366       goto out;
367     }
368 
369     size = buffer_get_int24(id3->buf);
370 
371     DEBUG_TRACE("  %s, size %d\n", id, size);
372 
373     // map 3-char id to 4-char id
374     compat = _id3_compat_lookup((char *)&id, 3);
375     if (compat && compat->equiv) {
376       strncpy(id, compat->equiv, 4);
377       id[4] = 0;
378 
379       DEBUG_TRACE("    compat -> %s\n", id);
380     }
381     else {
382       // no compat mapping (obsolete), prepend 'Y' to id
383       id[4] = 0;
384       id[3] = id[2];
385       id[2] = id[1];
386       id[1] = id[0];
387       id[0] = 'Y';
388 
389       DEBUG_TRACE("    obsolete/unknown -> %s\n", id);
390     }
391 
392     id3->size_remain -= 6;
393 
394     if (size > id3->size_remain) {
395       DEBUG_TRACE("    frame size too big, aborting\n");
396       ret = 0;
397       goto out;
398     }
399   }
400   else {
401     // Read 4-letter id
402     buffer_get(id3->buf, &id, 4);
403     id[4] = 0;
404 
405     if (id[0] == 0) {
406       // padding
407       DEBUG_TRACE("  Found start of padding, aborting\n");
408       ret = 0;
409       goto out;
410     }
411 
412     id3->size_remain -= 4;
413 
414     if (id3->version_major == 3) {
415       // v2.3
416       id3_compat const *compat;
417 
418       size  = buffer_get_int(id3->buf);
419       flags = buffer_get_short(id3->buf);
420 
421       DEBUG_TRACE("  %s, frame flags %x, size %d\n", id, flags, size);
422 
423       // map to v2.4 id
424       if (id[3] == ' ') {
425         // iTunes writes bad frame IDs such as 'TSA ', these should be run through compat
426         // as 3-char frames
427         compat = _id3_compat_lookup((char *)&id, 3);
428       }
429       else {
430         compat = _id3_compat_lookup((char *)&id, 4);
431       }
432       if (compat && compat->equiv) {
433         strncpy(id, compat->equiv, 4);
434         id[4] = 0;
435 
436         DEBUG_TRACE("    compat -> %s\n", id);
437       }
438 
439       id3->size_remain -= 6;
440 
441       if (size > id3->size_remain) {
442         DEBUG_TRACE("    frame size too big, aborting\n");
443         ret = 0;
444         goto out;
445       }
446 
447       if (flags & ID3_FRAME_FLAG_V23_COMPRESSION) {
448         // tested with v2.3-compressed-frame.mp3
449         decoded_size = buffer_get_int(id3->buf);
450         id3->size_remain -= 4;
451         size -= 4;
452       }
453 
454       if (flags & ID3_FRAME_FLAG_V23_ENCRYPTION) {
455         // tested with v2.3-encrypted-frame.mp3
456 #ifdef AUDIO_SCAN_DEBUG
457         DEBUG_TRACE("    encrypted, method %d\n", buffer_get_char(id3->buf));
458 #else
459         buffer_consume(id3->buf, 1);
460 #endif
461 
462         id3->size_remain--;
463         size--;
464 
465         DEBUG_TRACE("    skipping encrypted frame\n");
466         _id3_skip(id3, size);
467         id3->size_remain -= size;
468         goto out;
469       }
470 
471       if (flags & ID3_FRAME_FLAG_V23_GROUPINGIDENTITY) {
472         // tested with v2.3-group-id.mp3
473 #ifdef AUDIO_SCAN_DEBUG
474         DEBUG_TRACE("    group_id %d\n", buffer_get_char(id3->buf));
475 #else
476         buffer_consume(id3->buf, 1);
477 #endif
478 
479         id3->size_remain--;
480         size--;
481       }
482 
483       // Perform decompression if necessary after all optional extra bytes have been read
484       // XXX need test for compressed + unsync
485       if (flags & ID3_FRAME_FLAG_V23_COMPRESSION && decoded_size) {
486         unsigned long tmp_size;
487 
488         if ( !_check_buf(id3->infile, id3->buf, size, ID3_BLOCK_SIZE) ) {
489           ret = 0;
490           goto out;
491         }
492 
493         DEBUG_TRACE("    decompressing, decoded_size %d\n", decoded_size);
494 
495         Newz(0, decompressed, sizeof(Buffer), Buffer);
496         buffer_init(decompressed, decoded_size);
497 
498         tmp_size = decoded_size;
499         if (
500           uncompress(buffer_ptr(decompressed), &tmp_size, buffer_ptr(id3->buf), size) != Z_OK
501           ||
502     	    tmp_size != decoded_size
503     	  ) {
504           DEBUG_TRACE("    unable to decompress frame\n");
505           buffer_free(decompressed);
506           Safefree(decompressed);
507           decompressed = 0;
508         }
509         else {
510           // Hack buffer so it knows we've added data directly
511           decompressed->end = decoded_size;
512         }
513       }
514     }
515     else {
516       // v2.4
517 
518       // iTunes writes non-syncsafe length integers, check for this here
519       if ( _varint(buffer_ptr(id3->buf), 4) & 0x80 ) {
520         size = buffer_get_int(id3->buf);
521         DEBUG_TRACE("    found non-syncsafe iTunes size for %s, size adjusted to %d\n", id, size);
522       }
523       else {
524         size = buffer_get_syncsafe(id3->buf, 4);
525       }
526 
527       flags = buffer_get_short(id3->buf);
528 
529       id3->size_remain -= 6;
530 
531       DEBUG_TRACE("  %s, frame flags %x, size %d\n", id, flags, size);
532 
533       if (size > id3->size_remain) {
534         DEBUG_TRACE("    frame size too big, aborting\n");
535         ret = 0;
536         goto out;
537       }
538 
539       // iTunes writes bad frame IDs such as 'TSA ', these should be run through compat
540       // as 3-char frames
541       if (id[3] == ' ') {
542         id3_compat const *compat;
543         compat = _id3_compat_lookup((char *)&id, 3);
544         if (compat && compat->equiv) {
545           strncpy(id, compat->equiv, 4);
546           id[4] = 0;
547 
548           DEBUG_TRACE("    bad iTunes v2.4 tag, compat -> %s\n", id);
549         }
550       }
551 
552       if (flags & ID3_FRAME_FLAG_V24_GROUPINGIDENTITY) {
553         // tested with v2.4-group-id.mp3
554 #ifdef AUDIO_SCAN_DEBUG
555         DEBUG_TRACE("    group_id %d\n", buffer_get_char(id3->buf));
556 #else
557         buffer_consume(id3->buf, 1);
558 #endif
559         id3->size_remain--;
560         size--;
561       }
562 
563       if (flags & ID3_FRAME_FLAG_V24_ENCRYPTION) {
564         // tested with v2.4-encrypted-frame.mp3
565 #ifdef AUDIO_SCAN_DEBUG
566         DEBUG_TRACE("    encrypted, method %d\n", buffer_get_char(id3->buf));
567 #else
568         buffer_consume(id3->buf, 1);
569 #endif
570 
571         id3->size_remain--;
572         size--;
573 
574         DEBUG_TRACE("    skipping encrypted frame\n");
575         _id3_skip(id3, size);
576         id3->size_remain -= size;
577         goto out;
578       }
579 
580       if (flags & ID3_FRAME_FLAG_V24_DATALENGTHINDICATOR) {
581         decoded_size = buffer_get_syncsafe(id3->buf, 4);
582         id3->size_remain -= 4;
583         size -= 4;
584 
585         DEBUG_TRACE("    data length indicator, size %d\n", decoded_size);
586       }
587 
588       if (flags & ID3_FRAME_FLAG_V24_UNSYNCHRONISATION) {
589         // Special case, do not unsync an APIC frame if not reading artwork,
590         // FF's are not likely to appear in the part we care about anyway
591         if ( !strcmp(id, "APIC") && _env_true("AUDIO_SCAN_NO_ARTWORK") ) {
592           DEBUG_TRACE("    Would un-synchronize APIC frame, but ignoring because of AUDIO_SCAN_NO_ARTWORK\n");
593 
594           // Reset decoded_size to 0 since we aren't actually decoding.
595           // XXX this would break if we have a compressed + unsync APIC frame but not very likely in the real world
596           decoded_size = 0;
597 
598           id3->tag_data_safe = 0;
599         }
600         else {
601           // tested with v2.4-unsync.mp3
602           if ( !_check_buf(id3->infile, id3->buf, size, ID3_BLOCK_SIZE) ) {
603             ret = 0;
604             goto out;
605           }
606 
607           decoded_size = _id3_deunsync( buffer_ptr(id3->buf), size );
608 
609           unsync_extra = size - decoded_size;
610 
611           DEBUG_TRACE("    Un-synchronized frame, new_size %d\n", decoded_size);
612         }
613       }
614 
615       if (flags & ID3_FRAME_FLAG_V24_COMPRESSION) {
616         // tested with v2.4-compressed-frame.mp3
617         // XXX need test for compressed + unsync
618         unsigned long tmp_size;
619 
620         if ( !_check_buf(id3->infile, id3->buf, size, ID3_BLOCK_SIZE) ) {
621           ret = 0;
622           goto out;
623         }
624 
625         DEBUG_TRACE("    decompressing\n");
626 
627         Newz(0, decompressed, sizeof(Buffer), Buffer);
628         buffer_init(decompressed, decoded_size);
629 
630         tmp_size = decoded_size;
631         if (
632           uncompress(buffer_ptr(decompressed), &tmp_size, buffer_ptr(id3->buf), size) != Z_OK
633           ||
634     	    tmp_size != decoded_size
635     	  ) {
636           DEBUG_TRACE("    unable to decompress frame\n");
637           buffer_free(decompressed);
638           Safefree(decompressed);
639           decompressed = 0;
640         }
641         else {
642           // Hack buffer so it knows we've added data directly
643           decompressed->end = decoded_size;
644         }
645       }
646     }
647   }
648 
649   // Special case, completely skip XHD3 frame (mp3HD) as it will be large
650   // Also skip NCON, a large tag written by MusicMatch
651   if ( !strcmp(id, "XHD3") || !strcmp(id, "NCON") ) {
652     DEBUG_TRACE("    skipping large binary %s frame\n", id);
653     _id3_skip(id3, size);
654     id3->size_remain -= size;
655     goto out;
656   }
657 
658   frametype = _id3_frametype_lookup(id, 4);
659   if (frametype == 0) {
660     switch ( id[0] ) {
661     case 'T':
662       frametype = &id3_frametype_text;
663       break;
664 
665     case 'W':
666       frametype = &id3_frametype_url;
667       break;
668 
669     case 'X':
670     case 'Y':
671     case 'Z':
672       frametype = &id3_frametype_experimental;
673       break;
674 
675     default:
676       frametype = &id3_frametype_unknown;
677       break;
678     }
679   }
680 
681 #ifdef AUDIO_SCAN_DEBUG
682   {
683     int i;
684     DEBUG_TRACE("    nfields %d:", frametype->nfields);
685     for (i = 0; i < frametype->nfields; ++i) {
686       DEBUG_TRACE(" %d", frametype->fields[i]);
687     }
688     DEBUG_TRACE("\n");
689   }
690 #endif
691 
692   // If frame was compressed, temporarily set the id3 buffer to use the decompressed buffer
693   if (decompressed) {
694     tmp_buf  = id3->buf;
695     id3->buf = decompressed;
696   }
697 
698   if ( !_id3_parse_v2_frame_data(id3, (char *)&id, decoded_size ? decoded_size : size, frametype) ) {
699     DEBUG_TRACE("    error parsing frame, aborting\n");
700     ret = 0;
701     goto out;
702   }
703 
704   if (id3->size_remain > size) {
705     id3->size_remain -= size;
706   }
707   else {
708     id3->size_remain = 0;
709   }
710 
711   // Consume extra bytes if we had to unsync this frame
712   if (unsync_extra) {
713     DEBUG_TRACE("    consuming extra bytes after unsync: %d\n", unsync_extra);
714     buffer_consume(id3->buf, unsync_extra);
715   }
716 
717 out:
718   if (decompressed) {
719     // Reset id3 buffer and consume rest of compressed frame
720     id3->buf = tmp_buf;
721     buffer_consume(id3->buf, size);
722 
723     buffer_free(decompressed);
724     Safefree(decompressed);
725   }
726 
727   return ret;
728 }
729 
730 int
_id3_parse_v2_frame_data(id3info * id3,char const * id,uint32_t size,id3_frametype const * frametype)731 _id3_parse_v2_frame_data(id3info *id3, char const *id, uint32_t size, id3_frametype const *frametype)
732 {
733   int ret = 1;
734   uint32_t read = 0;
735   int8_t encoding = -1;
736 
737   uint8_t buffer_art = ( !strcmp(id, "APIC") ) ? 1 : 0;
738   uint8_t skip_art   = ( buffer_art && _env_true("AUDIO_SCAN_NO_ARTWORK") ) ? 1 : 0;
739 
740   // Bug 16703, a completely empty frame is against the rules, skip it
741   if (!size)
742     return 1;
743 
744   if (skip_art) {
745     // Only buffer enough for the APIC header fields, this is only a rough guess
746     // because the description could technically be very long
747     if ( !_check_buf(id3->infile, id3->buf, 128, ID3_BLOCK_SIZE) ) {
748       return 0;
749     }
750     DEBUG_TRACE("    partial read due to AUDIO_SCAN_NO_ARTWORK\n");
751   }
752   else {
753     // Use a special buffering mode for binary artwork, to avoid
754     // using 2x the memory of the APIC frame (once for buffer, once for SV)
755     if (buffer_art) {
756       // Buffer enough for encoding/MIME/picture type/description
757       if ( !_check_buf(id3->infile, id3->buf, 128, ID3_BLOCK_SIZE) ) {
758         return 0;
759       }
760     }
761     else {
762       // Buffer the entire frame
763       if ( !_check_buf(id3->infile, id3->buf, size, ID3_BLOCK_SIZE) ) {
764         return 0;
765       }
766     }
767   }
768 
769   if ( frametype->fields[0] == ID3_FIELD_TYPE_TEXTENCODING ) {
770     // many frames have an encoding byte, read it here
771     encoding = buffer_get_char(id3->buf);
772     read++;
773     DEBUG_TRACE("    encoding: %d\n", encoding);
774 
775     if (encoding < 0 || encoding > 3) {
776       DEBUG_TRACE("    invalid encoding, skipping frame\n");
777       goto out;
778     }
779   }
780 
781   // Special handling for TXXX/WXXX frames
782   if ( !strcmp(id, "TXXX") || !strcmp(id, "WXXX") ) {
783     // Read key and uppercase it
784     SV *key   = NULL;
785     SV *value = NULL;
786 
787     read += _id3_get_utf8_string(id3, &key, size - read, encoding);
788 
789     if (key != NULL && SvPOK(key) && sv_len(key)) {
790       upcase(SvPVX(key));
791 
792       // Read value
793       if (frametype->fields[2] == ID3_FIELD_TYPE_LATIN1) {
794         // WXXX frames have a latin1 value field regardless of encoding byte
795         encoding = ISO_8859_1;
796       }
797 
798       read += _id3_get_utf8_string(id3, &value, size - read, encoding);
799 
800       // (T|W)XXX frames don't support multiple strings separated by nulls, even in v2.4
801 
802       // Only one tag per unique key value is allowed, that's why there is no array support here
803       if (value != NULL && SvPOK(value) && sv_len(value)) {
804         my_hv_store_ent( id3->tags, key, value );
805       }
806       else {
807         my_hv_store_ent( id3->tags, key, &PL_sv_undef );
808         if (value) SvREFCNT_dec(value);
809       }
810     }
811     else {
812       DEBUG_TRACE("    invalid/empty (T|W)XXX key, skipping frame\n");
813     }
814 
815     if (key) SvREFCNT_dec(key);
816   }
817 
818   // Special handling for TCON genre frame
819   else if ( !strcmp(id, "TCON") ) {
820     AV *genres = newAV();
821     char *sptr, *end, *tmp;
822 
823     while (read < size) {
824       SV *value  = NULL;
825 
826       // v2.4 handles multiple genres using null char separators (or $00 $00 in UTF-16),
827       // this is handled by _id3_get_utf8_string
828       read += _id3_get_utf8_string(id3, &value, size - read, encoding);
829       if (value != NULL && SvPOK(value)) {
830         sptr = SvPVX(value);
831 
832         // Test if the string contains only a number,
833         // strtol will set tmp to end in this case
834         end = sptr + sv_len(value);
835         strtol(sptr, &tmp, 0);
836 
837         if ( tmp == end ) {
838           // Convert raw number to genre string
839           av_push( genres, newSVpv( _id3_genre_name((char *)sptr), 0 ) );
840 
841           // value as an SV won't be used, must drop refcnt
842           SvREFCNT_dec(value);
843         }
844         else if ( *sptr == '(' ) {
845           // Handle (26), (26)Ambient, etc, only the number portion will be read
846 
847           if (id3->version_major < 4) {
848             // v2.2/v2.3 handle multiple genres using parens for some reason, i.e. (51)(39) or (55)(Text)
849             char *ptr = sptr;
850             char *end = sptr + sv_len(value);
851 
852             while (end - ptr > 0) {
853               if ( *ptr++ == '(' ) {
854                 char *paren = strchr(ptr, ')');
855                 if (paren == NULL)
856                   paren = end;
857 
858                 if ( isdigit(*ptr) || !strncmp((char *)ptr, "RX", 2) || !strncmp((char *)ptr, "CR", 2) ) {
859                   av_push( genres, newSVpv( _id3_genre_name((char *)ptr), 0 ) );
860                 }
861                 else {
862                   // Handle text within parens
863                   av_push( genres, newSVpvn(ptr,  paren - ptr) );
864                 }
865                 ptr = paren;
866               }
867             }
868           }
869           else {
870             // v2.4, the (51) method is no longer valid but we will support it anyway
871             sptr++;
872             if ( isdigit(*sptr) || !strncmp(sptr, "RX", 2) || !strncmp(sptr, "CR", 2) ) {
873               av_push( genres, newSVpv( _id3_genre_name((char *)sptr), 0 ) );
874             }
875             else {
876               av_push( genres, newSVpv( (char *)sptr, 0 ) );
877             }
878           }
879 
880           // value as an SV won't be used, must drop refcnt
881           SvREFCNT_dec(value);
882         }
883         else {
884           // Support raw RX/CR value
885           if ( !strncmp(sptr, "RX", 2) || !strncmp(sptr, "CR", 2) ) {
886             av_push( genres, newSVpv( _id3_genre_name((char *)sptr), 0 ) );
887 
888             // value as an SV won't be used, must drop refcnt
889             SvREFCNT_dec(value);
890           }
891           else {
892             // Store plain text genre
893             av_push( genres, value );
894           }
895         }
896       }
897     }
898 
899     if (av_len(genres) > 0) {
900       my_hv_store( id3->tags, id, newRV_noinc( (SV *)genres ) );
901     }
902     else if (av_len(genres) == 0) {
903       my_hv_store( id3->tags, id, av_shift(genres) );
904       SvREFCNT_dec(genres);
905     }
906     else {
907       SvREFCNT_dec(genres);
908     }
909   }
910 
911   // 1-field frames: MCDI, PCNT, SEEK (unsupported), T* (text), W* (url), unknown
912   // and 2-field frames where the first field is encoding
913   // are mapped to plain hash entries
914   else if (
915     frametype->nfields == 1 ||
916     (frametype->nfields == 2 && frametype->fields[0] == ID3_FIELD_TYPE_TEXTENCODING)
917   ) {
918     int i = frametype->nfields - 1;
919     AV *array = NULL;
920     SV *value = NULL;
921     int count = 0;
922 
923     switch ( frametype->fields[i] ) {
924       case ID3_FIELD_TYPE_LATIN1: // W* frames
925         read += _id3_get_utf8_string(id3, &value, size - read, ISO_8859_1);
926         if (value != NULL && SvPOK(value))
927           my_hv_store( id3->tags, id, value );
928         break;
929 
930       case ID3_FIELD_TYPE_STRINGLIST: // T* frames
931         // XXX technically in v2.2/v2.3 we should ignore multiple strings separated by nulls, but
932         // allowing it is fine I think
933         while (read < size) {
934           if (count++ == 1 && value != NULL) {
935             // we're reading the second string in the list, move first value to new array
936             array = newAV();
937             av_push(array, value);
938           }
939           value = NULL;
940 
941           read += _id3_get_utf8_string(id3, &value, size - read, encoding);
942 
943           if (array != NULL && value != NULL && SvPOK(value)) {
944             // second+ string, add to array
945             // Bug 16452, do not add a null string
946             if (sv_len(value) > 0)
947               av_push(array, value);
948           }
949         }
950 
951         if (array != NULL) {
952           if (av_len(array) == 0) {
953             // Handle the case where we have multiple empty strings leaving an array of 1
954             my_hv_store( id3->tags, id, av_shift(array) );
955             SvREFCNT_dec(array);
956           }
957           else {
958             my_hv_store( id3->tags, id, newRV_noinc( (SV *)array ) );
959           }
960         }
961         else if (value != NULL && SvPOK(value)) {
962           my_hv_store( id3->tags, id, value );
963         }
964         break;
965 
966       case ID3_FIELD_TYPE_INT32: // SEEK (unsupported, XXX need test)
967         my_hv_store( id3->tags, id, newSViv( buffer_get_int(id3->buf) ) );
968         read += 4;
969         break;
970 
971       case ID3_FIELD_TYPE_INT32PLUS: // PCNT
972         my_hv_store( id3->tags, id, newSViv( _varint( buffer_ptr(id3->buf), size - read ) ) );
973         buffer_consume(id3->buf, size - read);
974         read = size;
975         break;
976 
977       case ID3_FIELD_TYPE_BINARYDATA: // unknown/obsolete frames
978         // Special handling for RVA(D), tested in v2.2-itunes81.mp3, v2.3-itunes81.mp3
979         if ( !strcmp(id, "RVAD") ) {
980           read += _id3_parse_rvad(id3, id, size - read);
981         }
982 
983         // Special handling for RGAD (non-standard replaygain frame), tested in v2.3-rgad.mp3
984         // Based on some code found at http://getid3.sourceforge.net/source/module.tag.id3v2.phps
985         else if ( !strcmp(id, "RGAD") ) {
986           read += _id3_parse_rgad(id3);
987         }
988 
989         // Other unknown binary data
990         else {
991           // Y* obsolete frames
992           my_hv_store( id3->tags, id, newSVpvn( buffer_ptr(id3->buf), size - read ) );
993           buffer_consume(id3->buf, size - read);
994           read = size;
995         }
996         break;
997 
998       default:
999         // XXX
1000         warn("   !!! unhandled field type %d\n", frametype->fields[i]);
1001         buffer_consume(id3->buf, size - read);
1002         read += size - read;
1003         break;
1004     }
1005   }
1006 
1007   // 2+ field frames are mapped to arrayrefs:
1008   // The following frames have tests:
1009   // ETCO, UFID, USLT, SYLT, COMM, RVA2, APIC, GEOB, POPM, LINK, PRIV
1010   //
1011   // XXX The following frames need tests:
1012   // MLLT, SYTC, EQU2, RVRB, AENC, POSS, USER, OWNE,
1013   // COMR, ENCR, GRID, SIGN, ASPI, LINK (v2.4)
1014   else {
1015     int i = 0;
1016     AV *framedata = newAV();
1017 
1018     // If we read an initial encoding byte, start at field 2
1019     if (encoding >= 0)
1020       i = 1;
1021 
1022     for (; i < frametype->nfields; i++) {
1023       SV *value = NULL;
1024 
1025       switch ( frametype->fields[i] ) {
1026         case ID3_FIELD_TYPE_LATIN1:
1027           // Special case, fix v2.2 PIC frame fields as they don't match APIC
1028           // This is a rather hackish place to put this, but there's not really any other place
1029           if ( id3->version_major == 2 && !strcmp(id, "APIC") ) {
1030             av_push( framedata, newSVpvn( buffer_ptr(id3->buf), 3 ) );
1031             buffer_consume(id3->buf, 3);
1032             read += 3;
1033             DEBUG_TRACE("    PIC image format, read %d\n", read);
1034           }
1035           else {
1036             read += _id3_get_utf8_string(id3, &value, size - read, ISO_8859_1);
1037             if (value != NULL && SvPOK(value))
1038               av_push( framedata, value );
1039           }
1040           break;
1041 
1042         // ID3_FIELD_TYPE_LATIN1FULL - not used
1043 
1044         case ID3_FIELD_TYPE_LATIN1LIST: // LINK
1045           while (read < size) {
1046             read += _id3_get_utf8_string(id3, &value, size - read, ISO_8859_1);
1047             if (value != NULL && SvPOK(value))
1048               av_push( framedata, value );
1049             value = NULL;
1050             DEBUG_TRACE("    latin1list, read %d\n", read);
1051           }
1052           break;
1053 
1054         case ID3_FIELD_TYPE_STRING:
1055           read += _id3_get_utf8_string(id3, &value, size - read, encoding);
1056           if (value != NULL && SvPOK(value)) {
1057             av_push( framedata, value );
1058             DEBUG_TRACE("    string, read %d: %s\n", read, SvPVX(value));
1059           }
1060           else {
1061             av_push( framedata, &PL_sv_undef );
1062             if (value) SvREFCNT_dec(value);
1063           }
1064           break;
1065 
1066         case ID3_FIELD_TYPE_STRINGFULL: // USLT, COMM, read entire string until end of frame
1067         {
1068           SV *tmp = newSVpvn( "", 0 );
1069           while (read < size) {
1070             read += _id3_get_utf8_string(id3, &value, size - read, encoding);
1071             if (value != NULL && SvPOK(value)) {
1072               sv_catsv( tmp, value );
1073               SvREFCNT_dec(value);
1074             }
1075             value = NULL;
1076           }
1077           av_push( framedata, tmp );
1078           DEBUG_TRACE("    stringfull, read %d: %s\n", read, SvPVX(tmp));
1079           break;
1080         }
1081 
1082         // ID3_FIELD_TYPE_STRINGLIST - only used for text frames, handled above
1083 
1084         case ID3_FIELD_TYPE_LANGUAGE: // USLT, SYLT, COMM, USER, 3-byte language code
1085           if (size - read >= 3) {
1086             av_push( framedata, newSVpvn( buffer_ptr(id3->buf), 3 ) );
1087             buffer_consume(id3->buf, 3);
1088             read += 3;
1089             DEBUG_TRACE("    language, read %d\n", read);
1090           }
1091           break;
1092 
1093         case ID3_FIELD_TYPE_FRAMEID: // LINK, 3-byte frame id (v2.3, must be a bug in the spec?),
1094                                      // 4-byte frame id (v2.4) XXX need test
1095         {
1096           uint8_t len = (id3->version_major == 3) ? 3 : 4;
1097           if (size - read >= len) {
1098             av_push( framedata, newSVpvn( buffer_ptr(id3->buf), len ) );
1099             buffer_consume(id3->buf, len);
1100             read += len;
1101             DEBUG_TRACE("    frameid, read %d\n", read);
1102           }
1103           break;
1104         }
1105 
1106         case ID3_FIELD_TYPE_DATE: // OWNE, COMR, XXX need test, YYYYMMDD
1107           if (size - read >= 8) {
1108             av_push( framedata, newSVpvn( buffer_ptr(id3->buf), 8 ) );
1109             buffer_consume(id3->buf, 8);
1110             read += 8;
1111             DEBUG_TRACE("    date, read %d\n", read);
1112           }
1113           break;
1114 
1115         case ID3_FIELD_TYPE_INT8: // ETCO, MLLT, SYTC, SYLT, EQU2, RVRB, APIC,
1116                                   // POPM, RBUF, POSS, COMR, ENCR, GRID, SIGN, ASPI
1117           if (size - read >= 1) {
1118             av_push( framedata, newSViv( buffer_get_char(id3->buf) ) );
1119             read += 1;
1120             DEBUG_TRACE("    int8, read %d\n", read);
1121           }
1122           break;
1123 
1124         case ID3_FIELD_TYPE_INT16: // MLLT, RVRB, AENC, ASPI
1125           if (size - read >= 2) {
1126             av_push( framedata, newSViv( buffer_get_short(id3->buf) ) );
1127             read += 2;
1128             DEBUG_TRACE("    int16, read %d\n", read);
1129           }
1130           break;
1131 
1132         case ID3_FIELD_TYPE_INT24: // MLLT, RBUF
1133           if (size - read >= 3) {
1134             av_push( framedata, newSViv( buffer_get_int24(id3->buf) ) );
1135             read += 3;
1136             DEBUG_TRACE("    int24, read %d\n", read);
1137           }
1138           break;
1139 
1140         case ID3_FIELD_TYPE_INT32: // RBUF, SEEK, ASPI
1141           if (size - read >= 4) {
1142             av_push( framedata, newSViv( buffer_get_int(id3->buf) ) );
1143             read += 4;
1144             DEBUG_TRACE("    int32, read %d\n", read);
1145           }
1146           break;
1147 
1148         case ID3_FIELD_TYPE_INT32PLUS: // POPM
1149           if (size - read >= 4) {
1150             av_push( framedata, newSViv( _varint( buffer_ptr(id3->buf), size - read ) ) );
1151             buffer_consume(id3->buf, size - read);
1152             read = size;
1153             DEBUG_TRACE("    int32plus, read %d\n", read);
1154           }
1155           break;
1156 
1157         case ID3_FIELD_TYPE_BINARYDATA: // ETCO, MLLT, SYTC, SYLT, RVA2, EQU2, APIC,
1158                                         // GEOB, AENC, POSS, COMR, ENCR, GRID, PRIV, SIGN, ASPI
1159           // Special handling for APIC tags when in skip_art mode
1160           if (skip_art) {
1161             av_push( framedata, newSVuv(size - read) );
1162 
1163             // I don't think it's possible to obtain an APIC offset when a tag has been unsync'ed,
1164             // so we can't support skip_art mode in this case. See v2.3-unsync-apic-bad-offset.mp3
1165             if (id3->flags & ID3_TAG_FLAG_UNSYNCHRONISATION && id3->version_major < 4) {
1166               DEBUG_TRACE("    cannot obtain APIC offset due to v2.3 unsync tag\n");
1167             }
1168             else {
1169               // Record offset of APIC image data too, unless the data needs to be unsynchronized or is empty
1170               if (id3->tag_data_safe && (size - read) > 0)
1171                 av_push( framedata, newSVuv(id3->offset + (id3->size - id3->size_remain) + read) );
1172             }
1173 
1174             _id3_skip(id3, size - read);
1175             read = size;
1176           }
1177 
1178           // Special buffering mode for APIC data, avoids a large buffer allocation
1179           else if (buffer_art) {
1180             uint32_t remain = size - read;
1181             uint32_t chunk_size;
1182             SV *artwork = newSVpv("", 0);
1183 
1184             while (read < size) {
1185               if ( !_check_buf(id3->infile, id3->buf, 1, ID3_BLOCK_SIZE) ) {
1186                 return 0;
1187               }
1188 
1189               chunk_size = remain < buffer_len(id3->buf) ? remain : buffer_len(id3->buf);
1190 
1191               read += chunk_size;
1192               remain -= chunk_size;
1193 
1194               sv_catpvn( artwork, buffer_ptr(id3->buf), chunk_size );
1195               buffer_consume(id3->buf, chunk_size);
1196 
1197               DEBUG_TRACE("    buffered %d bytes of APIC data (remaining %d)\n", chunk_size, remain);
1198             }
1199 
1200             av_push( framedata, artwork );
1201           }
1202 
1203           // Special handling for RVA2 tags
1204           else if ( !strcmp(id, "RVA2") ) {
1205             read += _id3_parse_rva2(id3, size, framedata);
1206           }
1207 
1208           // Special handling for SYLT tags
1209           else if ( !strcmp(id, "SYLT") ) {
1210             read += _id3_parse_sylt(id3, encoding, size - read, framedata);
1211           }
1212 
1213           // Special handling for ETCO tags
1214           else if ( !strcmp(id, "ETCO") ) {
1215             read += _id3_parse_etco(id3, size - read, framedata);
1216           }
1217 
1218           // All other binary frames, copy as-is
1219           else {
1220             if (size - read > 1) {
1221               av_push( framedata, newSVpvn( buffer_ptr(id3->buf), size - read ) );
1222               buffer_consume(id3->buf, size - read);
1223               read = size;
1224               DEBUG_TRACE("    binarydata, read %d\n", read);
1225             }
1226           }
1227           break;
1228 
1229         default:
1230           break;
1231       }
1232     }
1233 
1234     _id3_set_array_tag(id3, id, framedata);
1235   }
1236 
1237 out:
1238   if (read < size) {
1239     buffer_consume(id3->buf, size - read);
1240     DEBUG_TRACE("    !!! consuming extra bytes in frame: %d\n", size - read);
1241   }
1242 
1243   return ret;
1244 }
1245 
1246 void
_id3_set_array_tag(id3info * id3,char const * id,AV * framedata)1247 _id3_set_array_tag(id3info *id3, char const *id, AV *framedata)
1248 {
1249   if ( av_len(framedata) != -1 ) {
1250     if ( my_hv_exists( id3->tags, id ) ) {
1251       // If tag already exists, move it to an arrayref
1252       SV **entry = my_hv_fetch( id3->tags, id );
1253       if (entry != NULL) {
1254         if ( SvTYPE( SvRV(*entry) ) == SVt_PV ) {
1255           // A normal string entry, convert to array
1256 	  AV *ref = newAV();
1257 
1258           // XXX need test, this may be illegal because you can't have multiple duplicate frames?
1259           DEBUG_TRACE("   !!! converting normal string tag to array\n");
1260 
1261           av_push( ref, *entry );
1262           av_push( ref, newRV_noinc( (SV *)framedata ) );
1263           my_hv_store( id3->tags, id, newRV_noinc( (SV *)ref ) );
1264         }
1265         else if ( SvTYPE( SvRV(*entry) ) == SVt_PVAV ) {
1266           // If type of first item is array, add new item to entry
1267           SV **first = av_fetch( (AV *)SvRV(*entry), 0, 0 );
1268           if ( first == NULL || ( SvROK(*first) && SvTYPE( SvRV(*first) ) == SVt_PVAV ) ) {
1269             av_push( (AV *)SvRV(*entry), newRV_noinc( (SV *)framedata ) );
1270           }
1271           else {
1272             AV *ref = newAV();
1273             av_push( ref, SvREFCNT_inc(*entry) );
1274             av_push( ref, newRV_noinc( (SV *)framedata) );
1275             my_hv_store( id3->tags, id, newRV_noinc( (SV *)ref ) );
1276           }
1277         }
1278       }
1279     }
1280     else {
1281       my_hv_store( id3->tags, id, newRV_noinc( (SV *)framedata ) );
1282     }
1283   }
1284   else {
1285     SvREFCNT_dec(framedata);
1286   }
1287 }
1288 
1289 // Read a latin1 or UTF-8 string from an ID3v1 tag
1290 // This function handles trimming spaces off the end
1291 uint32_t
_id3_get_v1_utf8_string(id3info * id3,SV ** string,uint32_t len)1292 _id3_get_v1_utf8_string(id3info *id3, SV **string, uint32_t len)
1293 {
1294   uint32_t read = 0;
1295   char *ptr;
1296   char *str;
1297 
1298   read = _id3_get_utf8_string(id3, string, len, ISO_8859_1);
1299 
1300   if (read) {
1301     // Trim spaces from end
1302     if (*string != NULL) {
1303       str = SvPVX(*string);
1304       ptr = str + sv_len(*string);
1305 
1306       while (ptr > str && ptr[-1] == ' ')
1307         --ptr;
1308 
1309       *ptr = 0;
1310       SvCUR_set(*string, ptr - str);
1311     }
1312   }
1313 
1314   return read;
1315 }
1316 
1317 uint32_t
_id3_get_utf8_string(id3info * id3,SV ** string,uint32_t len,uint8_t encoding)1318 _id3_get_utf8_string(id3info *id3, SV **string, uint32_t len, uint8_t encoding)
1319 {
1320   uint8_t byteorder = UTF16_BYTEORDER_ANY;
1321   uint32_t read = 0;
1322   unsigned char *bptr;
1323 
1324   // Init scratch buffer if necessary
1325   if ( !id3->utf8->alloc ) {
1326     // Use a larger initial buffer if reading ISO-8859-1 to avoid
1327     // always having to allocate a second time
1328     buffer_init( id3->utf8, encoding == ISO_8859_1 ? len * 2 : len );
1329   }
1330   else {
1331     // Reset scratch buffer
1332     buffer_clear(id3->utf8);
1333   }
1334 
1335   if ( *string != NULL ) {
1336     warn("    !!! string SV is not null: %s\n", SvPVX(*string));
1337   }
1338 
1339   switch (encoding) {
1340     case ISO_8859_1:
1341       read += buffer_get_latin1_as_utf8(id3->buf, id3->utf8, len);
1342       break;
1343 
1344     case UTF_16BE:
1345       byteorder = UTF16_BYTEORDER_BE;
1346 
1347     case UTF_16:
1348       bptr = buffer_ptr(id3->buf);
1349 
1350       switch ( (bptr[0] << 8) | bptr[1] ) {
1351       case 0xfeff:
1352         DEBUG_TRACE("    UTF-16 BOM is big-endian\n");
1353         byteorder = UTF16_BYTEORDER_BE;
1354         buffer_consume(id3->buf, 2);
1355         read += 2;
1356         break;
1357 
1358       case 0xfffe:
1359         DEBUG_TRACE("    UTF-16 BOM is little-endian\n");
1360         byteorder = UTF16_BYTEORDER_LE;
1361         buffer_consume(id3->buf, 2);
1362         read += 2;
1363         break;
1364       }
1365 
1366       /* Bug 14728
1367         If there is no BOM, assume LE, this is what appears in the wild -andy
1368       */
1369       if (byteorder == UTF16_BYTEORDER_ANY) {
1370         DEBUG_TRACE("    UTF-16 byte order defaulting to little-endian, no BOM\n");
1371         byteorder = UTF16_BYTEORDER_LE;
1372       }
1373 
1374       read += buffer_get_utf16_as_utf8(id3->buf, id3->utf8, len - read, byteorder);
1375       break;
1376 
1377     case UTF_8:
1378       read += buffer_get_utf8(id3->buf, id3->utf8, len);
1379       break;
1380 
1381     default:
1382       break;
1383   }
1384 
1385   if (read) {
1386     if ( buffer_len(id3->utf8) ) {
1387       *string = newSVpv( buffer_ptr(id3->utf8), 0 );
1388       sv_utf8_decode(*string);
1389       DEBUG_TRACE("    read utf8 string of %d bytes: %s\n", buffer_len(id3->utf8), SvPVX(*string));
1390     }
1391     else {
1392       DEBUG_TRACE("    empty string\n");
1393     }
1394   }
1395 
1396   return read;
1397 }
1398 
1399 uint32_t
_id3_parse_rvad(id3info * id3,char const * id,uint32_t size)1400 _id3_parse_rvad(id3info *id3, char const *id, uint32_t size)
1401 {
1402   unsigned char *rva = buffer_ptr(id3->buf);
1403   int sign_r = rva[0] & 0x01 ? 1 : -1;
1404   int sign_l = rva[0] & 0x02 ? 1 : -1;
1405   int bytes = rva[1] / 8;
1406   float vol[2];
1407   float peak[2];
1408   int i;
1409   AV *framedata = newAV();
1410 
1411   // Sanity check, first byte must be either 0 or 1, second byte > 0
1412   if (rva[0] & 0xFE || rva[1] == 0) {
1413     return 0;
1414   }
1415 
1416   // Calculated size must match the actual size
1417   if (size != 2 + (bytes * 4)) {
1418     return 0;
1419   }
1420 
1421   rva += 2;
1422 
1423   vol[0] = _varint( rva, bytes ) * sign_r / 256.;
1424   vol[1] = _varint( rva + bytes, bytes ) * sign_l / 256.;
1425 
1426   peak[0] = _varint( rva + (bytes * 2), bytes );
1427   peak[1] = _varint( rva + (bytes * 3), bytes );
1428 
1429   // iTunes uses a range of -255 to 255
1430 	// to be -100% (silent) to 100% (+6dB)
1431   for (i = 0; i < 2; i++) {
1432     if ( vol[i] == -255 ) {
1433       vol[i] = -96.0;
1434     }
1435     else {
1436       vol[i] = 20.0 * log( ( vol[i] + 255 ) / 255 ) / log(10);
1437     }
1438 
1439     av_push( framedata, newSVpvf( "%f dB", vol[i] ) );
1440     av_push( framedata, newSVpvf( "%f", peak[i] ) );
1441   }
1442 
1443   my_hv_store( id3->tags, id, newRV_noinc( (SV *)framedata ) );
1444 
1445   buffer_consume(id3->buf, 2 + (bytes * 4));
1446 
1447   return 2 + (bytes * 4);
1448 }
1449 
1450 uint32_t
_id3_parse_rgad(id3info * id3)1451 _id3_parse_rgad(id3info *id3)
1452 {
1453   float radio = 0.0;
1454   float audiophile = 0.0;
1455   uint8_t sign = 0;
1456   HV *framedata = newHV();
1457   uint32_t read = 0;
1458 
1459   // Peak (32-bit float)
1460   my_hv_store( framedata, "peak", newSVpvf( "%f", (float)buffer_get_float32(id3->buf) ) );
1461   read += 4;
1462 
1463   // Radio (16 bits)
1464 
1465   // Radio Name code (3 bits, should always be 1)
1466   buffer_get_bits(id3->buf, 3);
1467 
1468   my_hv_store( framedata, "track_originator", newSVuv( buffer_get_bits(id3->buf, 3) ) );
1469 
1470   // Sign bit (1 bit)
1471   sign = buffer_get_bits(id3->buf, 1);
1472 
1473   // Gain value (9 bits)
1474   radio = (float)buffer_get_bits(id3->buf, 9);
1475   radio /= 10.0;
1476   if (sign == 1) radio *= -1.0;
1477   my_hv_store( framedata, "track_gain", newSVpvf( "%f dB", radio ) );
1478 
1479   read += 2;
1480 
1481   // Audiophile (16 bits)
1482 
1483   // Audiophile Name code (3 bits, should always be 2)
1484   buffer_get_bits(id3->buf, 3);
1485 
1486   // Audiophile Originator code (3 bits)
1487   my_hv_store( framedata, "album_originator", newSVuv( buffer_get_bits(id3->buf, 3) ) );
1488 
1489   // Sign bit (1 bit)
1490   sign = buffer_get_bits(id3->buf, 1);
1491 
1492   // Gain value (9 bits)
1493   audiophile = (float)buffer_get_bits(id3->buf, 9);
1494   audiophile /= 10.0;
1495   if (sign == 1) audiophile *= -1.0;
1496   my_hv_store( framedata, "album_gain", newSVpvf( "%f dB", audiophile ) );
1497 
1498   read += 2;
1499 
1500   my_hv_store( id3->tags, "RGAD", newRV_noinc( (SV *)framedata ) );
1501 
1502   return read;
1503 }
1504 
1505 uint32_t
_id3_parse_rva2(id3info * id3,uint32_t len,AV * framedata)1506 _id3_parse_rva2(id3info *id3, uint32_t len, AV *framedata)
1507 {
1508   float adj = 0.0;
1509   int adj_fp;
1510   uint8_t peakbits;
1511   float peak = 0.0;
1512   uint32_t read = 0;
1513   unsigned char *bptr;
1514 
1515   // Channel
1516   av_push( framedata, newSViv( buffer_get_char(id3->buf) ) );
1517 
1518   // Adjustment
1519   bptr = buffer_ptr(id3->buf);
1520   adj_fp = *(signed char *)(bptr) << 8;
1521   adj_fp |= *(unsigned char *)(bptr+1);
1522   adj = adj_fp / 512.0;
1523   av_push( framedata, newSVpvf( "%f dB", adj ) );
1524   buffer_consume(id3->buf, 2);
1525 
1526   // Peak
1527   // Based on code from mp3gain
1528   peakbits = buffer_get_char(id3->buf);
1529 
1530   read += 4;
1531 
1532   if (4 + (peakbits + 7) / 8 <= len) {
1533     DEBUG_TRACE("    peakbits: %d\n", peakbits);
1534     if (peakbits > 0) {
1535       peak += (float)buffer_get_char(id3->buf);
1536       read++;
1537     }
1538     if (peakbits > 8) {
1539       peak += (float)buffer_get_char(id3->buf) / 256.0;
1540       read++;
1541     }
1542     if (peakbits > 16) {
1543       peak += (float)buffer_get_char(id3->buf) / 65536.0;
1544       read++;
1545     }
1546 
1547     if (peakbits > 0)
1548       peak /= (float)(1 << ((peakbits - 1) & 7));
1549   }
1550 
1551   av_push( framedata, newSVpvf( "%f dB", peak ) );
1552 
1553   return read;
1554 }
1555 
1556 uint32_t
_id3_parse_sylt(id3info * id3,uint8_t encoding,uint32_t len,AV * framedata)1557 _id3_parse_sylt(id3info *id3, uint8_t encoding, uint32_t len, AV *framedata)
1558 {
1559   uint32_t read = 0;
1560   AV *content = newAV();
1561   unsigned char *bptr;
1562 
1563   while (read < len) {
1564     SV *value = NULL;
1565     HV *lyric = newHV();
1566 
1567     read += _id3_get_utf8_string(id3, &value, len - read, encoding);
1568     if (value != NULL && SvPOK(value) && sv_len(value)) {
1569       my_hv_store( lyric, "text", value );
1570     }
1571     else {
1572       my_hv_store( lyric, "text", &PL_sv_undef );
1573       if (value) SvREFCNT(value);
1574     }
1575 
1576     my_hv_store( lyric, "timestamp", newSVuv( buffer_get_int(id3->buf) ) );
1577     read += 4;
1578 
1579     // A $0A newline byte may follow, for some odd reason
1580     bptr = buffer_ptr(id3->buf);
1581     if ( len - read > 0 && bptr[0] == 0x0a ) {
1582       buffer_consume(id3->buf, 1);
1583       read++;
1584     }
1585 
1586     av_push( content, newRV_noinc( (SV *)lyric ) );
1587   }
1588 
1589   av_push( framedata, newRV_noinc( (SV *)content ) );
1590 
1591   return read;
1592 }
1593 
1594 uint32_t
_id3_parse_etco(id3info * id3,uint32_t len,AV * framedata)1595 _id3_parse_etco(id3info *id3, uint32_t len, AV *framedata)
1596 {
1597   uint32_t read = 0;
1598   AV *content = newAV();
1599 
1600   while (read < len) {
1601     HV *event = newHV();
1602 
1603     my_hv_store( event, "type", newSVuv( buffer_get_char(id3->buf) ) );
1604     my_hv_store( event, "timestamp", newSVuv( buffer_get_int(id3->buf) ) );
1605     read += 5;
1606 
1607     av_push( content, newRV_noinc( (SV *)event ) );
1608   }
1609 
1610   av_push( framedata, newRV_noinc( (SV *)content ) );
1611 
1612   return read;
1613 }
1614 
1615 void
_id3_convert_tdrc(id3info * id3)1616 _id3_convert_tdrc(id3info *id3)
1617 {
1618   char timestamp[17] = { 0 };
1619 
1620   if ( my_hv_exists(id3->tags, "TYER") ) {
1621     SV *tyer = my_hv_delete(id3->tags, "TYER");
1622     if (SvPOK(tyer) && sv_len(tyer) == 4) {
1623       char *ptr = SvPVX(tyer);
1624       timestamp[0] = ptr[0];
1625       timestamp[1] = ptr[1];
1626       timestamp[2] = ptr[2];
1627       timestamp[3] = ptr[3];
1628       DEBUG_TRACE("  Converted TYER (%s) to TDRC (%s)\n", SvPVX(tyer), timestamp);
1629     }
1630   }
1631 
1632   if ( my_hv_exists(id3->tags, "TDAT") ) {
1633     SV *tdat = my_hv_delete(id3->tags, "TDAT");
1634     if (SvPOK(tdat) && sv_len(tdat) == 4) {
1635       char *ptr = SvPVX(tdat);
1636       timestamp[4] = '-';
1637       timestamp[5] = ptr[2];
1638       timestamp[6] = ptr[3];
1639       timestamp[7] = '-';
1640       timestamp[8] = ptr[0];
1641       timestamp[9] = ptr[1];
1642       DEBUG_TRACE("  Converted TDAT (%s) to TDRC (%s)\n", SvPVX(tdat), timestamp);
1643     }
1644   }
1645 
1646   if ( my_hv_exists(id3->tags, "TIME") ) {
1647     SV *time = my_hv_delete(id3->tags, "TIME");
1648     if (SvPOK(time) && sv_len(time) == 4) {
1649       char *ptr = SvPVX(time);
1650       timestamp[10] = 'T';
1651       timestamp[11] = ptr[0];
1652       timestamp[12] = ptr[1];
1653       timestamp[13] = ':';
1654       timestamp[14] = ptr[2];
1655       timestamp[15] = ptr[3];
1656       DEBUG_TRACE("  Converted TIME (%s) to TDRC (%s)\n", SvPVX(time), timestamp);
1657     }
1658   }
1659 
1660   if (timestamp[0]) {
1661     my_hv_store( id3->tags, "TDRC", newSVpv(timestamp, 0) );
1662   }
1663 }
1664 
// Reverse ID3v2 unsynchronisation in place (derived from libid3tag): every
// 0x00 byte that directly follows a 0xFF byte is removed.
//
// data   - buffer to rewrite; only the first length bytes are read
// length - number of valid bytes in data
//
// Returns the new length of the data, which is never larger than length.
uint32_t
_id3_deunsync(unsigned char *data, uint32_t length)
{
  unsigned char *src;
  unsigned char *dst;
  unsigned char *end = data + length;

  if (length == 0)
    return 0;

  for (src = dst = data; src < end - 1; ++src) {
    *dst++ = *src;
    // Skip the stuffed zero byte that follows 0xFF
    if (src[0] == 0xff && src[1] == 0x00)
      ++src;
  }

  // Copy the final byte, unless it was a stuffed zero that the loop already
  // skipped. In that case src == end and there is nothing left to copy.
  if (src < end)
    *dst++ = *src;

  return (uint32_t)(dst - data);
}
1686 
1687 void
_id3_skip(id3info * id3,uint32_t size)1688 _id3_skip(id3info *id3, uint32_t size)
1689 {
1690   if ( buffer_len(id3->buf) >= size ) {
1691     buffer_consume(id3->buf, size);
1692 
1693     DEBUG_TRACE("  skipped buffer data size %d\n", size);
1694   }
1695   else {
1696     PerlIO_seek(id3->infile, size - buffer_len(id3->buf), SEEK_CUR);
1697     buffer_clear(id3->buf);
1698 
1699     DEBUG_TRACE("  seeked past %d bytes to %d\n", size, (int)PerlIO_tell(id3->infile));
1700   }
1701 }
1702 
1703 // return an ID3v1 genre string indexed by number
1704 char const *
_id3_genre_index(unsigned int index)1705 _id3_genre_index(unsigned int index)
1706 {
1707   return (index < NGENRES) ? genre_table[index] : 0;
1708 }
1709 
1710 // translate an ID3v2 genre number/keyword to its full name
1711 char const *
_id3_genre_name(char const * string)1712 _id3_genre_name(char const *string)
1713 {
1714   static char const genre_remix[] = { 'R', 'e', 'm', 'i', 'x', 0 };
1715   static char const genre_cover[] = { 'C', 'o', 'v', 'e', 'r', 0 };
1716   unsigned long number;
1717 
1718   if (string == 0 || *string == 0)
1719     return 0;
1720 
1721   if (string[0] == 'R' && string[1] == 'X')
1722     return genre_remix;
1723   if (string[0] == 'C' && string[1] == 'R')
1724     return genre_cover;
1725 
1726   number = strtol(string, NULL, 0);
1727 
1728   return (number < NGENRES) ? genre_table[number] : string;
1729 }
1730