#------------------------------------------------------------------------------
# File:         Audible.pm
#
# Description:  Read metadata from Audible audio books
#
# Revisions:    2015/04/05 - P. Harvey Created
#
# References:   1) https://github.com/jteeuwen/audible
#               2) https://code.google.com/p/pyaudibletags/
#               3) http://wiki.multimedia.cx/index.php?title=Audible_Audio
#------------------------------------------------------------------------------

package Image::ExifTool::Audible;

use strict;
use vars qw($VERSION);
use Image::ExifTool qw(:DataAccess :Utils);

$VERSION = '1.02';

# forward declarations (referenced by PROCESS_PROC entries in the tables below)
sub ProcessAudible_meta($$$);
sub ProcessAudible_cvrx($$$);

# tags extracted from Audible .AA files by ProcessAA()
# (dictionary keys not listed here are added to this table at run time)
%Image::ExifTool::Audible::Main = (
    GROUPS => { 2 => 'Audio' },
    NOTES => q{
        ExifTool will extract any information found in the metadata dictionary of
        Audible .AA files, even if not listed in the table below.
    },
    # tags found in the metadata dictionary (chunk 2)
    pubdate        => { Name => 'PublishDate',      Groups => { 2 => 'Time' } },
    pub_date_start => { Name => 'PublishDateStart', Groups => { 2 => 'Time' } },
    author         => { Name => 'Author',           Groups => { 2 => 'Author' } },
    copyright      => { Name => 'Copyright',        Groups => { 2 => 'Author' } },
    # also seen (ref PH):
    # product_id, parent_id, title, provider, narrator, price, description,
    # long_description, short_title, is_aggregation, title_id, codec, HeaderSeed,
    # EncryptedBlocks, HeaderKey, license_list, CPUType, license_count, <12 hex digits>,
    # parent_short_title, parent_title, aggregation_id, short_description, user_alias

    # information extracted from other chunks
    # (leading underscore keeps these IDs distinct from dictionary keys)
    _chapter_count => { Name => 'ChapterCount' }, # from chunk 6
    _cover_art => { # from chunk 11
        Name => 'CoverArt',
        Groups => { 2 => 'Preview' },
        Binary => 1,
    },
);

# 'tags' atoms observed in Audible .m4b audio books (ref PH)
%Image::ExifTool::Audible::tags = (
    GROUPS => { 0 => 'QuickTime', 2 => 'Audio' },
    NOTES => 'Information found in "tags" atom of Audible M4B audio books.',
    meta => {
        Name => 'Audible_meta',
        SubDirectory => { TagTable => 'Image::ExifTool::Audible::meta' },
    },
    cvrx => {
        Name => 'Audible_cvrx',
        SubDirectory => { TagTable => 'Image::ExifTool::Audible::cvrx' },
    },
    tseg => {
        Name => 'Audible_tseg',
        SubDirectory => { TagTable => 'Image::ExifTool::Audible::tseg' },
    },
);

# 'meta' information observed in Audible .m4b audio books (ref PH)
# (IDs not listed here are added to this table by ProcessAudible_meta)
%Image::ExifTool::Audible::meta = (
    PROCESS_PROC => \&ProcessAudible_meta,
    GROUPS => { 0 => 'QuickTime', 2 => 'Audio' },
    NOTES => 'Information found in Audible M4B "meta" atom.',
    Album       => 'Album',
    ALBUMARTIST => { Name => 'AlbumArtist', Groups => { 2 => 'Author' } },
    Artist      => { Name => 'Artist',      Groups => { 2 => 'Author' } },
    Comment     => 'Comment',
    Genre       => 'Genre',
    itunesmediatype => { Name => 'iTunesMediaType', Description => 'iTunes Media Type' },
    SUBTITLE    => 'Subtitle',
    Title       => 'Title',
    TOOL        => 'CreatorTool',
    Year        => { Name => 'Year', Groups => { 2 => 'Time' } },
    track       => 'ChapterName', # (found in 'meta' of 'tseg' atom)
);

# 'cvrx' information observed in Audible .m4b audio books (ref PH)
%Image::ExifTool::Audible::cvrx = (
    PROCESS_PROC => \&ProcessAudible_cvrx,
    GROUPS => { 0 => 'QuickTime', 2 => 'Audio' },
    NOTES => 'Audible cover art information in M4B audio books.',
    VARS => { NO_ID => 1 },
    CoverArtType => 'CoverArtType',
    CoverArt => {
        Name => 'CoverArt',
        Groups => { 2 => 'Preview' },
        Binary => 1,
    },
);

# 'tseg' information observed in Audible .m4b audio books (ref PH)
%Image::ExifTool::Audible::tseg = (
    GROUPS => { 0 => 'QuickTime', 2 => 'Audio' },
    tshd => {
        Name => 'ChapterNumber',
        Format => 'int32u',
        ValueConv => '$val + 1',    # start counting from 1
    },
    meta => {
        Name => 'Audible_meta2',
        SubDirectory => { TagTable => 'Image::ExifTool::Audible::meta' },
    },
);

#------------------------------------------------------------------------------
# Process Audible 'meta' tags from M4B files (ref PH)
# Inputs: 0) ExifTool object ref, 1) dirInfo ref, 2) tag table ref
# Returns: 1 on success (0 only if the data is too short to hold the entry count)
# Notes: Data layout as parsed here:
#          int32u    number of entries
#        then for each entry:
#          int8u     unknown (must be 0x80 or 0x00)
#          int16u    tag ID length (must be non-zero)
#          string    tag ID
#          int16u    version? (must be 1)
#          int32u    value size
#          string    value (decoded as UTF-8)
#        Parsing stops quietly at the first entry that fails a check, and
#        entries already handled are kept.
sub ProcessAudible_meta($$$)
{
    my ($et, $dirInfo, $tagTablePtr) = @_;
    my $dataPt = $$dirInfo{DataPt};
    my $dataPos = $$dirInfo{DataPos};
    my $dirLen = length $$dataPt;
    return 0 if $dirLen < 4;
    my $num = Get32u($dataPt, 0);
    $et->VerboseDir('Audible_meta', $num);
    my $pos = 4;
    my $index;
    for ($index=0; $index<$num; ++$index) {
        last if $pos + 3 > $dirLen;                 # need unknown byte + tag length
        my $unk = Get8u($dataPt, $pos);             # ? (0x80 or 0x00)
        last unless $unk == 0x80 or $unk == 0x00;
        my $len = Get16u($dataPt, $pos + 1);        # tag length
        $pos += 3;
        # need tag ID + version (2 bytes) + data size (4 bytes)
        last if $pos + $len + 6 > $dirLen or not $len;
        my $tag = substr($$dataPt, $pos, $len);     # tag ID
        my $ver = Get16u($dataPt, $pos + $len);     # version?
        last unless $ver == 0x0001;
        my $size = Get32u($dataPt, $pos + $len + 2);# data size
        $pos += $len + 6;
        last if $pos + $size > $dirLen;
        my $val = $et->Decode(substr($$dataPt, $pos, $size), 'UTF8');
        unless ($$tagTablePtr{$tag}) {
            # generate a name for the unknown tag
            # (IDs containing no lower-case letters are lower-cased first)
            my $name = Image::ExifTool::MakeTagName(($tag =~ /[a-z]/) ? $tag : lc($tag));
            AddTagToTable($tagTablePtr, $tag, { Name => $name });
        }
        $et->HandleTag($tagTablePtr, $tag, $val,
            DataPt  => $dataPt,
            DataPos => $dataPos,
            Start   => $pos,
            Size    => $size,
            Index   => $index,
        );
        $pos += $size;
    }
    return 1;
}

#------------------------------------------------------------------------------
# Process Audible 'cvrx' cover art atom from M4B files (ref PH)
# Inputs: 0) ExifTool object ref, 1) dirInfo ref, 2) tag table ref
# Returns: 1 on success, 0 if the data is too short for the sizes it declares
# Notes: Data layout as parsed here (the first 8 bytes are not examined):
#          0x08      int16u  length of cover art type string
#          0x0a      string  cover art type
#          +2 bytes  (not examined)
#          +4 bytes  int32u  cover art size
#          followed by the cover art data
sub ProcessAudible_cvrx($$$)
{
    my ($et, $dirInfo, $tagTablePtr) = @_;
    my $dataPt = $$dirInfo{DataPt};
    my $dataPos = $$dirInfo{DataPos};
    my $dirLen = length $$dataPt;
    return 0 if 0x0a > $dirLen;
    my $len = Get16u($dataPt, 0x08);
    return 0 if 0x0a + $len + 6 > $dirLen;
    my $size = Get32u($dataPt, 0x0a + $len + 2);
    return 0 if 0x0a + $len + 6 + $size > $dirLen;
    $et->VerboseDir('Audible_cvrx', undef, $dirLen);
    # (values are passed as undef with DataPt/Start/Size giving their location)
    $et->HandleTag($tagTablePtr, 'CoverArtType', undef,
        DataPt  => $dataPt,
        DataPos => $dataPos,
        Start   => 0x0a,
        Size    => $len,
    );
    $et->HandleTag($tagTablePtr, 'CoverArt', undef,
        DataPt  => $dataPt,
        DataPos => $dataPos,
        Start   => 0x0a + $len + 6,
        Size    => $size,
    );
    return 1;
}

#------------------------------------------------------------------------------
# Read information from an Audible .AA file
# Inputs: 0) ExifTool ref, 1) dirInfo ref
# Returns: 1 on success, 0 if this wasn't a valid AA file
# Notes: File layout as parsed here (all integers big-endian):
#          0   int32u   file size
#          4   4 bytes  magic number (57 90 75 36)
#          8   int32u   number of table-of-contents (TOC) entries
#          16  TOC: 12 bytes per entry (int32u chunk type, offset, length)
#        Only chunk types 2 (metadata dictionary), 6 (offset table) and 11
#        (cover art) are read.  Once the file has been recognized, format
#        problems generate a warning and 1 is still returned.
sub ProcessAA($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($buff, $toc, $entry, $i);

    # check magic number
    return 0 unless $raf->Read($buff, 16) == 16 and $buff=~/^.{4}\x57\x90\x75\x36/s;
    # check file size
    if (defined $$et{VALUE}{FileSize}) {
        # first 4 bytes of the file should be the filesize
        unpack('N', $buff) == $$et{VALUE}{FileSize} or return 0;
    }
    $et->SetFileType();
    SetByteOrder('MM');
    my $bytes = 12 * Get32u(\$buff, 8);     # table of contents size in bytes
    $bytes > 0xc00 and $et->Warn('Invalid TOC'), return 1;
    # read the table of contents
    $raf->Read($toc, $bytes) == $bytes or $et->Warn('Truncated TOC'), return 1;
    my $tagTablePtr = GetTagTable('Image::ExifTool::Audible::Main');
    # parse table of contents (in $toc)
    for ($entry=0; $entry<$bytes; $entry+=12) {
        my $type = Get32u(\$toc, $entry);
        next unless $type == 2 or $type == 6 or $type == 11;
        my $offset = Get32u(\$toc, $entry + 4);
        my $length = Get32u(\$toc, $entry + 8) or next;     # (ignore empty chunks)
        $raf->Seek($offset, 0) or $et->Warn("Chunk $type seek error"), last;
        if ($type == 6) {   # offset table
            next if $length < 4 or $raf->Read($buff, 4) != 4;   # only read the chapter count
            $et->HandleTag($tagTablePtr, '_chapter_count', Get32u(\$buff, 0));
            next;
        }
        # read the chunk
        $length > 100000000 and $et->Warn("Chunk $type too big"), next;
        $raf->Read($buff, $length) == $length or $et->Warn("Chunk $type read error"), last;
        if ($type == 11) {  # cover art
            next if $length < 8;
            my $len = Get32u(\$buff, 0);    # cover art length
            my $off = Get32u(\$buff, 4);    # cover art offset (from start of file)
            # image must start after the 8-byte header and lie entirely within this chunk
            next if $off < $offset + 8 or $off - $offset + $len > $length;
            $et->HandleTag($tagTablePtr, '_cover_art', substr($buff, $off-$offset, $len));
            next;
        }
        # parse metadata dictionary (in $buff)
        $length < 4 and $et->Warn('Bad dictionary'), next;
        my $num = Get32u(\$buff, 0);
        $num > 0x200 and $et->Warn('Bad dictionary count'), next;
        my $pos = 4;    # dictionary starts immediately after count
        require Image::ExifTool::HTML;  # (for UnescapeHTML)
        $et->VerboseDir('Audible Metadata', $num);
        for ($i=0; $i<$num; ++$i) {
            my $tagPos = $pos + 9;                  # position of tag string
            $tagPos > $length and $et->Warn('Truncated dictionary'), last;
            # (1 unknown byte ignored at start of each dictionary entry)
            my $tagLen = Get32u(\$buff, $pos + 1);  # tag string length
            my $valLen = Get32u(\$buff, $pos + 5);  # value string length
            my $valPos = $tagPos + $tagLen;         # position of value string
            my $nxtPos = $valPos + $valLen;         # position of next entry
            $nxtPos > $length and $et->Warn('Bad dictionary entry'), last;
            my $tag = substr($buff, $tagPos, $tagLen);
            my $val = substr($buff, $valPos, $valLen);
            unless ($$tagTablePtr{$tag}) {
                my $name = Image::ExifTool::MakeTagName($tag);
                $name =~ s/_(.)/\U$1/g; # change from underscore-separated to mixed case
                AddTagToTable($tagTablePtr, $tag, { Name => $name });
            }
            # unescape HTML character references and convert from UTF-8
            $val = $et->Decode(Image::ExifTool::HTML::UnescapeHTML($val), 'UTF8');
            $et->HandleTag($tagTablePtr, $tag, $val,
                DataPos => $offset,
                DataPt  => \$buff,
                Start   => $valPos,
                Size    => $valLen,
                Index   => $i,
            );
            $pos = $nxtPos; # step to next dictionary entry
        }
    }
    return 1;
}

1;  # end

__END__

=head1 NAME

Image::ExifTool::Audible - Read meta information from Audible audio books

=head1 SYNOPSIS

This module is used by Image::ExifTool

=head1 DESCRIPTION

This module contains definitions required by Image::ExifTool to read meta
information from Audible audio books.

=head1 AUTHOR

Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)

This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=head1 REFERENCES

=over 4

=item L<https://github.com/jteeuwen/audible>

=item L<https://code.google.com/p/pyaudibletags/>

=item L<http://wiki.multimedia.cx/index.php?title=Audible_Audio>

=back

=head1 SEE ALSO

L<Image::ExifTool::TagNames/Audible Tags>,
L<Image::ExifTool(3pm)|Image::ExifTool>

=cut
