1#------------------------------------------------------------------------------
2# File:         Audible.pm
3#
4# Description:  Read metadata from Audible audio books
5#
6# Revisions:    2015/04/05 - P. Harvey Created
7#
8# References:   1) https://github.com/jteeuwen/audible
9#               2) https://code.google.com/p/pyaudibletags/
10#               3) http://wiki.multimedia.cx/index.php?title=Audible_Audio
11#------------------------------------------------------------------------------
12
13package Image::ExifTool::Audible;
14
15use strict;
16use vars qw($VERSION);
17use Image::ExifTool qw(:DataAccess :Utils);
18
19$VERSION = '1.02';
20
21sub ProcessAudible_meta($$$);
22sub ProcessAudible_cvrx($$$);
23
# tag table for Audible .AA files: keys are metadata dictionary tag names, except
# for the underscore-prefixed pseudo tags that ProcessAA fills in from other chunks
%Image::ExifTool::Audible::Main = (
    GROUPS => { 2 => 'Audio' },
    NOTES => q{
        ExifTool will extract any information found in the metadata dictionary of
        Audible .AA files, even if not listed in the table below.
    },

    # --- tags found in the metadata dictionary (chunk 2) ---
    'pubdate' => {
        Name   => 'PublishDate',
        Groups => { 2 => 'Time' },
    },
    'pub_date_start' => {
        Name   => 'PublishDateStart',
        Groups => { 2 => 'Time' },
    },
    'author' => {
        Name   => 'Author',
        Groups => { 2 => 'Author' },
    },
    'copyright' => {
        Name   => 'Copyright',
        Groups => { 2 => 'Author' },
    },
    # other dictionary tags that have been seen (ref PH), and which are added to
    # this table on the fly when encountered:
    #   product_id, parent_id, title, provider, narrator, price, description,
    #   long_description, short_title, is_aggregation, title_id, codec, HeaderSeed,
    #   EncryptedBlocks, HeaderKey, license_list, CPUType, license_count,
    #   <12 hex digits>, parent_short_title, parent_title, aggregation_id,
    #   short_description, user_alias

    # --- information extracted from other chunks ---
    '_chapter_count' => {   # (from chunk 6)
        Name => 'ChapterCount',
    },
    '_cover_art' => {       # (from chunk 11)
        Name   => 'CoverArt',
        Groups => { 2 => 'Preview' },
        Binary => 1,
    },
);
49
# atoms seen inside the 'tags' atom of Audible .m4b audio books (ref PH)
# (each one is a container that is processed using its own tag table)
%Image::ExifTool::Audible::tags = (
    GROUPS => { 0 => 'QuickTime', 2 => 'Audio' },
    NOTES => 'Information found in "tags" atom of Audible M4B audio books.',
    'meta' => {
        Name => 'Audible_meta',
        SubDirectory => {
            TagTable => 'Image::ExifTool::Audible::meta',
        },
    },
    'cvrx' => {
        Name => 'Audible_cvrx',
        SubDirectory => {
            TagTable => 'Image::ExifTool::Audible::cvrx',
        },
    },
    'tseg' => {
        Name => 'Audible_tseg',
        SubDirectory => {
            TagTable => 'Image::ExifTool::Audible::tseg',
        },
    },
);
67
# tags seen in the 'meta' atom of Audible .m4b audio books (ref PH)
# (decoded by ProcessAudible_meta, which also adds any tag ID not listed here)
%Image::ExifTool::Audible::meta = (
    PROCESS_PROC => \&ProcessAudible_meta,
    GROUPS => { 0 => 'QuickTime', 2 => 'Audio' },
    NOTES => 'Information found in Audible M4B "meta" atom.',
    'Album' => 'Album',
    'ALBUMARTIST' => {
        Name   => 'AlbumArtist',
        Groups => { 2 => 'Author' },
    },
    'Artist' => {
        Name   => 'Artist',
        Groups => { 2 => 'Author' },
    },
    'Comment' => 'Comment',
    'Genre' => 'Genre',
    'itunesmediatype' => {
        Name        => 'iTunesMediaType',
        Description => 'iTunes Media Type',
    },
    'SUBTITLE' => 'Subtitle',
    'Title' => 'Title',
    'TOOL' => 'CreatorTool',
    'Year' => {
        Name   => 'Year',
        Groups => { 2 => 'Time' },
    },
    'track' => 'ChapterName',   # (found in 'meta' of 'tseg' atom)
);
85
# tags extracted from the 'cvrx' atom of Audible .m4b audio books (ref PH)
# (both are located within the atom data by ProcessAudible_cvrx)
%Image::ExifTool::Audible::cvrx = (
    PROCESS_PROC => \&ProcessAudible_cvrx,
    GROUPS => { 0 => 'QuickTime', 2 => 'Audio' },
    NOTES => 'Audible cover art information in M4B audio books.',
    VARS => { NO_ID => 1 },
    'CoverArtType' => 'CoverArtType',
    'CoverArt' => {
        Name   => 'CoverArt',
        Groups => { 2 => 'Preview' },
        Binary => 1,
    },
);
99
# atoms seen inside the 'tseg' atom of Audible .m4b audio books (ref PH)
%Image::ExifTool::Audible::tseg = (
    GROUPS => { 0 => 'QuickTime', 2 => 'Audio' },
    'tshd' => {
        Name      => 'ChapterNumber',
        Format    => 'int32u',
        ValueConv => '$val + 1',    # (stored value is zero-based, so count from 1)
    },
    # nested 'meta' atom, decoded with the same table as the top-level 'meta'
    'meta' => {
        Name => 'Audible_meta2',
        SubDirectory => {
            TagTable => 'Image::ExifTool::Audible::meta',
        },
    },
);
113
#------------------------------------------------------------------------------
# Decode the tag/value records stored in an Audible M4B 'meta' atom (ref PH)
# Inputs: 0) ExifTool object ref, 1) dirInfo ref (DataPt, DataPos),
#         2) tag table ref (unknown tag ID's are added to this table)
# Returns: 1 on success, 0 if the data is too short to contain the record count
# Notes: Record layout, as parsed here: int8u flag (0x80 or 0x00), int16u tag ID
#        length, tag ID, int16u 0x0001, int32u value size, value.  Parsing
#        stops quietly at the first record which doesn't fit this layout.
sub ProcessAudible_meta($$$)
{
    my ($et, $dirInfo, $tagTablePtr) = @_;
    my $dataPt = $$dirInfo{DataPt};
    my $dataPos = $$dirInfo{DataPos};
    my $dirLen = length $$dataPt;
    return 0 if $dirLen < 4;
    # data begins with the number of records
    my $count = Get32u($dataPt, 0);
    $et->VerboseDir('Audible_meta', $count);
    my $cursor = 4;     # position of the current record
    my $index = 0;
    while ($index < $count) {
        # need 3 bytes for the flag and the tag ID length
        last if $cursor + 3 > $dirLen;
        my $flag = Get8u($dataPt, $cursor);         # ? (0x80 or 0x00)
        last if $flag != 0x80 and $flag != 0x00;
        my $idLen = Get16u($dataPt, $cursor + 1);   # tag ID length
        last unless $idLen;
        my $idPos = $cursor + 3;                    # position of tag ID
        my $valPos = $idPos + $idLen + 6;           # position of value
        last if $valPos > $dirLen;
        my $tag = substr($$dataPt, $idPos, $idLen);
        # tag ID must be followed by 0x0001 (version?)
        last if Get16u($dataPt, $idPos + $idLen) != 0x0001;
        my $size = Get32u($dataPt, $valPos - 4);    # value size
        last if $valPos + $size > $dirLen;
        my $val = $et->Decode(substr($$dataPt, $valPos, $size), 'UTF8');
        unless ($$tagTablePtr{$tag}) {
            # generate a name for this unknown tag (all-caps ID's are lowercased first)
            my $name = Image::ExifTool::MakeTagName(($tag =~ /[a-z]/) ? $tag : lc($tag));
            AddTagToTable($tagTablePtr, $tag, { Name => $name });
        }
        $et->HandleTag($tagTablePtr, $tag, $val,
            DataPt  => $dataPt,
            DataPos => $dataPos,
            Start   => $valPos,
            Size    => $size,
            Index   => $index,
        );
        $cursor = $valPos + $size;  # step to the next record
        ++$index;
    }
    return 1;
}
158
#------------------------------------------------------------------------------
# Extract cover art type and image from an Audible M4B 'cvrx' atom (ref PH)
# Inputs: 0) ExifTool object ref, 1) dirInfo ref (DataPt, DataPos),
#         2) tag table ref
# Returns: 1 on success, 0 if the data is too short for the sizes it declares
# Notes: Layout, as parsed here: int16u type length at offset 0x08, type string
#        at 0x0a, then 2 unused bytes, int32u image size, and the image itself
sub ProcessAudible_cvrx($$$)
{
    my ($et, $dirInfo, $tagTablePtr) = @_;
    my $dataPt = $$dirInfo{DataPt};
    my $dataPos = $$dirInfo{DataPos};
    my $dirLen = length $$dataPt;
    my $typePos = 0x0a;                         # position of cover art type
    return 0 if $dirLen < $typePos;
    my $typeLen = Get16u($dataPt, 0x08);        # length of cover art type
    my $artPos = $typePos + $typeLen + 6;       # position of cover art image
    return 0 if $artPos > $dirLen;
    my $artLen = Get32u($dataPt, $artPos - 4);  # size of cover art image
    return 0 if $artPos + $artLen > $dirLen;
    $et->VerboseDir('Audible_cvrx', undef, $dirLen);
    # both values are taken directly from the atom data
    my $item;
    foreach $item (['CoverArtType', $typePos, $typeLen], ['CoverArt', $artPos, $artLen]) {
        my ($tag, $start, $size) = @$item;
        $et->HandleTag($tagTablePtr, $tag, undef,
            DataPt  => $dataPt,
            DataPos => $dataPos,
            Start   => $start,
            Size    => $size,
        );
    }
    return 1;
}
189
#------------------------------------------------------------------------------
# Extract metadata from an Audible .AA file
# Inputs: 0) ExifTool object ref, 1) dirInfo ref (RAF is the file to read)
# Returns: 1 if the file was recognized as AA (even when later parsing had to be
#          abandoned with a warning), 0 if this wasn't a valid AA file
# Notes: Only table-of-contents chunk types 2 (metadata dictionary), 6 (offset
#        table, read for the chapter count) and 11 (cover art) are examined
sub ProcessAA($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($buff, $toc);

    # the 16-byte header must contain the magic number in bytes 4-7
    $raf->Read($buff, 16) == 16 or return 0;
    $buff =~ /^.{4}\x57\x90\x75\x36/s or return 0;
    # the first 4 bytes of the file should be the file size (checked if known)
    my $fileSize = $$et{VALUE}{FileSize};
    return 0 if defined $fileSize and unpack('N', $buff) != $fileSize;

    $et->SetFileType();
    SetByteOrder('MM');
    # the table of contents consists of 12-byte entries
    my $numEntries = Get32u(\$buff, 8);
    my $tocSize = 12 * $numEntries;
    if ($tocSize > 0xc00) {
        $et->Warn('Invalid TOC');
        return 1;
    }
    unless ($raf->Read($toc, $tocSize) == $tocSize) {
        $et->Warn('Truncated TOC');
        return 1;
    }
    my $tagTablePtr = GetTagTable('Image::ExifTool::Audible::Main');

    # step through the table of contents (each entry: chunk type, offset, length)
    my ($n, $index);
CHUNK:
    foreach $n (0 .. $numEntries - 1) {
        my $entry = 12 * $n;
        my $type = Get32u(\$toc, $entry);
        next CHUNK if $type != 2 and $type != 6 and $type != 11;
        my $offset = Get32u(\$toc, $entry + 4);
        my $length = Get32u(\$toc, $entry + 8);
        next CHUNK unless $length;
        unless ($raf->Seek($offset, 0)) {
            $et->Warn("Chunk $type seek error");
            last CHUNK;
        }
        if ($type == 6) {
            # offset table: only the chapter count at the start is read
            if ($length >= 4 and $raf->Read($buff, 4) == 4) {
                $et->HandleTag($tagTablePtr, '_chapter_count', Get32u(\$buff, 0));
            }
            next CHUNK;
        }
        # load the whole chunk into memory (within reason)
        if ($length > 100000000) {
            $et->Warn("Chunk $type too big");
            next CHUNK;
        }
        unless ($raf->Read($buff, $length) == $length) {
            $et->Warn("Chunk $type read error");
            last CHUNK;
        }
        if ($type == 11) {
            # cover art: chunk starts with the image length and its file offset,
            # and the image must lie after these 8 bytes and inside the chunk
            if ($length >= 8) {
                my $artLen = Get32u(\$buff, 0);
                my $artOff = Get32u(\$buff, 4);
                if ($artOff >= $offset + 8 and $artOff - $offset + $artLen <= $length) {
                    $et->HandleTag($tagTablePtr, '_cover_art', substr($buff, $artOff-$offset, $artLen));
                }
            }
            next CHUNK;
        }
        # what remains is the metadata dictionary (type 2), now in $buff
        if ($length < 4) {
            $et->Warn('Bad dictionary');
            next CHUNK;
        }
        my $num = Get32u(\$buff, 0);
        if ($num > 0x200) {
            $et->Warn('Bad dictionary count');
            next CHUNK;
        }
        my $pos = 4;    # first entry comes immediately after the count
        require Image::ExifTool::HTML;  # (for UnescapeHTML)
        $et->VerboseDir('Audible Metadata', $num);
ITEM:
        foreach $index (0 .. $num - 1) {
            # entry header is 9 bytes: 1 unknown byte (ignored), then the
            # int32u lengths of the tag string and of the value string
            my $tagPos = $pos + 9;                  # position of tag string
            if ($tagPos > $length) {
                $et->Warn('Truncated dictionary');
                last ITEM;
            }
            my $tagLen = Get32u(\$buff, $pos + 1);
            my $valLen = Get32u(\$buff, $pos + 5);
            my $valPos = $tagPos + $tagLen;         # position of value string
            my $nxtPos = $valPos + $valLen;         # position of next entry
            if ($nxtPos > $length) {
                $et->Warn('Bad dictionary entry');
                last ITEM;
            }
            my $tag = substr($buff, $tagPos, $tagLen);
            my $val = substr($buff, $valPos, $valLen);
            unless ($$tagTablePtr{$tag}) {
                # add unknown tag, with underscore-separated ID changed to mixed case
                (my $name = Image::ExifTool::MakeTagName($tag)) =~ s/_(.)/\U$1/g;
                AddTagToTable($tagTablePtr, $tag, { Name => $name });
            }
            # unescape HTML character references and convert from UTF-8
            $val = $et->Decode(Image::ExifTool::HTML::UnescapeHTML($val), 'UTF8');
            $et->HandleTag($tagTablePtr, $tag, $val,
                DataPos => $offset,
                DataPt  => \$buff,
                Start   => $valPos,
                Size    => $valLen,
                Index   => $index,
            );
            $pos = $nxtPos;
        }
    }
    return 1;
}
274
2751;  # end
276
277__END__
278
279=head1 NAME
280
281Image::ExifTool::Audible - Read meta information from Audible audio books
282
283=head1 SYNOPSIS
284
285This module is used by Image::ExifTool
286
287=head1 DESCRIPTION
288
289This module contains definitions required by Image::ExifTool to read meta
290information from Audible audio books.
291
292=head1 AUTHOR
293
294Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
295
296This library is free software; you can redistribute it and/or modify it
297under the same terms as Perl itself.
298
299=head1 REFERENCES
300
301=over 4
302
303=item L<https://github.com/jteeuwen/audible>
304
305=item L<https://code.google.com/p/pyaudibletags/>
306
307=item L<http://wiki.multimedia.cx/index.php?title=Audible_Audio>
308
309=back
310
311=head1 SEE ALSO
312
313L<Image::ExifTool::TagNames/Audible Tags>,
314L<Image::ExifTool(3pm)|Image::ExifTool>
315
316=cut
317
318