1#------------------------------------------------------------------------------
2# File:         PLIST.pm
3#
4# Description:  Read Apple PLIST information
5#
6# Revisions:    2013-02-01 - P. Harvey Created
7#
8# References:   1) http://www.apple.com/DTDs/PropertyList-1.0.dtd
9#               2) http://opensource.apple.com/source/CF/CF-550/CFBinaryPList.c
10#
11# Notes:      - Sony MODD files also use XML PLIST format, but with a few quirks
12#
13#             - Decodes both the binary and XML-based PLIST formats
14#------------------------------------------------------------------------------
15
16package Image::ExifTool::PLIST;
17
18use strict;
19use vars qw($VERSION);
20use Image::ExifTool qw(:DataAccess :Utils);
21use Image::ExifTool::XMP;
22use Image::ExifTool::GPS;
23
24$VERSION = '1.09';
25
26sub ExtractObject($$;$);
27sub Get24u($$);
28
29# access routines to read various-sized integer/real values (add 0x100 to size for reals)
30my %readProc = (
31    1 => \&Get8u,
32    2 => \&Get16u,
33    3 => \&Get24u,
34    4 => \&Get32u,
35    8 => \&Get64u,
36    0x104 => \&GetFloat,
37    0x108 => \&GetDouble,
38);
39
40# recognize different types of PLIST files based on certain tags
41my %plistType = (
42    adjustmentBaseVersion => 'AAE',
43);
44
45# PLIST tags (generated on-the-fly for most tags)
46%Image::ExifTool::PLIST::Main = (
47    PROCESS_PROC => \&ProcessPLIST,
48    GROUPS => { 0 => 'PLIST', 1 => 'XML', 2 => 'Document' },
49    VARS => { LONG_TAGS => 4 },
50    NOTES => q{
51        Apple Property List tags.  ExifTool reads both XML and binary-format PLIST
52        files, and will extract any existing tags even if they aren't listed below.
53        These tags belong to the family 0 "PLIST" group, but family 1 group may be
54        either "XML" or "PLIST" depending on whether the format is XML or binary.
55    },
56#
57# tags found in PLIST information of QuickTime iTunesInfo iTunMOVI atom (ref PH)
58#
59    'cast//name'          => { Name => 'Cast',          List => 1 },
60    'directors//name'     => { Name => 'Directors',     List => 1 },
61    'producers//name'     => { Name => 'Producers',     List => 1 },
62    'screenwriters//name' => { Name => 'Screenwriters', List => 1 },
63    'codirectors//name'   => { Name => 'Codirectors',   List => 1 }, # (NC)
64    'studio//name'        => { Name => 'Studio',        List => 1 }, # (NC)
65#
66# tags found in MODD files (ref PH)
67#
68    'MetaDataList//DateTimeOriginal' => {
69        Name => 'DateTimeOriginal',
70        Description => 'Date/Time Original',
71        Groups => { 2 => 'Time' },
72        # Sony uses a "real" here -- number of days since Dec 31, 1899
73        ValueConv => 'IsFloat($val) ? ConvertUnixTime(($val - 25569) * 24 * 3600) : $val',
74        PrintConv => '$self->ConvertDateTime($val)',
75    },
76    'MetaDataList//Duration' => {
77        Name => 'Duration',
78        Groups => { 2 => 'Video' },
79        PrintConv => 'ConvertDuration($val)',
80    },
81    'MetaDataList//Geolocation/Latitude' => {
82        Name => 'GPSLatitude',
83        Groups => { 2 => 'Location' },
84        PrintConv => 'Image::ExifTool::GPS::ToDMS($self, $val, 1, "N")',
85    },
86    'MetaDataList//Geolocation/Longitude' => {
87        Name => 'GPSLongitude',
88        Groups => { 2 => 'Location' },
89        PrintConv => 'Image::ExifTool::GPS::ToDMS($self, $val, 1, "E")',
90    },
91    'MetaDataList//Geolocation/MapDatum' => {
92        Name => 'GPSMapDatum',
93        Groups => { 2 => 'Location' },
94    },
95    XMLFileType => {
96        # recognize MODD files by their content
97        RawConv => q{
98            if ($val eq 'ModdXML' and $$self{FILE_TYPE} eq 'XMP') {
99                $self->OverrideFileType('MODD');
100            }
101            return $val;
102        },
103    },
104);
105
106#------------------------------------------------------------------------------
107# We found a PLIST XML property name/value
108# Inputs: 0) ExifTool object ref, 1) tag table ref
109#         2) reference to array of XML property names (last is current property)
110#         3) property value, 4) attribute hash ref (not used here)
111# Returns: 1 if valid tag was found
112sub FoundTag($$$$;$)
113{
114    my ($et, $tagTablePtr, $props, $val, $attrs) = @_;
115    return 0 unless @$props;
116    my $verbose = $et->Options('Verbose');
117    my $keys = $$et{PListKeys} || ( $$et{PListKeys} = [] );
118
119    my $prop = $$props[-1];
120    if ($verbose > 1) {
121        $et->VPrint(0, $$et{INDENT}, '[', join('/',@$props), ' = ',
122                    $et->Printable($val), "]\n");
123    }
124    # un-escape XML character entities
125    $val = Image::ExifTool::XMP::UnescapeXML($val);
126
127    # handle the various PLIST properties
128    if ($prop eq 'data') {
129        if ($val =~ /^[0-9a-f]+$/ and not length($val) & 0x01) {
130            # MODD files use ASCII-hex encoded "data"...
131            my $buff = pack('H*', $val);
132            $val = \$buff;
133        } else {
134            # ...but the PLIST DTD specifies Base64 encoding
135            $val = Image::ExifTool::XMP::DecodeBase64($val);
136        }
137    } elsif ($prop eq 'date') {
138        $val = Image::ExifTool::XMP::ConvertXMPDate($val);
139    } elsif ($prop eq 'true' or $prop eq 'false') {
140        $val = ucfirst $prop;
141    } else {
142        # convert from UTF8 to ExifTool Charset
143        $val = $et->Decode($val, 'UTF8');
144        if ($prop eq 'key') {
145            if (@$props <= 3) { # top-level key should be plist/dict/key
146                @$keys = ( $val );
147            } else {
148                # save key names to be used in tag name
149                push @$keys, '' while @$keys < @$props - 3;
150                pop @$keys while @$keys > @$props - 2;
151                $$keys[@$props - 3] = $val;
152            }
153            return 0;
154        }
155    }
156
157    return 0 unless @$keys; # can't store value if no associated key
158
159    my $tag = join '/', @$keys;     # generate tag ID from 'key' values
160    my $tagInfo = $$tagTablePtr{$tag};
161    unless ($tagInfo) {
162        $et->VPrint(0, $$et{INDENT}, "[adding $tag]\n") if $verbose;
163        # generate tag name from ID
164        my $name = $tag;
165        $name =~ s{^MetaDataList//}{};  # shorten long MODD metadata tag names
166        $name =~ s{//name$}{};          # remove unnecessary MODD "name" property
167        $name =~ s/([^A-Za-z])([a-z])/$1\u$2/g; # capitalize words
168        $name =~ tr/-_a-zA-Z0-9//dc;    # remove illegal characters
169        $tagInfo = { Name => ucfirst($name), List => 1 };
170        if ($prop eq 'date') {
171            $$tagInfo{Groups}{2} = 'Time';
172            $$tagInfo{PrintConv} = '$self->ConvertDateTime($val)';
173        }
174        AddTagToTable($tagTablePtr, $tag, $tagInfo);
175    }
176    # allow list-behaviour only for consecutive tags with the same ID
177    if ($$et{LastPListTag} and $$et{LastPListTag} ne $tagInfo) {
178        delete $$et{LIST_TAGS}{$$et{LastPListTag}};
179    }
180    $$et{LastPListTag} = $tagInfo;
181    # override file type if applicable
182    $et->OverrideFileType($plistType{$tag}) if $plistType{$tag} and $$et{FILE_TYPE} eq 'XMP';
183    # save the tag
184    $et->HandleTag($tagTablePtr, $tag, $val);
185
186    return 1;
187}
188
189#------------------------------------------------------------------------------
190# Get big-endian 24-bit integer
191# Inputs: 0) data ref, 1) offset
192# Returns: integer value
193sub Get24u($$)
194{
195    my ($dataPt, $off) = @_;
196    return unpack 'N', "\0" . substr($$dataPt, $off, 3);
197}
198
199#------------------------------------------------------------------------------
200# Extract object from binary PLIST file at the current file position (ref 2)
201# Inputs: 0) ExifTool ref, 1) PLIST info ref, 2) parent tag ID (undef for top)
202# Returns: the object, or undef on error
203sub ExtractObject($$;$)
204{
205    my ($et, $plistInfo, $parent) = @_;
206    my $raf = $$plistInfo{RAF};
207    my ($buff, $val);
208
209    $raf->Read($buff, 1) == 1 or return undef;
210    my $type = ord($buff) >> 4;
211    my $size = ord($buff) & 0x0f;
212    if ($type == 0) {       # null/bool/fill
213        $val = { 0x00=>'<null>', 0x08=>'True', 0x09=>'False', 0x0f=>'<fill>' }->{$size};
214    } elsif ($type == 1 or $type == 2 or $type == 3) { # int, float or date
215        $size = 1 << $size;
216        my $proc = ($type == 1 ? $readProc{$size} : $readProc{$size + 0x100}) or return undef;
217        $val = &$proc(\$buff, 0) if $raf->Read($buff, $size) == $size;
218        if ($type == 3 and defined $val) {   # date
219            # dates are referenced to Jan 1, 2001 (11323 days from Unix time zero)
220            $val = Image::ExifTool::ConvertUnixTime($val + 11323 * 24 * 3600, 1);
221            $$plistInfo{DateFormat} = 1;
222        }
223    } elsif ($type == 8) {  # UID
224        ++$size;
225        $raf->Read($buff, $size) == $size or return undef;
226        my $proc = $readProc{$size};
227        if ($proc) {
228            $val = &$proc(\$buff, 0);
229        } elsif ($size == 16) {
230            require Image::ExifTool::ASF;
231            $val = Image::ExifTool::ASF::GetGUID($buff);
232        } else {
233            $val = "0x" . unpack 'H*', $buff;
234        }
235    } else {
236        # $size is the size of the remaining types
237        if ($size == 0x0f) {
238            # size is stored in extra integer object
239            $size = ExtractObject($et, $plistInfo);
240            return undef unless defined $size and $size =~ /^\d+$/;
241        }
242        if ($type == 4) {  # data
243            if ($size < 1000000 or $et->Options('Binary')) {
244                $raf->Read($buff, $size) == $size or return undef;
245            } else {
246                $buff = "Binary data $size bytes";
247            }
248            $val = \$buff;  # (return reference for binary data)
249        } elsif ($type == 5) {  # ASCII string
250            $raf->Read($val, $size) == $size or return undef;
251        } elsif ($type == 6) {  # UCS-2BE string
252            $size *= 2;
253            $raf->Read($buff, $size) == $size or return undef;
254            $val = $et->Decode($buff, 'UCS2');
255        } elsif ($type == 10 or $type == 12 or $type == 13) { # array, set or dict
256            # the remaining types store a list of references
257            my $refSize = $$plistInfo{RefSize};
258            my $refProc = $$plistInfo{RefProc};
259            my $num = $type == 13 ? $size * 2 : $size;
260            my $len = $num * $refSize;
261            $raf->Read($buff, $len) == $len or return undef;
262            my $table = $$plistInfo{Table};
263            my ($i, $ref, @refs, @array);
264            for ($i=0; $i<$num; ++$i) {
265                my $ref = &$refProc(\$buff, $i * $refSize);
266                return 0 if $ref >= @$table;
267                push @refs, $ref;
268            }
269            if ($type == 13) { # dict
270                # prevent infinite recursion
271                if (defined $parent and length $parent > 1000) {
272                    $et->WarnOnce('Possible deep recursion while parsing PLIST');
273                    return undef;
274                }
275                my $tagTablePtr = $$plistInfo{TagTablePtr};
276                my $verbose = $et->Options('Verbose');
277                for ($i=0; $i<$size; ++$i) {
278                    # get the entry key
279                    $raf->Seek($$table[$refs[$i]], 0) or return undef;
280                    my $key = ExtractObject($et, $plistInfo);
281                    next unless defined $key and length $key; # silently ignore bad dict entries
282                    # get the entry value
283                    $raf->Seek($$table[$refs[$i+$size]], 0) or return undef;
284                    # generate an ID for this tag
285                    my $tag = defined $parent ? "$parent/$key" : $key;
286                    undef $$plistInfo{DateFormat};
287                    my $val = ExtractObject($et, $plistInfo, $tag);
288                    next if not defined $val or ref($val) eq 'HASH';
289                    my $tagInfo = $et->GetTagInfo($tagTablePtr, $tag);
290                    unless ($tagInfo) {
291                        $et->VPrint(0, $$et{INDENT}, "[adding $tag]\n") if $verbose;
292                        my $name = $tag;
293                        $name =~ s/([^A-Za-z])([a-z])/$1\u$2/g; # capitalize words
294                        $name =~ tr/-_a-zA-Z0-9//dc; # remove illegal characters
295                        $tagInfo = { Name => ucfirst($name), List => 1 };
296                        if ($$plistInfo{DateFormat}) {
297                            $$tagInfo{Groups}{2} = 'Time';
298                            $$tagInfo{PrintConv} = '$self->ConvertDateTime($val)';
299                        }
300                        AddTagToTable($tagTablePtr, $tag, $tagInfo);
301                    }
302                    # allow list-behaviour only for consecutive tags with the same ID
303                    if ($$et{LastPListTag} and $$et{LastPListTag} ne $tagInfo) {
304                        delete $$et{LIST_TAGS}{$$et{LastPListTag}};
305                    }
306                    $$et{LastPListTag} = $tagInfo;
307                    $et->HandleTag($tagTablePtr, $tag, $val);
308                }
309                $val = { }; # flag the value as a dictionary (ie. tags already saved)
310            } else {
311                # extract the referenced objects
312                foreach $ref (@refs) {
313                    $raf->Seek($$table[$ref], 0) or return undef;   # seek to this object
314                    $val = ExtractObject($et, $plistInfo, $parent);
315                    next unless defined $val and ref $val ne 'HASH';
316                    push @array, $val;
317                }
318                $val = \@array;
319            }
320        }
321    }
322    return $val;
323}
324
325#------------------------------------------------------------------------------
326# Process binary PLIST data (ref 2)
327# Inputs: 0) ExifTool object ref, 1) DirInfo ref, 2) tag table ref
328# Returns: 1 on success (and returns plist value as $$dirInfo{Value})
329sub ProcessBinaryPLIST($$$)
330{
331    my ($et, $dirInfo, $tagTablePtr) = @_;
332    my ($i, $buff, @table);
333    my $dataPt = $$dirInfo{DataPt};
334
335    $et->VerboseDir('Binary PLIST');
336    SetByteOrder('MM');
337
338    if ($dataPt) {
339        my $start = $$dirInfo{DirStart};
340        if ($start or ($$dirInfo{DirLen} and $$dirInfo{DirLen} != length $$dataPt)) {
341            my $buf2 = substr($$dataPt, $start || 0, $$dirInfo{DirLen});
342            $$dirInfo{RAF} = new File::RandomAccess(\$buf2);
343        } else {
344            $$dirInfo{RAF} = new File::RandomAccess($dataPt);
345        }
346        my $strt = $$dirInfo{DirStart} || 0;
347    }
348    # read and parse the trailer
349    my $raf = $$dirInfo{RAF} or return 0;
350    $raf->Seek(-32,2) and $raf->Read($buff,32)==32 or return 0;
351    my $intSize = Get8u(\$buff, 6);
352    my $refSize = Get8u(\$buff, 7);
353    my $numObj = Get64u(\$buff, 8);
354    my $topObj = Get64u(\$buff, 16);
355    my $tableOff = Get64u(\$buff, 24);
356
357    return 0 if $topObj >= $numObj;
358    my $intProc = $readProc{$intSize} or return 0;
359    my $refProc = $readProc{$refSize} or return 0;
360
361    # read and parse the offset table
362    my $tableSize = $intSize * $numObj;
363    $raf->Seek($tableOff, 0) and $raf->Read($buff, $tableSize) == $tableSize or return 0;
364    for ($i=0; $i<$numObj; ++$i) {
365        push @table, &$intProc(\$buff, $i * $intSize);
366    }
367    my %plistInfo = (
368        RAF => $raf,
369        RefSize => $refSize,
370        RefProc => $refProc,
371        Table => \@table,
372        TagTablePtr => $tagTablePtr,
373    );
374    # position file pointer at the top object, and extract it
375    $raf->Seek($table[$topObj], 0) or return 0;
376    $$dirInfo{Value} = ExtractObject($et, \%plistInfo);
377    return defined $$dirInfo{Value} ? 1 : 0;
378}
379
380#------------------------------------------------------------------------------
381# Extract information from a PLIST file
382# Inputs: 0) ExifTool object ref, 1) dirInfo ref, 2) tag table ref
383# Returns: 1 on success, 0 if this wasn't valid PLIST
384sub ProcessPLIST($$;$)
385{
386    my ($et, $dirInfo, $tagTablePtr) = @_;
387
388    # process XML PLIST data using the XMP module
389    $$dirInfo{XMPParseOpts}{FoundProc} = \&FoundTag;
390    my $result = Image::ExifTool::XMP::ProcessXMP($et, $dirInfo, $tagTablePtr);
391    delete $$dirInfo{XMPParseOpts};
392
393    unless ($result) {
394        my $buff;
395        my $raf = $$dirInfo{RAF} or return 0;
396        $raf->Seek(0,0) and $raf->Read($buff, 64) or return 0;
397        if ($buff =~ /^bplist0/) {
398            # binary PLIST file
399            my $tagTablePtr = GetTagTable('Image::ExifTool::PLIST::Main');
400            $et->SetFileType('PLIST', 'application/x-plist');
401            $$et{SET_GROUP1} = 'PLIST';
402            unless (ProcessBinaryPLIST($et, $dirInfo, $tagTablePtr)) {
403                $et->Error('Error reading binary PLIST file');
404            }
405            delete $$et{SET_GROUP1};
406            $result = 1;
407        } elsif ($$et{FILE_EXT} and $$et{FILE_EXT} eq 'PLIST' and
408            $buff =~ /^\xfe\xff\x00/)
409        {
410            # (have seen very old PLIST files encoded as UCS-2BE with leading BOM)
411            $et->Error('Old PLIST format currently not supported');
412            $result = 1;
413        }
414    }
415    return $result;
416}
417
4181;  # end
419
420__END__
421
422=head1 NAME
423
424Image::ExifTool::PLIST - Read Apple PLIST information
425
426=head1 SYNOPSIS
427
428This module is used by Image::ExifTool
429
430=head1 DESCRIPTION
431
432This module contains the routines used by Image::ExifTool to extract
433information from Apple Property List files.
434
435=head1 NOTES
436
437This module decodes both the binary and XML-based PLIST format.
438
439=head1 AUTHOR
440
441Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
442
443This library is free software; you can redistribute it and/or modify it
444under the same terms as Perl itself.
445
446=head1 REFERENCES
447
448=over 4
449
450=item L<http://www.apple.com/DTDs/PropertyList-1.0.dtd>
451
452=item L<http://opensource.apple.com/source/CF/CF-550/CFBinaryPList.c>
453
454=back
455
456=head1 SEE ALSO
457
458L<Image::ExifTool::TagNames/PLIST Tags>,
459L<Image::ExifTool(3pm)|Image::ExifTool>
460
461=cut
462
463