1#------------------------------------------------------------------------------
2# File:         VCard.pm
3#
4# Description:  Read vCard and iCalendar meta information
5#
6# Revisions:    2015/04/05 - P. Harvey Created
7#               2015/05/02 - PH Added iCalendar support
8#
9# References:   1) http://en.m.wikipedia.org/wiki/VCard
10#               2) http://tools.ietf.org/html/rfc6350
11#               3) http://tools.ietf.org/html/rfc5545
12#------------------------------------------------------------------------------
13
14package Image::ExifTool::VCard;
15
16use strict;
17use vars qw($VERSION);
18use Image::ExifTool qw(:DataAccess :Utils);
19
20$VERSION = '1.06';
21
22my %unescapeVCard = ( '\\'=>'\\', ','=>',', 'n'=>"\n", 'N'=>"\n" );
23
24# lookup for iCalendar components (used to generate family 1 group names if top level)
25my %isComponent = ( Event=>1, Todo=>1, Journal=>1, Freebusy=>1, Timezone=>1, Alarm=>1 );
26
27my %timeInfo = (
28    # convert common date/time formats to EXIF style
29    ValueConv => q{
30        $val =~ s/(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})(\d{2})(Z?)/$1:$2:$3 $4:$5:$6$7/g;
31        $val =~ s/(\d{4})(\d{2})(\d{2})/$1:$2:$3/g;
32        $val =~ s/(\d{4})-(\d{2})-(\d{2})/$1:$2:$3/g;
33        return $val;
34    },
35    PrintConv => '$self->ConvertDateTime($val)',
36);
37
38# vCard tags (ref 1/2/PH)
39# Note: The case of all tag ID's is normalized to lowercase with uppercase first letter
40%Image::ExifTool::VCard::Main = (
41    GROUPS => { 2 => 'Document' },
42    VARS => { NO_LOOKUP => 1 }, # omit tags from lookup
43    NOTES => q{
44        This table lists common vCard tags, but ExifTool will also extract any other
45        vCard tags found.  Tag names may have "Pref" added to indicate the preferred
46        instance of a vCard property, and other "TYPE" parameters may also added to
47        the tag name.  VCF files may contain multiple vCard entries which are
48        distinguished by the ExifTool family 3 group name (document  number). See
49        L<http://tools.ietf.org/html/rfc6350> for the vCard 4.0 specification.
50    },
51    Version     => { Name => 'VCardVersion',   Description => 'VCard Version' },
52    Fn          => { Name => 'FormattedName',  Groups => { 2 => 'Author' } },
53    N           => { Name => 'Name',           Groups => { 2 => 'Author' } },
54    Bday        => { Name => 'Birthday',       Groups => { 2 => 'Time' }, %timeInfo },
55    Tz          => { Name => 'TimeZone',       Groups => { 2 => 'Time' } },
56    Adr         => { Name => 'Address',        Groups => { 2 => 'Location' } },
57    Geo => {
58        Name => 'Geolocation',
59        Groups => { 2 => 'Location' },
60        # when used as a parameter, VCard 4.0 adds a "geo:" prefix that we need to remove
61        ValueConv => '$val =~ s/^geo://; $val',
62    },
63    Anniversary => { },
64    Email       => { },
65    Gender      => { },
66    Impp        => 'IMPP',
67    Lang        => 'Language',
68    Logo        => { },
69    Nickname    => { },
70    Note        => { },
71    Org         => 'Organization',
72    Photo       => { Groups => { 2 => 'Preview' } },
73    Prodid      => 'Software',
74    Rev         => 'Revision',
75    Sound       => { },
76    Tel         => 'Telephone',
77    Title       => 'JobTitle',
78    Uid         => 'UID',
79    Url         => 'URL',
80    'X-ablabel' => { Name => 'ABLabel', PrintConv => '$val =~ s/^_\$!<(.*)>!\$_$/$1/; $val' },
81    'X-abdate'  => { Name => 'ABDate',  Groups => { 2 => 'Time' }, %timeInfo },
82    'X-aim'     => 'AIM',
83    'X-icq'     => 'ICQ',
84    'X-abuid'   => 'AB_UID',
85    'X-abrelatednames' => 'ABRelatedNames',
86    'X-socialprofile'  => 'SocialProfile',
87);
88
89%Image::ExifTool::VCard::VCalendar = (
90    GROUPS => { 1 => 'VCalendar', 2 => 'Document' },
91    VARS => {
92        NO_LOOKUP => 1, # omit tags from lookup
93        LONG_TAGS => 6, # some X-microsoft tags have unavoidably long ID's
94    },
95    NOTES => q{
96        The VCard module is also used to process iCalendar ICS files since they use
97        a format similar to vCard.  The following table lists standard iCalendar
98        tags, but any existing tags will be extracted.  Top-level iCalendar
99        components (eg. Event, Todo, Timezone, etc.) are used for the family 1 group
100        names, and embedded components (eg. Alarm) are added as a prefix to the tag
101        name.  See L<http://tools.ietf.org/html/rfc5545> for the official iCalendar
102        2.0 specification.
103    },
104    Version     => { Name => 'VCalendarVersion',   Description => 'VCalendar Version' },
105    Calscale    => 'CalendarScale',
106    Method      => { },
107    Prodid      => 'Software',
108    Attach      => 'Attachment',
109    Categories  => { },
110    Class       => 'Classification',
111    Comment     => { },
112    Description => { },
113    Geo => {
114        Name => 'Geolocation',
115        Groups => { 2 => 'Location' },
116        ValueConv => '$val =~ s/^geo://; $val',
117    },
118    Location    => { Name => 'Location',            Groups => { 2 => 'Location' } },
119    'Percent-complete' => 'PercentComplete',
120    Priority    => { },
121    Resources   => { },
122    Status      => { },
123    Summary     => { },
124    Completed   => { Name => 'DateTimeCompleted',   Groups => { 2 => 'Time' }, %timeInfo },
125    Dtend       => { Name => 'DateTimeEnd',         Groups => { 2 => 'Time' }, %timeInfo },
126    Due         => { Name => 'DateTimeDue',         Groups => { 2 => 'Time' }, %timeInfo },
127    Dtstart     => { Name => 'DateTimeStart',       Groups => { 2 => 'Time' }, %timeInfo },
128    Duration    => { },
129    Freebusy    => 'FreeBusyTime',
130    Transp      => 'TimeTransparency',
131    Tzid        => { Name => 'TimezoneID',          Groups => { 2 => 'Time' } },
132    Tzname      => { Name => 'TimezoneName',        Groups => { 2 => 'Time' } },
133    Tzoffsetfrom=> { Name => 'TimezoneOffsetFrom',  Groups => { 2 => 'Time' } },
134    Tzoffsetto  => { Name => 'TimezoneOffsetTo',    Groups => { 2 => 'Time' } },
135    Tzurl       => { Name => 'TimeZoneURL',         Groups => { 2 => 'Time' } },
136    Attendee    => { },
137    Contact     => { },
138    Organizer   => { },
139    'Recurrence-id' => 'RecurrenceID',
140    'Related-to'    => 'RelatedTo',
141    Url         => 'URL',
142    Uid         => 'UID',
143    Exdate      => { Name => 'ExceptionDateTimes',  Groups => { 2 => 'Time' }, %timeInfo },
144    Rdate       => { Name => 'RecurrenceDateTimes', Groups => { 2 => 'Time' }, %timeInfo },
145    Rrule       => { Name => 'RecurrenceRule',      Groups => { 2 => 'Time' } },
146    Action      => { },
147    Repeat      => { },
148    Trigger     => { },
149    Created     => { Name => 'DateCreated',         Groups => { 2 => 'Time' }, %timeInfo },
150    Dtstamp     => { Name => 'DateTimeStamp',       Groups => { 2 => 'Time' }, %timeInfo },
151    'Last-modified' => { Name => 'ModifyDate',      Groups => { 2 => 'Time' }, %timeInfo },
152    Sequence    => 'SequenceNumber',
153    'Request-status' => 'RequestStatus',
154    Acknowledged=> { Name => 'Acknowledged',        Groups => { 2 => 'Time' }, %timeInfo },
155#
156# Observed X-tags (not a comprehensive list):
157#
158    'X-apple-calendar-color'=> 'CalendarColor',
159    'X-apple-default-alarm' => 'DefaultAlarm',
160    'X-apple-local-default-alarm' => 'LocalDefaultAlarm',
161    'X-microsoft-cdo-appt-sequence'     => 'AppointmentSequence',
162    'X-microsoft-cdo-ownerapptid'       => 'OwnerAppointmentID',
163    'X-microsoft-cdo-busystatus'        => 'BusyStatus',
164    'X-microsoft-cdo-intendedstatus'    => 'IntendedBusyStatus',
165    'X-microsoft-cdo-alldayevent'       => 'AllDayEvent',
166    'X-microsoft-cdo-importance' => {
167        Name => 'Importance',
168        PrintConv => {
169            0 => 'Low',
170            1 => 'Normal',
171            2 => 'High',
172        },
173    },
174    'X-microsoft-cdo-insttype' => {
175        Name => 'InstanceType',
176        PrintConv => {
177            0 => 'Non-recurring Appointment',
178            1 => 'Recurring Appointment',
179            2 => 'Single Instance of Recurring Appointment',
180            3 => 'Exception to Recurring Appointment',
181        },
182    },
183    'X-microsoft-donotforwardmeeting'   => 'DoNotForwardMeeting',
184    'X-microsoft-disallow-counter'      => 'DisallowCounterProposal',
185    'X-microsoft-locations' => { Name => 'MeetingLocations', Groups => { 2 => 'Location' } },
186    'X-wr-caldesc'          => 'CalendarDescription',
187    'X-wr-calname'          => 'CalendarName',
188    'X-wr-relcalid'         => 'CalendarID',
189    'X-wr-timezone'         => { Name => 'TimeZone2', Groups => { 2 => 'Time' } },
190    'X-wr-alarmuid'         => 'AlarmUID',
191);
192
193#------------------------------------------------------------------------------
194# Get vCard tag, creating if necessary
195# Inputs: 0) ExifTool ref, 1) tag table ref, 2) tag ID, 3) tag Name,
196#         4) source tagInfo ref, 5) lang code
197# Returns: tagInfo ref
198sub GetVCardTag($$$$;$$)
199{
200    my ($et, $tagTablePtr, $tag, $name, $srcInfo, $langCode) = @_;
201    my $tagInfo = $$tagTablePtr{$tag};
202    unless ($tagInfo) {
203        if ($srcInfo) {
204            $tagInfo = { %$srcInfo };
205        } else {
206            $tagInfo = { };
207            $et->VPrint(0, $$et{INDENT}, "[adding $tag]\n");
208        }
209        $$tagInfo{Name} = $name;
210        delete $$tagInfo{Description};  # create new description
211        AddTagToTable($tagTablePtr, $tag, $tagInfo);
212    }
213    # handle alternate languages (the "language" parameter)
214    $tagInfo = Image::ExifTool::GetLangInfo($tagInfo, $langCode) if $langCode;
215    return $tagInfo;
216}
217
218#------------------------------------------------------------------------------
219# Decode vCard text
220# Inputs: 0) ExifTool ref, 1) vCard text, 2) encoding
221# Returns: decoded text (or array ref for a list of values)
222sub DecodeVCardText($$;$)
223{
224    my ($et, $val, $enc) = @_;
225    $enc = defined($enc) ? lc $enc : '';
226    if ($enc eq 'b' or $enc eq 'base64') {
227        require Image::ExifTool::XMP;
228        $val = Image::ExifTool::XMP::DecodeBase64($val);
229    } else {
230        if ($enc eq 'quoted-printable') {
231            # convert "=HH" hex codes to characters
232            $val =~ s/=([0-9a-f]{2})/chr(hex($1))/ige;
233        }
234        $val = $et->Decode($val, 'UTF8');   # convert from UTF-8
235        # convert unescaped commas to nulls to separate list items
236        $val =~ s/(\\.)|(,)/$1 || "\0"/sge;
237        # unescape necessary characters in value
238        $val =~ s/\\(.)/$unescapeVCard{$1}||$1/sge;
239        # split into list if necessary
240        my @vals = split /\0/, $val;
241        $val = \@vals if @vals > 1;
242    }
243    return $val;
244}
245
246#------------------------------------------------------------------------------
247# Read information in a vCard file
248# Inputs: 0) ExifTool ref, 1) dirInfo ref
249# Returns: 1 on success, 0 if this wasn't a valid vCard file
250sub ProcessVCard($$)
251{
252    local $_;
253    my ($et, $dirInfo) = @_;
254    my $raf = $$dirInfo{RAF};
255    my ($buff, $val, $ok, $component, %compNum, @count);
256
257    return 0 unless $raf->Read($buff, 24) and $raf->Seek(0,0) and $buff=~/^BEGIN:(VCARD|VCALENDAR)\r\n/i;
258    my ($type, $lbl, $tbl, $ext) = uc($1) eq 'VCARD' ? qw(VCard vCard Main VCF) : qw(ICS iCalendar VCalendar ICS);
259    $et->SetFileType($type, undef, $ext);
260    return 1 if $$et{OPTIONS}{FastScan} and $$et{OPTIONS}{FastScan} == 3;
261    local $/ = "\r\n";
262    my $tagTablePtr = GetTagTable("Image::ExifTool::VCard::$tbl");
263    my $more = $raf->ReadLine($buff);   # read first line
264    chomp $buff if $more;
265    while ($more) {
266        # retrieve previous line from $buff
267        $val = $buff if defined $buff;
268        # read ahead to next line to see if is a continuation
269        $more = $raf->ReadLine($buff);
270        if ($more) {
271            chomp $buff;
272            # add continuation line if necessary
273            $buff =~ s/^[ \t]// and $val .= $buff, undef($buff), next;
274        }
275        if ($val =~ /^(BEGIN|END):(V?)(\w+)$/i) {
276            my ($begin, $v, $what) = ((lc($1) eq 'begin' ? 1 : 0), $2, ucfirst lc $3);
277            if ($what eq 'Card' or $what eq 'Calendar') {
278                if ($begin) {
279                    @count = ( { } );   # reset group counters
280                } else {
281                    $ok = 1;    # ok if we read at least on full VCARD or VCALENDAR
282                }
283                next;
284            }
285            # absorb top-level component into family 1 group name
286            if ($isComponent{$what}) {
287                if ($begin) {
288                    unless ($component) {
289                        # begin a new top-level component
290                        @count = ( { } );
291                        $component = $what;
292                        $compNum{$component} = ($compNum{$component} || 0) + 1;
293                        next;
294                    }
295                } elsif ($component and $component eq $what) {
296                    # this top-level component has ended
297                    undef $component;
298                    next;
299                }
300            }
301            # keep count of each component at this level
302            if ($begin) {
303                $count[-1]{$what} = ($count[-1]{$what} || 0) + 1 if $v;
304                push @count, { obj => $what };
305            } elsif (@count > 1) {
306                pop @count;
307            }
308            next;
309        } elsif ($ok) {
310            $ok = 0;
311            $$et{DOC_NUM} = ++$$et{DOC_COUNT};  # read next card as a new document
312        }
313        unless ($val =~ s/^([-A-Za-z0-9.]+)//) {
314            $et->WarnOnce("Unrecognized line in $lbl file");
315            next;
316        }
317        my $tag = $1;
318        # set group if it exists
319        if ($tag =~ s/^([-A-Za-z0-9]+)\.//) {
320            $$et{SET_GROUP1} = ucfirst lc $1;
321        } elsif ($component) {
322            $$et{SET_GROUP1} = $component . $compNum{$component};
323        } else {
324            delete $$et{SET_GROUP1};
325        }
326        my ($name, %param, $p);
327        # vCard tag ID's are case-insensitive, so normalize to lowercase with
328        # an uppercase first letter for use as a tag name
329        $name = ucfirst $tag if $tag =~ /[a-z]/;    # preserve mixed case in name if it exists
330        $tag = ucfirst lc $tag;
331        # get source tagInfo reference
332        my $srcInfo = $et->GetTagInfo($tagTablePtr, $tag);
333        if ($srcInfo) {
334            $name = $$srcInfo{Name};    # use our name
335        } else {
336            $name or $name = $tag;
337            # remove leading "X-" from name if it exists
338            $name =~ s/^X-// and $name = ucfirst $name;
339        }
340        # add object name(s) to tag if necessary
341        if (@count > 1) {
342            my $i;
343            for ($i=$#count-1; $i>=0; --$i) {
344                my $pre = $count[$i-1]{obj};    # use containing object name as tag prefix
345                my $c = $count[$i]{$pre};       # add index for object number
346                $c = '' unless defined $c;
347                $tag = $pre . $c . $tag;
348                $name = $pre . $c . $name;
349            }
350        }
351        # parse parameters
352        while ($val =~ s/^;([-A-Za-z0-9]*)(=?)//) {
353            $p = ucfirst lc $1;
354            # convert old vCard 2.x parameters to the new "TYPE=" format
355            $2 or $val = $1 . $val, $p = 'Type';
356            # read parameter value
357            for (;;) {
358                last unless $val =~ s/^"([^"]*)",?// or $val =~ s/^([^";:,]+,?)//;
359                my $v = $p eq 'Type' ? ucfirst lc $1 : $1;
360                $param{$p} = defined($param{$p}) ? $param{$p} . $v : $v;
361            }
362            if (defined $param{$p}) {
363                $param{$p} =~ s/\\(.)/$unescapeVCard{$1}||$1/sge;
364            } else {
365                $param{$p} = '';
366            }
367        }
368        $val =~ s/^:// or $et->WarnOnce("Invalid line in $lbl file"), next;
369        # add 'Type' parameter to id and name if it exists
370        $param{Type} and $tag .= $param{Type}, $name .= $param{Type};
371        # convert base64-encoded data
372        if ($val =~ s{^data:(\w+)/(\w+);base64,}{}) {
373            my $xtra = ucfirst(lc $1) . ucfirst(lc $2);
374            $tag .= $xtra;
375            $name .= $xtra;
376            $param{Encoding} = 'base64';
377        }
378        $val = DecodeVCardText($et, $val, $param{Encoding});
379        my $tagInfo = GetVCardTag($et, $tagTablePtr, $tag, $name, $srcInfo, $param{Language});
380        $et->HandleTag($tagTablePtr, $tag, $val, TagInfo => $tagInfo);
381        # handle some other parameters that we care about (ignore the rest for now)
382        foreach $p (qw(Geo Label Tzid)) {
383            next unless defined $param{$p};
384            # use tag attributes from our table if it exists
385            my $srcTag2 = $et->GetTagInfo($tagTablePtr, $p);
386            my $pn = $srcTag2 ? $$srcTag2{Name} : $p;
387            $val = DecodeVCardText($et, $param{$p});
388            # add parameter to tag ID and name
389            my ($tg, $nm) = ($tag . $p, $name . $pn);
390            $tagInfo = GetVCardTag($et, $tagTablePtr, $tg, $nm, $srcTag2, $param{Language});
391            $et->HandleTag($tagTablePtr, $tg, $val, TagInfo => $tagInfo);
392        }
393    }
394    delete $$et{SET_GROUP1};
395    delete $$et{DOC_NUM};
396    $ok or $et->Warn("Missing $lbl end");
397    return 1;
398}
399
4001;  # end
401
402__END__
403
404=head1 NAME
405
406Image::ExifTool::VCard - Read vCard and iCalendar meta information
407
408=head1 SYNOPSIS
409
410This module is used by Image::ExifTool
411
412=head1 DESCRIPTION
413
414This module contains definitions required by Image::ExifTool to read meta
415information from vCard VCF and iCalendar ICS files.
416
417=head1 AUTHOR
418
419Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
420
421This library is free software; you can redistribute it and/or modify it
422under the same terms as Perl itself.
423
424=head1 REFERENCES
425
426=over 4
427
428=item L<http://en.m.wikipedia.org/wiki/VCard>
429
430=item L<http://tools.ietf.org/html/rfc6350>
431
432=item L<http://tools.ietf.org/html/rfc5545>
433
434=back
435
436=head1 SEE ALSO
437
438L<Image::ExifTool::TagNames/VCard Tags>,
439L<Image::ExifTool::TagNames/VCard VCalendar Tags>,
440L<Image::ExifTool(3pm)|Image::ExifTool>
441
442=cut
443
444