1#
2# Gramps - a GTK+/GNOME based genealogy program
3#
4# Copyright (C) 2000-2006  Martin Hawlisch, Donald N. Allingham
5# Copyright (C) 2008       Brian G. Matherly
6# Copyright (C) 2011       Michiel D. Nauta
7#
8# This program is free software; you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation; either version 2 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with this program; if not, write to the Free Software
20# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21#
22
23"Import from vCard (RFC 2426)"
24
25#-------------------------------------------------------------------------
26#
27# standard python modules
28#
29#-------------------------------------------------------------------------
30import sys
31import re
32import time
33
34#------------------------------------------------------------------------
35#
36# Set up logging
37#
38#------------------------------------------------------------------------
39import logging
40LOG = logging.getLogger(".ImportVCard")
41
42#-------------------------------------------------------------------------
43#
44# Gramps modules
45#
46#-------------------------------------------------------------------------
47from gramps.gen.const import GRAMPS_LOCALE as glocale
48_ = glocale.translation.gettext
49ngettext = glocale.translation.ngettext  # else "nearby" comments are ignored
50from gramps.gen.errors import GrampsImportError
51from gramps.gen.lib import (Address, Date, DateError, Event, EventRef,
52        EventType, Name, NameType, Person, Surname, Url, UrlType)
53from gramps.gen.db import DbTxn
54from gramps.gen.plug.utils import OpenFileOrStdin
55from gramps.gen.utils.libformatting import ImportInfo
56
57#-------------------------------------------------------------------------
58#
59# Support Functions
60#
61#-------------------------------------------------------------------------
def importData(database, filename, user):
    """
    Entry point used by Gramps to import persons from a VCard file.

    :param database: database the persons are added to.
    :param filename: name of the file to read; handed to OpenFileOrStdin.
    :param user: object used to report errors (notify_error) to the user.
    :returns: an ImportInfo instance on success, None when the file could
              not be read or the import was aborted.
    """
    vcard_parser = VCardParser(database)
    try:
        with OpenFileOrStdin(filename) as vcard_file:
            vcard_parser.parse(vcard_file, user)
    except (EnvironmentError, GrampsImportError) as problem:
        # Both failure kinds are reported to the user in the same way.
        user.notify_error(_("%s could not be opened\n") % filename,
                          str(problem))
        return None
    # VCardParser.parse already shows a "VCARD import report", so this
    # result is redundant; it is kept because the imports_test.py unittest
    # currently requires a return value.
    return ImportInfo({_("Results"): _("done")})
77
78
def splitof_nameprefix(name):
    """
    Split a surname string into a (prefix, surname) tuple.

    The split point is the first uppercase character that directly follows
    a non-alphabetic character (a space, an apostrophe, ...). Everything
    before it, with trailing whitespace removed, is the prefix. When no
    such character exists the prefix is empty and name is returned whole.

    Done by hand because Python's re does not support [[:upper:]].
    """
    after_non_letter = False
    for position, letter in enumerate(name):
        if after_non_letter and letter.isupper():
            return (name[:position].rstrip(), name[position:])
        # Only the character immediately before a capital matters.
        after_non_letter = not letter.isalpha()
    return ('', name)
95
96
def fitin(prototype, receiver, element):
    """
    Find where element has to be inserted in receiver to resemble prototype.

    The words of receiver are looked up in prototype one by one; the first
    word that is found at or after the position of element marks the
    insertion point. The returned index counts every passed word as its
    length plus one separating space. Words of receiver that are not
    inspected are assumed to match. The strings are not compared as word
    lists because element itself may contain a space.

    Example: fitin("Mr. Gaius Julius Caesar", "Gaius Caesar", "Julius") = 6

    :param prototype: Partly to be matched by inserting element in receiver.
    :type prototype: str
    :param receiver: Space separated words that miss words to match prototype.
    :type receiver: str
    :param element: Words that need to be inserted.
    :type element: str
    :returns: Index in receiver where element fits, or -1 when a word of
              receiver does not occur in prototype.
    :rtype: int
    :raises ValueError: if element does not occur in prototype.
    :raises IndexError: if receiver has no words, or all its words occur
                        before element (element belongs at the end).
    """
    words = receiver.split()
    target = prototype.index(element)
    offset = 0
    word_no = 0
    while True:
        # Running out of words raises IndexError, as documented above.
        found_at = prototype.find(words[word_no])
        if found_at >= target:
            return offset
        if found_at == -1:
            return -1
        offset += len(words[word_no]) + 1
        word_no += 1
128
129#-------------------------------------------------------------------------
130#
131# VCardParser class
132#
133#-------------------------------------------------------------------------
class VCardParser:
    """
    Class to read data in VCard format from a file.

    A parser is bound to one database. parse() reads the file line by
    line, builds a Person for every BEGIN ... END block and stores it in
    the database. Problems are collected in self.errors and shown to the
    user in a report when parsing has finished.
    """
    # Matches yyyy-m-d (group 1) or yyyymmdd / yyyy-mm-dd (groups 2, 3, 4).
    DATE_RE = re.compile(r'^(\d{4}-\d{1,2}-\d{1,2})|(?:(\d{4})-?(\d\d)-?(\d\d))')
    GROUP_RE = re.compile(r'^(?:[-0-9A-Za-z]+\.)?(.+)$')  # see RFC 2425 sec5.8.2
    ESCAPE_CHAR = '\\'
    TOBE_ESCAPED = ['\\', ',', ';']  # order is important
    LINE_CONTINUATION = [' ', '\t']

    @staticmethod
    def name_value_split(data):
        """
        Split a property line into its name part and its value.

        The split is on the first colon that is not inside double quotes.
        An optional leading "group." is removed from the name part.

        :param data: one unfolded property line.
        :type data: str
        :returns: (name with parameters, value), or an empty tuple when
                  the line has no usable name/value separator.
        :rtype: tuple
        """
        colon_idx = data.find(':')
        if colon_idx < 1:
            return ()
        # An odd number of quotes before the colon means the colon is
        # inside a quoted parameter value, so look for the next one.
        while data.count('"', 0, colon_idx) % 2 == 1:
            colon_idx = data.find(':', colon_idx + 1)
            if colon_idx == -1:
                # Unbalanced quote: there is no unquoted colon at all.
                # Without this check the search would wrap around and
                # never terminate.
                return ()
        group_name, value = data[:colon_idx], data[colon_idx + 1:]
        name_parts = VCardParser.GROUP_RE.match(group_name)
        if name_parts is None:
            return ()
        return (name_parts.group(1), value)

    @staticmethod
    def unesc(data):
        """
        Remove VCard escape sequences.

        :param data: a string, or a (nested) list of strings.
        :returns: data with every escaped backslash, comma and semicolon
                  replaced by the plain character; lists give a new list.
        :raises TypeError: if data is neither a string nor a list.
        """
        if isinstance(data, str):
            # Backslash is handled last so that the backslashes of the
            # other escape sequences are not consumed first.
            for char in reversed(VCardParser.TOBE_ESCAPED):
                data = data.replace(VCardParser.ESCAPE_CHAR + char, char)
            return data
        elif isinstance(data, list):
            return list(map(VCardParser.unesc, data))
        else:
            raise TypeError("VCard unescaping is not implemented for "
                              "data type %s." % str(type(data)))

    @staticmethod
    def count_escapes(strng):
        """Count the number of escape characters at the end of a string."""
        count = 0
        for char in reversed(strng):
            if char != VCardParser.ESCAPE_CHAR:
                return count
            count += 1
        return count

    @staticmethod
    def split_unescaped(strng, sep):
        """
        Split strng on sep, but only where sep is not escaped.

        A separator is escaped when it is preceded by an odd number of
        escape characters. Escape characters are left in the result.

        :returns: list of parts; always contains at least one element.
        :rtype: list
        """
        strng_parts = strng.split(sep)
        # The last part is not followed by a separator, so it is never
        # merged; a value ending in a lone backslash is kept as it is.
        for i in reversed(range(len(strng_parts) - 1)):
            if VCardParser.count_escapes(strng_parts[i]) % 2 == 1:
                # the sep was escaped so undo split
                appendix = strng_parts.pop(i + 1)
                strng_parts[i] += sep + appendix
        return strng_parts

    def __init__(self, dbase):
        """
        Create a parser that stores imported persons in dbase.

        :param dbase: the database to import into.
        """
        self.database = dbase
        self.formatted_name = ''   # value of FN of the current VCard
        self.name_parts = ''       # raw value of N of the current VCard
        self.next_line = None      # look-ahead line for unfolding
        self.line_num = 0          # number of lines read so far
        self.trans = None          # transaction used during parse()
        self.version = None
        self.person = None         # Person being built, None outside VCard
        self.errors = []           # messages for the import report
        self.number_of_errors = 0

    def __get_next_line(self, filehandle):
        """
        Read and return the line with the next property of the VCard.

        Continuation lines (starting with a space or tab) are appended to
        the line, see RFC 2425 sec.5.8.1. self.next_line always holds the
        line after the one returned.

        :returns: the unfolded line with surrounding whitespace removed
                  (possibly an empty string), or None at end of file.
        """
        line = self.next_line
        self.next_line = filehandle.readline()
        self.line_num += 1
        while self.next_line and self.next_line[0] in self.LINE_CONTINUATION:
            line = line.rstrip("\n")
            # Files that were not opened with newline translation still
            # carry the carriage return of a CRLF line ending.
            if line.endswith("\r"):
                line = line[:-1]
            # Drop the single folding character of the continuation.
            line += self.next_line[1:]
            self.next_line = filehandle.readline()
            self.line_num += 1
        if line:
            return line.strip()
        return None

    def __add_msg(self, problem, line=None):
        """
        Add a message to the import report.

        :param problem: text of the message; an empty string adds a blank
                        line without counting as an error.
        :param line: line number the message refers to; when given (and
                     not 0) the message is prefixed with it.
        """
        if problem != "":
            self.number_of_errors += 1
        if line:
            message = _("Line %(line)5d: %(prob)s\n") % {"line": line,
                                                         "prob": problem}
        else:
            message = problem + "\n"
        self.errors.append(message)

    def parse(self, filehandle, user):
        """
        Prepare the database and parse the input file.

        Database signals are disabled while the file is read and are
        always restored afterwards, also when parsing is aborted by an
        exception. When parsing succeeds a report with the collected
        messages is shown through user.info.

        :param filehandle: open file handle positioned at start of the file
        :param user: object used to show the import report.
        """
        tym = time.time()
        self.person = None
        self.database.disable_signals()
        try:
            with DbTxn(_("vCard import"), self.database,
                       batch=True) as self.trans:
                self._parse_vCard_file(filehandle)
        finally:
            # Never leave the database with its signals switched off.
            self.database.enable_signals()
            self.database.request_rebuild()
        tym = time.time() - tym
        # NOTE(review): tym is a float; ngettext normally expects an
        # integer count. Left unchanged to keep the plural selection.
        # translators: leave all/any {...} untranslated
        msg = ngettext('Import Complete: {number_of} second',
                       'Import Complete: {number_of} seconds', tym
                      ).format(number_of=tym)
        LOG.debug(msg)
        if self.number_of_errors == 0:
            message = _("VCARD import report: No errors detected")
        else:
            message = _("VCARD import report: %s errors detected\n") % \
                self.number_of_errors
        if hasattr(user.uistate, 'window'):
            parent_window = user.uistate.window
        else:
            parent_window = None
        user.info(message, "".join(self.errors),
                  parent=parent_window, monospaced=True)

    def _parse_vCard_file(self, filehandle):
        """
        Read each line of the input file and act accordingly.

        Lines without a property name or value separator are skipped
        silently. Unknown properties, and properties that appear outside
        a BEGIN ... END block, are reported and skipped.
        """
        self.next_line = filehandle.readline()
        self.line_num = 1

        # Properties whose handler takes (fields, value).
        handlers = {
            "VERSION": self.check_version,
            "FN": self.add_formatted_name,
            "N": self.add_name_parts,
            "NICKNAME": self.add_nicknames,
            "SORT-STRING": self.add_sortas,
            "ADR": self.add_address,
            "TEL": self.add_phone,
            "BDAY": self.add_birthday,
            "ROLE": self.add_occupation,
            "URL": self.add_url,
            "EMAIL": self.add_email,
            # VCard 3.0 only has X-GENDER, GENDER is 4.0 syntax,
            # but we want to be robust here.
            "X-GENDER": self.add_gender,
            "GENDER": self.add_gender,
        }

        while True:
            line = self.__get_next_line(filehandle)
            if line is None:
                break
            if line == "":
                continue

            # Also empty for a line without any colon.
            line_parts = self.name_value_split(line)
            if not line_parts:
                continue

            # No check for escaped ; because only fields[0] is used.
            fields = line_parts[0].split(";")

            property_name = fields[0].upper()
            if property_name == "BEGIN":
                self.next_person()
            elif property_name == "END":
                self.finish_person()
            elif property_name == "PRODID":
                # Included cause VCards made by Gramps have this prop.
                pass
            elif property_name in handlers:
                if self.person is None and property_name != "VERSION":
                    # The handlers need a person to add the data to.
                    self.__add_msg(_("Property %(token)s found outside of "
                                     "a VCard, line skipped.") %
                                   {"token": fields[0]}, self.line_num - 1)
                else:
                    handlers[property_name](fields, line_parts[1])
            else:
                self.__add_msg(_("Token >%(token)s< unknown. line skipped: %(line)s") %
                               {"token": fields[0], "line": line},
                               self.line_num - 1)

    def finish_person(self):
        """All info has been collected, write to database."""
        if self.person is not None:
            # A VCard without a usable name is not stored.
            if self.add_name():
                self.database.add_person(self.person, self.trans)
        self.person = None

    def next_person(self):
        """
        A VCard for another person is started.

        A previous VCard that was not closed is finished first and the
        missing END is reported.
        """
        if self.person is not None:
            self.finish_person()
            self.__add_msg(_("BEGIN property not properly closed by END "
                           "property, Gramps can't cope with nested VCards."),
                           self.line_num - 1)
        self.person = Person()
        self.formatted_name = ''
        self.name_parts = ''

    def check_version(self, fields, data):
        """
        Check the version of the VCard, only version 3.0 is supported.

        :raises GrampsImportError: for any other version.
        """
        # Whitespace around the value is ignored like in other properties.
        self.version = data.strip()
        if self.version != "3.0":
            raise GrampsImportError(_("Import of VCards version %s is "
                    "not supported by Gramps.") % self.version)

    def add_formatted_name(self, fields, data):
        """Read the FN property of a VCard; only the first one is used."""
        if not self.formatted_name:
            self.formatted_name = self.unesc(str(data)).strip()

    def add_name_parts(self, fields, data):
        """Read the N property of a VCard; only the first one is used."""
        if not self.name_parts:
            self.name_parts = data.strip()

    def _name_component(self, data_fields, idx):
        """
        Return component idx of the N property as one string.

        The comma separated values of the component are unescaped and
        joined by a space. An absent or blank component gives ''.
        """
        if len(data_fields) > idx and data_fields[idx].strip():
            return ' '.join(self.unesc(
                            self.split_unescaped(data_fields[idx], ',')))
        return ''

    def add_name(self):
        """
        Add the name to the person.

        The name is built from the N property: surnames, given names,
        additional names, title (honorific prefixes) and suffix. FN is
        only used to decide on the order of the first names.

        Returns True on success, False on failure.
        """
        if not self.name_parts.strip():
            self.__add_msg(_("VCard is malformed missing the compulsory N "
                           "property, so there is no name; skip it."),
                           self.line_num - 1)
            return False
        if not self.formatted_name:
            self.__add_msg(_("VCard is malformed missing the compulsory FN "
                           "property, get name from N alone."), self.line_num - 1)
        data_fields = self.split_unescaped(self.name_parts, ';')
        if len(data_fields) != 5:
            # Reported, but the components that are present are still used.
            self.__add_msg(_("VCard is malformed wrong number of name "
                           "components."), self.line_num - 1)

        name = Name()
        name.set_type(NameType(NameType.BIRTH))

        if data_fields[0].strip():
            # assume first surname is primary
            for surname_str in self.split_unescaped(data_fields[0], ','):
                surname = Surname()
                prefix, sname = splitof_nameprefix(self.unesc(surname_str))
                surname.set_surname(sname.strip())
                surname.set_prefix(prefix.strip())
                name.add_surname(surname)
            name.set_primary_surname()

        given_name = self._name_component(data_fields, 1)
        additional_names = self._name_component(data_fields, 2)
        self.add_firstname(given_name.strip(), additional_names.strip(), name)

        title = self._name_component(data_fields, 3)
        if title:
            name.set_title(title)
        suffix = self._name_component(data_fields, 4)
        if suffix:
            name.set_suffix(suffix)

        self.person.set_primary_name(name)
        return True

    def add_firstname(self, given_name, additional_names, name):
        """
        Combine given_name and additional_names and add as firstname to name.

        The formatted name (FN) is used to find out in which order both
        occur. If the given name does not come first it is also set as
        call name.

        :param given_name: stripped given names of the N property.
        :param additional_names: stripped additional names of N.
        :param name: name object that receives first name and call name.
        """
        default = "%s %s" % (given_name, additional_names)
        if not self.formatted_name:
            firstname = default
        elif not given_name:
            firstname = additional_names
        elif not additional_names:
            firstname = given_name
        else:
            given_name_pos = self.formatted_name.find(given_name)
            add_names_pos = self.formatted_name.find(additional_names)
            if given_name_pos == -1:
                # Given name is not in formatted name
                firstname = default
            elif add_names_pos != -1:
                if given_name_pos <= add_names_pos:
                    # Uncertain if given name is used as callname
                    firstname = default
                else:
                    firstname = "%s %s" % (additional_names, given_name)
                    name.set_call_name(given_name)
            else:
                try:
                    idx = fitin(self.formatted_name, additional_names,
                                given_name)
                except IndexError:
                    # All additional names come before the given name.
                    firstname = "%s %s" % (additional_names, given_name)
                    name.set_call_name(given_name)
                else:
                    if idx == -1:
                        # Additional names is not in formatted name
                        firstname = default
                    else:  # Given name in middle of additional names
                        firstname = "%s%s %s" % (additional_names[:idx],
                                     given_name, additional_names[idx:])
                        name.set_call_name(given_name)
        name.set_first_name(firstname.strip())

    def add_nicknames(self, fields, data):
        """
        Read the NICKNAME property of a VCard.

        Every non-blank nickname becomes an alternate name of the person.
        """
        for nick in self.split_unescaped(data, ','):
            nickname = nick.strip()
            if nickname:
                name = Name()
                name.set_nick_name(self.unesc(nickname))
                self.person.add_alternate_name(name)

    def add_sortas(self, fields, data):
        """Read the SORT-STRING property of a VCard (not used yet)."""
        # TODO
        pass

    def add_address(self, fields, data):
        """
        Read the ADR property of a VCard.

        The first three components (post office box, extended address
        and street) are combined into the street; the next four are city,
        state, postal code and country. Further components are ignored.
        An address with only blank components is not added.
        """
        data_fields = self.split_unescaped(data, ';')
        data_fields = [x.strip() for x in self.unesc(data_fields)]
        if not ''.join(data_fields):
            return
        addr = Address()
        street = ' '.join(part for part in data_fields[:3] if part)
        if street:
            addr.set_street(street)
        setters = (addr.set_city, addr.set_state, addr.set_postal_code,
                   addr.set_country)
        # zip stops at the shorter sequence: missing components are left
        # alone and surplus components are ignored.
        for setter, value in zip(setters, data_fields[3:]):
            setter(value)
        self.person.add_address(addr)

    def add_phone(self, fields, data):
        """Read the TEL property of a VCard; stored as address with phone."""
        tel = data.strip()
        if tel:
            addr = Address()
            addr.set_phone(self.unesc(tel))
            self.person.add_address(addr)

    def add_birthday(self, fields, data):
        """
        Read the BDAY property of a VCard.

        A birth event is added to the database and set as birth reference
        of the person. A value that is not a valid date is reported and
        kept as a text-only date. An empty value is ignored.
        """
        date_str = data.strip()
        if not date_str:
            # silently ignore an empty BDAY record
            return
        date = Date()
        date_match = VCardParser.DATE_RE.match(date_str)
        if date_match:
            if date_match.group(2):
                date_str = "%s-%s-%s" % (date_match.group(2),
                                       date_match.group(3), date_match.group(4))
            else:
                date_str = date_match.group(1)
            y, m, d = [int(x, 10) for x in date_str.split('-')]
            try:
                date.set(value=(d, m, y, False))
            except DateError:
                # TRANSLATORS: leave the {vcard_snippet} untranslated
                # in the format string, but you may re-order it if needed.
                self.__add_msg(_(
                    "Invalid date in BDAY {vcard_snippet}, "
                    "preserving date as text."
                    ).format(vcard_snippet=data), self.line_num - 1)
                date.set(modifier=Date.MOD_TEXTONLY, text=data)
        else:
            # TRANSLATORS: leave the {vcard_snippet} untranslated.
            self.__add_msg(_(
                "Date {vcard_snippet} not in appropriate format "
                "yyyy-mm-dd, preserving date as text."
                ).format(vcard_snippet=date_str), self.line_num - 1)
            date.set(modifier=Date.MOD_TEXTONLY, text=date_str)
        event = Event()
        event.set_type(EventType(EventType.BIRTH))
        event.set_date_object(date)
        self.database.add_event(event, self.trans)

        event_ref = EventRef()
        event_ref.set_reference_handle(event.get_handle())
        self.person.set_birth_ref(event_ref)

    def add_occupation(self, fields, data):
        """Read the ROLE property of a VCard; stored as occupation event."""
        occupation = data.strip()
        if occupation:
            event = Event()
            event.set_type(EventType(EventType.OCCUPATION))
            event.set_description(self.unesc(occupation))
            self.database.add_event(event, self.trans)

            event_ref = EventRef()
            event_ref.set_reference_handle(event.get_handle())
            self.person.add_event_ref(event_ref)

    def add_url(self, fields, data):
        """Read the URL property of a VCard."""
        href = data.strip()
        if href:
            url = Url()
            url.set_path(self.unesc(href))
            self.person.add_url(url)

    def add_email(self, fields, data):
        """Read the EMAIL property of a VCard; stored as url of type email."""
        email = data.strip()
        if email:
            url = Url()
            url.set_type(UrlType(UrlType.EMAIL))
            url.set_path(self.unesc(email))
            self.person.add_url(url)

    def add_gender(self, fields, data):
        """
        Read the GENDER property of a VCard.

        Only the first character of the value is looked at, regardless
        of case: M is male and F is female. Other values are ignored.
        """
        gender_value = data.strip()
        if gender_value:
            gender_value = gender_value.upper()
            gender_value = gender_value[0]
            if gender_value == 'M':
                gender = Person.MALE
            elif gender_value == 'F':
                gender = Person.FEMALE
            else:
                return
            self.person.set_gender(gender)
583