#
# Gramps - a GTK+/GNOME based genealogy program
#
# Copyright (C) 2000-2006  Martin Hawlisch, Donald N. Allingham
# Copyright (C) 2008       Brian G. Matherly
# Copyright (C) 2011       Michiel D. Nauta
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#

"Import from vCard (RFC 2426)"

#-------------------------------------------------------------------------
#
# standard python modules
#
#-------------------------------------------------------------------------
import sys
import re
import time

#------------------------------------------------------------------------
#
# Set up logging
#
#------------------------------------------------------------------------
import logging
LOG = logging.getLogger(".ImportVCard")

#-------------------------------------------------------------------------
#
# Gramps modules
#
#-------------------------------------------------------------------------
from gramps.gen.const import GRAMPS_LOCALE as glocale
_ = glocale.translation.gettext
ngettext = glocale.translation.ngettext # else "nearby" comments are ignored
from gramps.gen.errors import GrampsImportError
from gramps.gen.lib import (Address, Date, DateError, Event, EventRef,
                            EventType, Name, NameType, Person, Surname, Url,
                            UrlType)
from gramps.gen.db import DbTxn
from gramps.gen.plug.utils import OpenFileOrStdin
from gramps.gen.utils.libformatting import ImportInfo

#-------------------------------------------------------------------------
#
# Support Functions
#
#-------------------------------------------------------------------------
def importData(database, filename, user):
    """
    Function called by Gramps to import data on persons in VCard format.

    :param database: database handed to :class:`VCardParser`.
    :param filename: name passed to ``OpenFileOrStdin``.
    :param user: object used to report problems (``notify_error``) and,
                 inside the parser, the final import report.
    :returns: an ``ImportInfo`` on success, ``None`` if an
              ``EnvironmentError`` or ``GrampsImportError`` was reported.
    """
    parser = VCardParser(database)
    try:
        with OpenFileOrStdin(filename) as filehandle:
            parser.parse(filehandle, user)
    except (EnvironmentError, GrampsImportError) as msg:
        # Both failure kinds are reported to the user with the same text.
        user.notify_error(_("%s could not be opened\n") % filename, str(msg))
        return
    ## a "VCARD import report" happens in VCardParser so this is not needed:
    ## (but the imports_test.py unittest currently requires it, so here it is)
    return ImportInfo({_("Results"): _("done")})


def splitof_nameprefix(name):
    """
    Return a (prefix, Surname) tuple by splitting on first uppercase char.

    The split only happens at an uppercase character that directly follows
    a non-alphabetic character, so a name that starts with a capital is
    returned unsplit as ``('', name)``.

    Shame on Python for not supporting [[:upper:]] in re!
    """
    look_for_capital = False
    for i, char in enumerate(name):
        if look_for_capital:
            if char.isupper():
                return (name[:i].rstrip(), name[i:])
            else:
                look_for_capital = False
        if not char.isalpha():
            look_for_capital = True
    return ('', name)


def fitin(prototype, receiver, element):
    """
    Return the index in string receiver at which element should be inserted
    to match part of prototype.

    Assume that the part of receiver that is not tested does match.
    Don't split to work with lists because element may contain a space!
    Example: fitin("Mr. Gaius Julius Caesar", "Gaius Caesar", "Julius") = 6

    :param prototype: Partly to be matched by inserting element in receiver.
    :type prototype: str
    :param receiver: Space separated words that miss words to match prototype.
    :type receiver: str
    :param element: Words that need to be inserted; error if not in prototype.
    :type element: str
    :returns: Returns index where element fits in receiver, -1 if receiver
              not in prototype, or throws IndexError if element at end
              receiver.
    :rtype: int
    """
    receiver_idx = 0
    receiver_chunks = receiver.split()
    # str.index raises ValueError when element is absent from prototype.
    element_idx = prototype.index(element)
    i = 0
    idx = prototype.find(receiver_chunks[i])
    # Walk over the receiver words that occur in prototype before element.
    while idx < element_idx:
        if idx == -1:
            return -1
        receiver_idx += len(receiver_chunks[i]) + 1
        i += 1
        # Running out of chunks here is the documented IndexError.
        idx = prototype.find(receiver_chunks[i])
    return receiver_idx

#-------------------------------------------------------------------------
#
# VCardParser class
#
#-------------------------------------------------------------------------
class VCardParser:
    """Class to read data in VCard format from a file."""
    DATE_RE = re.compile(r'^(\d{4}-\d{1,2}-\d{1,2})|(?:(\d{4})-?(\d\d)-?(\d\d))')
    GROUP_RE = re.compile(r'^(?:[-0-9A-Za-z]+\.)?(.+)$') # see RFC 2425 sec5.8.2
    ESCAPE_CHAR = '\\'
    TOBE_ESCAPED = ['\\', ',', ';'] # order is important
    LINE_CONTINUATION = [' ', '\t']

    @staticmethod
    def name_value_split(data):
        """
        Property group.name:value split is on first unquoted colon.

        :param data: one unfolded property line.
        :returns: ``(name, value)`` with any ``group.`` prefix removed from
                  name, or an empty tuple if the line has no usable colon.
        """
        colon_idx = data.find(':')
        if colon_idx < 1:
            return ()
        # A colon preceded by an odd number of double quotes sits inside a
        # quoted parameter value; move on to the next colon.
        while data.count('"', 0, colon_idx) % 2 == 1:
            colon_idx = data.find(':', colon_idx + 1)
            if colon_idx == -1:
                # Unbalanced quotes: there is no unquoted colon.  Without
                # this check the search would restart from index 0 forever.
                return ()
        group_name, value = data[:colon_idx], data[colon_idx + 1:]
        name_parts = VCardParser.GROUP_RE.match(group_name)
        if name_parts is None:
            return ()
        return (name_parts.group(1), value)

    @staticmethod
    def unesc(data):
        """
        Remove VCard escape sequences.

        :param data: a string, or a list whose items are unescaped in turn.
        :returns: the unescaped string or a new list of unescaped items.
        :raises TypeError: for any other data type.
        """
        if isinstance(data, str):
            # Backslash is first in TOBE_ESCAPED, so it is handled last here.
            for char in reversed(VCardParser.TOBE_ESCAPED):
                data = data.replace(VCardParser.ESCAPE_CHAR + char, char)
            return data
        elif isinstance(data, list):
            return list(map(VCardParser.unesc, data))
        else:
            raise TypeError("VCard unescaping is not implemented for "
                            "data type %s." % str(type(data)))

    @staticmethod
    def count_escapes(strng):
        """Count the number of escape characters at the end of a string."""
        count = 0
        for char in reversed(strng):
            if char != VCardParser.ESCAPE_CHAR:
                return count
            count += 1
        return count

    @staticmethod
    def split_unescaped(strng, sep):
        """
        Split on sep if sep is unescaped.

        :returns: list of fragments; escape characters are left in place.
        """
        strng_parts = strng.split(sep)
        # The last fragment is not followed by a separator, so only the
        # fragments before it can end in an escaped separator.  Examining
        # the last one used to raise IndexError on a trailing backslash.
        for i in reversed(range(len(strng_parts) - 1)):
            if VCardParser.count_escapes(strng_parts[i]) % 2 == 1:
                # the sep was escaped so undo split
                appendix = strng_parts.pop(i + 1)
                strng_parts[i] += sep + appendix
        return strng_parts

    def __init__(self, dbase):
        """Store the database and reset all per-import parser state."""
        self.database = dbase
        self.formatted_name = ''
        self.name_parts = ''
        self.next_line = None
        self.line_num = 0
        self.trans = None
        self.version = None
        self.person = None
        self.errors = []
        self.number_of_errors = 0

    def __get_next_line(self, filehandle):
        """
        Read and return the line with the next property of the VCard.

        Also if it spans multiple lines (RFC 2425 sec.5.8.1).

        :returns: the stripped, unfolded line, or None at end of file.
        """
        line = self.next_line
        self.next_line = filehandle.readline()
        self.line_num = self.line_num + 1
        # A following line that starts with a space or tab continues this one.
        while self.next_line and self.next_line[0] in self.LINE_CONTINUATION:
            line = line.rstrip("\n")
            # TODO perhaps next lines superfluous because of rU open parameter?
            if len(line) > 0 and line[-1] == "\r":
                line = line[:-1]
            line += self.next_line[1:]
            self.next_line = filehandle.readline()
            self.line_num = self.line_num + 1
        if line:
            line = line.strip()
        else:
            # readline() returned '' earlier: end of file.
            line = None
        return line

    def __add_msg(self, problem, line=None):
        """
        Record a problem for the import report.

        :param problem: message text; an empty string is ignored.
        :param line: optional line number to prefix the message with.
        """
        if problem != "":
            self.number_of_errors += 1
        if line:
            message = _("Line %(line)5d: %(prob)s\n") % {"line": line,
                                                         "prob": problem}
        else:
            message = problem + "\n"
        self.errors.append(message)

    def parse(self, filehandle, user):
        """
        Prepare the database and parse the input file.

        :param filehandle: open file handle positioned at start of the file
        :param user: object whose ``info`` method receives the import report;
                     ``user.uistate.window`` is used as parent if present.
        """
        tym = time.time()
        self.person = None
        self.database.disable_signals()
        try:
            with DbTxn(_("vCard import"), self.database,
                       batch=True) as self.trans:
                self._parse_vCard_file(filehandle)
        finally:
            # Re-enable signals even when parsing raises (e.g. an
            # unsupported VERSION), otherwise they stay switched off.
            self.database.enable_signals()
        self.database.request_rebuild()
        tym = time.time() - tym
        # translators: leave all/any {...} untranslated
        msg = ngettext('Import Complete: {number_of} second',
                       'Import Complete: {number_of} seconds', tym
                      ).format(number_of=tym)
        LOG.debug(msg)
        if self.number_of_errors == 0:
            message = _("VCARD import report: No errors detected")
        else:
            message = _("VCARD import report: %s errors detected\n") % \
                self.number_of_errors
        if hasattr(user.uistate, 'window'):
            parent_window = user.uistate.window
        else:
            parent_window = None
        user.info(message, "".join(self.errors),
                  parent=parent_window, monospaced=True)

    def _parse_vCard_file(self, filehandle):
        """Read each line of the input file and act accordingly."""
        self.next_line = filehandle.readline()
        self.line_num = 1

        # Properties whose handler takes (fields, value).
        handlers = {
            "VERSION": self.check_version,
            "FN": self.add_formatted_name,
            "N": self.add_name_parts,
            "NICKNAME": self.add_nicknames,
            "SORT-STRING": self.add_sortas,
            "ADR": self.add_address,
            "TEL": self.add_phone,
            "BDAY": self.add_birthday,
            "ROLE": self.add_occupation,
            "URL": self.add_url,
            "EMAIL": self.add_email,
            # VCard 3.0 only has X-GENDER, GENDER is 4.0 syntax,
            # but we want to be robust here.
            "X-GENDER": self.add_gender,
            "GENDER": self.add_gender,
        }

        while True:
            line = self.__get_next_line(filehandle)
            if line is None:
                break
            if line == "":
                continue

            if line.find(":") == -1:
                continue
            line_parts = self.name_value_split(line)
            if not line_parts:
                continue

            # No check for escaped ; because only fields[0] is used.
            fields = line_parts[0].split(";")

            property_name = fields[0].upper()
            if property_name == "BEGIN":
                self.next_person()
            elif property_name == "END":
                self.finish_person()
            elif property_name in handlers:
                handlers[property_name](fields, line_parts[1])
            elif property_name == "PRODID":
                # Included cause VCards made by Gramps have this prop.
                pass
            else:
                # token is the property name and line is the line text; the
                # line number goes to __add_msg for the "Line N:" prefix.
                self.__add_msg(
                    _("Token >%(token)s< unknown. line skipped: %(line)s") %
                    {"token": fields[0], "line": line},
                    self.line_num - 1)

    def finish_person(self):
        """All info has been collected, write to database."""
        if self.person is not None:
            # Persons without a usable name are not stored.
            if self.add_name():
                self.database.add_person(self.person, self.trans)
        self.person = None

    def next_person(self):
        """A VCard for another person is started."""
        if self.person is not None:
            # The previous VCard had no END; store it before starting anew.
            self.finish_person()
            self.__add_msg(_("BEGIN property not properly closed by END "
                             "property, Gramps can't cope with nested VCards."),
                           self.line_num - 1)
        self.person = Person()
        self.formatted_name = ''
        self.name_parts = ''

    def check_version(self, fields, data):
        """
        Check the version of the VCard, only version 3.0 is supported.

        :raises GrampsImportError: if the version is anything but "3.0".
        """
        self.version = data
        if self.version != "3.0":
            raise GrampsImportError(_("Import of VCards version %s is "
                                      "not supported by Gramps.")
                                    % self.version)

    def add_formatted_name(self, fields, data):
        """Read the FN property of a VCard; only the first one is kept."""
        if not self.formatted_name:
            self.formatted_name = self.unesc(str(data)).strip()

    def add_name_parts(self, fields, data):
        """Read the N property of a VCard; only the first one is kept."""
        if not self.name_parts:
            self.name_parts = data.strip()

    def add_name(self):
        """
        Add the name to the person.

        Returns True on success, False on failure.
        """
        if not self.name_parts.strip():
            self.__add_msg(_("VCard is malformed missing the compulsory N "
                             "property, so there is no name; skip it."),
                           self.line_num - 1)
            return False
        if not self.formatted_name:
            self.__add_msg(_("VCard is malformed missing the compulsory FN "
                             "property, get name from N alone."),
                           self.line_num - 1)
        # N is: family; given; additional; prefixes (title); suffixes
        data_fields = self.split_unescaped(self.name_parts, ';')
        if len(data_fields) != 5:
            # Reported, but whatever components are present are still used.
            self.__add_msg(_("VCard is malformed wrong number of name "
                             "components."), self.line_num - 1)

        name = Name()
        name.set_type(NameType(NameType.BIRTH))

        if data_fields[0].strip():
            # assume first surname is primary
            for surname_str in self.split_unescaped(data_fields[0], ','):
                surname = Surname()
                prefix, sname = splitof_nameprefix(self.unesc(surname_str))
                surname.set_surname(sname.strip())
                surname.set_prefix(prefix.strip())
                name.add_surname(surname)
            name.set_primary_surname()

        if len(data_fields) > 1 and data_fields[1].strip():
            given_name = ' '.join(self.unesc(
                self.split_unescaped(data_fields[1], ',')))
        else:
            given_name = ''
        if len(data_fields) > 2 and data_fields[2].strip():
            additional_names = ' '.join(self.unesc(
                self.split_unescaped(data_fields[2], ',')))
        else:
            additional_names = ''
        self.add_firstname(given_name.strip(), additional_names.strip(), name)

        if len(data_fields) > 3 and data_fields[3].strip():
            name.set_title(' '.join(self.unesc(
                self.split_unescaped(data_fields[3], ','))))
        if len(data_fields) > 4 and data_fields[4].strip():
            name.set_suffix(' '.join(self.unesc(
                self.split_unescaped(data_fields[4], ','))))

        self.person.set_primary_name(name)
        return True

    def add_firstname(self, given_name, additional_names, name):
        """
        Combine given_name and additional_names and add as firstname to name.

        If possible try to add given_name as call name.

        :param given_name: stripped given name from the N property.
        :param additional_names: stripped additional names from N.
        :param name: Name object that receives first name and call name.
        """
        default = "%s %s" % (given_name, additional_names)
        if not self.formatted_name:
            # There is no formatted name
            firstname = default
        elif not given_name:
            # There is no given_name
            firstname = additional_names
        elif not additional_names:
            # There are no additional_names
            firstname = given_name
        elif self.formatted_name.find(given_name) == -1:
            # Given name is not in formatted name
            firstname = default
        else:
            given_name_pos = self.formatted_name.find(given_name)
            add_names_pos = self.formatted_name.find(additional_names)
            if add_names_pos != -1:
                if given_name_pos <= add_names_pos:
                    firstname = default
                    # Uncertain if given name is used as callname
                else:
                    firstname = "%s %s" % (additional_names, given_name)
                    name.set_call_name(given_name)
            else:
                try:
                    idx = fitin(self.formatted_name, additional_names,
                                given_name)
                except IndexError:
                    # fitin ran out of additional names before reaching
                    # the given name, so the given name comes last.
                    firstname = "%s %s" % (additional_names, given_name)
                    name.set_call_name(given_name)
                else:
                    if idx == -1:
                        # Additional names is not in formatted name
                        firstname = default
                    else:  # Given name in middle of additional names
                        firstname = "%s%s %s" % (additional_names[:idx],
                                                 given_name,
                                                 additional_names[idx:])
                        name.set_call_name(given_name)
        name.set_first_name(firstname.strip())
        return

    def add_nicknames(self, fields, data):
        """Read the NICKNAME property of a VCard."""
        # Each non-empty nickname becomes its own alternate name.
        for nick in self.split_unescaped(data, ','):
            nickname = nick.strip()
            if nickname:
                name = Name()
                name.set_nick_name(self.unesc(nickname))
                self.person.add_alternate_name(name)

    def add_sortas(self, fields, data):
        """Read the SORT-STRING property of a VCard."""
        # TODO
        pass

    def add_address(self, fields, data):
        """Read the ADR property of a VCard."""
        data_fields = self.split_unescaped(data, ';')
        data_fields = [x.strip() for x in self.unesc(data_fields)]
        if ''.join(data_fields):
            addr = Address()
            def add_street(strng):
                """Append strng to the street, separated by a space."""
                if strng:
                    already = addr.get_street()
                    if already:
                        addr.set_street("%s %s" % (already, strng))
                    else:
                        addr.set_street(strng)
            addr.add_street = add_street
            # The first three ADR components are all folded into the street.
            set_func = ['add_street', 'add_street', 'add_street', 'set_city',
                        'set_state', 'set_postal_code', 'set_country']
            for i, value in enumerate(data_fields):
                if i >= len(set_func):
                    # Surplus components are ignored.
                    break
                getattr(addr, set_func[i])(value)
            self.person.add_address(addr)

    def add_phone(self, fields, data):
        """Read the TEL property of a VCard."""
        tel = data.strip()
        if tel:
            # The phone number is stored on an otherwise empty address.
            addr = Address()
            addr.set_phone(self.unesc(tel))
            self.person.add_address(addr)

    def add_birthday(self, fields, data):
        """
        Read the BDAY property of a VCard.

        A value that does not match DATE_RE, or that Date rejects, is kept
        as a text-only date and reported; an empty value is ignored.
        """
        date_str = data.strip()
        date_match = VCardParser.DATE_RE.match(date_str)
        date = Date()
        if date_match:
            if date_match.group(2):
                # Second alternative of DATE_RE: normalise to y-m-d.
                date_str = "%s-%s-%s" % (date_match.group(2),
                                         date_match.group(3),
                                         date_match.group(4))
            else:
                date_str = date_match.group(1)
            y, m, d = [int(x, 10) for x in date_str.split('-')]
            try:
                date.set(value=(d, m, y, False))
            except DateError:
                # TRANSLATORS: leave the {vcard_snippet} untranslated
                # in the format string, but you may re-order it if needed.
                self.__add_msg(_(
                    "Invalid date in BDAY {vcard_snippet}, "
                    "preserving date as text."
                    ).format(vcard_snippet=data), self.line_num - 1)
                date.set(modifier=Date.MOD_TEXTONLY, text=data)
        else:
            if date_str:
                # TRANSLATORS: leave the {vcard_snippet} untranslated.
                self.__add_msg(_(
                    "Date {vcard_snippet} not in appropriate format "
                    "yyyy-mm-dd, preserving date as text."
                    ).format(vcard_snippet=date_str), self.line_num - 1)
                date.set(modifier=Date.MOD_TEXTONLY, text=date_str)
            else: # silently ignore an empty BDAY record
                return
        event = Event()
        event.set_type(EventType(EventType.BIRTH))
        event.set_date_object(date)
        self.database.add_event(event, self.trans)

        event_ref = EventRef()
        event_ref.set_reference_handle(event.get_handle())
        self.person.set_birth_ref(event_ref)

    def add_occupation(self, fields, data):
        """Read the ROLE property of a VCard."""
        occupation = data.strip()
        if occupation:
            event = Event()
            event.set_type(EventType(EventType.OCCUPATION))
            event.set_description(self.unesc(occupation))
            self.database.add_event(event, self.trans)

            event_ref = EventRef()
            event_ref.set_reference_handle(event.get_handle())
            self.person.add_event_ref(event_ref)

    def add_url(self, fields, data):
        """Read the URL property of a VCard."""
        href = data.strip()
        if href:
            url = Url()
            url.set_path(self.unesc(href))
            self.person.add_url(url)

    def add_email(self, fields, data):
        """Read the EMAIL property of a VCard."""
        email = data.strip()
        if email:
            url = Url()
            url.set_type(UrlType(UrlType.EMAIL))
            url.set_path(self.unesc(email))
            self.person.add_url(url)

    def add_gender(self, fields, data):
        """
        Read the GENDER property of a VCard.

        Only the first character is examined, case-insensitively; anything
        other than M or F leaves the person's gender untouched.
        """
        gender_value = data.strip()
        if gender_value:
            gender_value = gender_value.upper()
            gender_value = gender_value[0]
            if gender_value == 'M':
                gender = Person.MALE
            elif gender_value == 'F':
                gender = Person.FEMALE
            else:
                return
            self.person.set_gender(gender)