#
# Gramps - a GTK+/GNOME based genealogy program
#
# Copyright (C) 2000-2006 Martin Hawlisch, Donald N. Allingham
# Copyright (C) 2008 Brian G. Matherly
# Copyright (C) 2013 Vassilii Khachaturov
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#

"Import from GeneWeb"

#-------------------------------------------------------------------------
#
# standard python modules
#
#-------------------------------------------------------------------------
import re
import time

#------------------------------------------------------------------------
#
# Set up logging
#
#------------------------------------------------------------------------
import logging
LOG = logging.getLogger(".ImportGeneWeb")

#-------------------------------------------------------------------------
#
# Gramps modules
#
#-------------------------------------------------------------------------
from gramps.gen.const import GRAMPS_LOCALE as glocale
from gramps.gen.utils.libformatting import ImportInfo
_ = glocale.translation.gettext
ngettext = glocale.translation.ngettext # else "nearby" comments are ignored
from gramps.gen.errors import GedcomError, GrampsImportError
from gramps.gen.lib import (Attribute, AttributeType, ChildRef, Citation,
                            Date, DateError, Event, EventRef, EventRoleType,
                            EventType, Family, FamilyRelType, Name, NameType,
                            Note, Person, PersonRef, Place, Source, LdsOrd)
from gramps.gen.db import DbTxn
from html.entities import name2codepoint

# GeneWeb date: optional modifier letters/signs, a date, an optional
# calendar letter, and optionally ".." followed by a second date and
# calendar letter.  The calendar groups use the class [JHF]; the former
# spelling [J|H|F] also accepted a literal "|", which has no entry in
# _cal_map.
_date_parse = re.compile(
    r'([kmes~?<>]+)?([0-9/]+)([JHF])?(\.\.)?([0-9/]+)?([JHF])?')
# Free-text date written as 0(some text).
_text_parse = re.compile(r'0\((.*)\)')

# GeneWeb date modifier sign -> Gramps date modifier.
_mod_map = {
    '>' : Date.MOD_AFTER,
    '<' : Date.MOD_BEFORE,
    '~' : Date.MOD_ABOUT,
    }

# GeneWeb calendar letter -> Gramps calendar.
_cal_map = {
    'J' : Date.CAL_JULIAN,
    'H' : Date.CAL_HEBREW,
    'F' : Date.CAL_FRENCH,
    }

# GeneWeb personal event tag -> Gramps event type (or LDS ordinance type).
pevents_map = {
    "#birt" : EventType.BIRTH, #Epers_Birth
    "#bapt" : EventType.BAPTISM, #Epers_Baptism
    "#deat" : EventType.DEATH, #Epers_Death
    "#buri" : EventType.BURIAL, #Epers_Burial
    "#crem" : EventType.CREMATION,
    "#acco" : EventType((EventType.CUSTOM, _('Accomplishment'))),
    "#acqu" : EventType((EventType.CUSTOM, _('Acquisition'))),
    "#adhe" : EventType((EventType.CUSTOM, _('Adhesion'))),
    "#awar" : EventType((EventType.CUSTOM, _('Award'))),
    "#bapl" : LdsOrd.BAPTISM, #Epers_BaptismLDS
    "#barm" : EventType.BAR_MITZVAH, #Epers_BarMitzvah
    "#basm" : EventType.BAS_MITZVAH, #Epers_BatMitzvah
    "#bles" : EventType.BLESS, #Epers_Benediction
    "#cens" : EventType.CENSUS,
    "#chgn" : EventType((EventType.CUSTOM, _('Change Name'))),
    "#circ" : EventType((EventType.CUSTOM, _('Circumcision'))),
    "#conf" : EventType.CONFIRMATION, #Epers_Confirmation
    "#conl" : LdsOrd.CONFIRMATION, #Epers_ConfirmationLDS
    "#degr" : EventType.DEGREE,
    "#demm" : EventType((EventType.CUSTOM, _('Military Demobilisation'))),
    "#dist" : EventType((EventType.CUSTOM, _('Award'))),
    "#dotl" : LdsOrd.ENDOWMENT, #Epers_DotationLDS
    "#educ" : EventType.EDUCATION, #Epers_Education
    "#elec" : EventType.ELECTED, #Epers_Election
    "#emig" : EventType.EMIGRATION,
    "#endl" : EventType((EventType.CUSTOM, _('Dotation'))),
    "#exco" : EventType((EventType.CUSTOM, _('Excommunication'))),
    "#fcom" : EventType.FIRST_COMMUN,
    "#flkl" : EventType((EventType.CUSTOM, _('LDS Family Link'))),
    "#fune" : EventType((EventType.CUSTOM, _('Funeral'))),
    "#grad" : EventType.GRADUATION,
    "#hosp" : EventType((EventType.CUSTOM, _('Hospitalisation'))),
    "#illn" : EventType((EventType.CUSTOM, _('Illness'))),
    "#immi" : EventType.IMMIGRATION,
    "#lpas" : EventType((EventType.CUSTOM, _('List Passenger'))),
    "#mdis" : EventType((EventType.CUSTOM, _('Military Distinction'))),
    "#mobm" : EventType((EventType.CUSTOM, _('Military Mobilisation'))),
    "#mpro" : EventType((EventType.CUSTOM, _('Military Promotion'))),
    "#mser" : EventType.MILITARY_SERV, #Epers_MilitaryService
    "#natu" : EventType.NATURALIZATION, #Epers_Naturalisation
    "#occu" : EventType.OCCUPATION, #Epers_Occupation
    "#ordn" : EventType.ORDINATION, #Epers_Ordination
    "#prop" : EventType.PROPERTY, #Epers_Property
    "#resi" : EventType.RESIDENCE, #Epers_Residence
    "#reti" : EventType.RETIREMENT,
    "#slgc" : EventType((EventType.CUSTOM, _('LDS Seal to child'))), #Epers_ScellentChildLDS
    "#slgp" : LdsOrd.SEAL_TO_PARENTS, #Epers_ScellentParentLDS
    "#slgs" : LdsOrd.SEAL_TO_SPOUSE,
    "#vteb" : EventType((EventType.CUSTOM, _('Sold property'))), #Epers_VenteBien
    "#will" : EventType.WILL, #Epers_Will
    }

# GeneWeb family event tag -> Gramps event type.
fevents_map = {
    "#marr" : EventType.MARRIAGE, #Efam_Marriage
    "#nmar" : EventType.NUM_MARRIAGES,
    "#nmen" : EventType((EventType.CUSTOM, _('No mention'))), #Efam_NoMention
    "#enga" : EventType.ENGAGEMENT, #Efam_Engage
    "#div" : EventType.DIVORCE,
    "#sep" : EventType((EventType.CUSTOM, _('Separated'))), #Efam_Separated
    "#anul" : EventType.ANNULMENT, #Efam_Annulation
    "#marb" : EventType.MARR_BANNS, #Efam_MarriageBann
    "#marc" : EventType.MARR_CONTR, #Efam_MarriageContract
    "#marl" : EventType.MARR_LIC, #Efam_MarriageLicense
    "#resi" : EventType.RESIDENCE, #Efam_Residence
    }

141#------------------------------------------------------------------------- 142# 143# 144# 145#------------------------------------------------------------------------- 146def importData(database, filename, user): 147 148 global callback 149 150 try: 151 g = GeneWebParser(database,filename) 152 except IOError as msg: 153 user.notify_error(_("%s could not be opened\n") % filename,str(msg)) 154 return 155 156 try: 157 status = g.parse_geneweb_file() 158 except IOError as msg: 159 errmsg = _("%s could not be opened\n") % filename 160 user.notify_error(errmsg,str(msg)) 161 return 162 return ImportInfo({_("Results"): _("done")}) 163 164#------------------------------------------------------------------------- 165# For a description of the file format see 166# http://cristal.inria.fr/~ddr/GeneWeb/en/gwformat.htm 167# https://github.com/geneanet/geneweb/issues/315 168#------------------------------------------------------------------------- 169class GeneWebParser: 170 def __init__(self, dbase, file): 171 self.db = dbase 172 if file: # Unit tests can create the parser w/o underlying file 173 self.f = open(file, "rUb") 174 self.filename = file 175 self.encoding = 'iso-8859-1' 176 self.gwplus = False 177 178 def get_next_line(self): 179 self.lineno += 1 180 line = self.f.readline() 181 182 try: 183 line = line.decode('utf-8') 184 except GrampsImportError as err: 185 self.errmsg(str(err)) 186 187 if line: 188 try: 189 line = str(line.strip()) 190 except UnicodeDecodeError: 191 line = line.decode(self.encoding).strip() 192 else: 193 line = None 194 return line 195 196 def parse_geneweb_file(self): 197 with DbTxn(_("GeneWeb import"), self.db, batch=True) as self.trans: 198 self.db.disable_signals() 199 t = time.time() 200 self.lineno = 0 201 self.index = 0 202 self.fam_count = 0 203 self.indi_count = 0 204 205 self.fkeys = [] 206 self.ikeys = {} 207 self.pkeys = {} 208 self.skeys = {} 209 210 self.current_mode = None 211 self.current_family = None 212 
self.current_husband_handle = None 213 self.current_child_birthplace_handle = None 214 self.current_child_source_handle = None 215 try: 216 while 1: 217 line = self.get_next_line() 218 if line is None: 219 break 220 if line == "": 221 continue 222 223 fields = line.split(" ") 224 225 LOG.debug("LINE: %s" %line) 226 227 if fields[0] == "gwplus": 228 self.gwplus = True 229 self.encoding = 'utf-8' 230 elif fields[0] == "encoding:": 231 self.encoding = fields[1] 232 elif fields[0] == "fam": 233 self.current_mode = "fam" 234 self.read_family_line(line,fields) 235 elif fields[0] == "rel": 236 self.current_mode = "rel" 237 self.read_relationship_person(line,fields) 238 elif fields[0] == "src": 239 self.read_source_line(line,fields) 240 elif fields[0] in ("wit", "wit:"): 241 self.read_witness_line(line,fields) 242 elif fields[0] == "cbp": 243 self.read_children_birthplace_line(line,fields) 244 elif fields[0] == "csrc": 245 self.read_children_source_line(line,fields) 246 elif fields[0] == "beg" and self.current_mode == "fam": 247 self.read_children_lines() 248 elif fields[0] == "beg" and self.current_mode == "rel": 249 self.read_relation_lines() 250 elif fields[0] == "comm": 251 self.read_family_comment(line,fields) 252 elif fields[0] == "notes": 253 self.read_person_notes_lines(line,fields) 254 elif fields[0] == "fevt" and self.current_mode == "fam": 255 #self.read_fevent_line(self.get_next_line()) 256 pass 257 elif fields[0] == "pevt": 258 #self.read_pevent_line(self.get_next_line(), fields) 259 pass 260 elif fields[0] == "notes-db": 261 self.read_database_notes_lines(line,fields) 262 elif fields[0] == "pages-ext" or "wizard-note": 263 pass 264 elif fields[0] == "end": 265 self.current_mode = None 266 else: 267 LOG.warning("parse_geneweb_file(): Token >%s< unknown. 
line %d skipped: %s" % 268 (fields[0],self.lineno,line)) 269 except GedcomError as err: 270 self.errmsg(str(err)) 271 272 t = time.time() - t 273 # translators: leave all/any {...} untranslated 274 msg = ngettext('Import Complete: {number_of} second', 275 'Import Complete: {number_of} seconds', t 276 ).format(number_of=t) 277 278 self.db.enable_signals() 279 self.db.request_rebuild() 280 281 LOG.debug(msg) 282 LOG.debug("Families: %d" % len(self.fkeys)) 283 LOG.debug("Individuals: %d" % len(self.ikeys)) 284 return None 285 286 def read_family_line(self,line,fields): 287 self.current_husband_handle = None 288 self.current_child_birthplace_handle = None 289 self.current_child_source_handle = None 290 self.current_family = Family() 291 self.db.add_family(self.current_family,self.trans) 292 #self.db.commit_family(self.current_family,self.trans) 293 self.fkeys.append(self.current_family.get_handle()) 294 idx = 1; 295 296 LOG.debug("\nHusband:") 297 (idx, husband) = self.parse_person(fields,idx,Person.MALE,None) 298 if husband: 299 self.current_husband_handle = husband.get_handle() 300 self.current_family.set_father_handle(husband.get_handle()) 301 self.db.commit_family(self.current_family,self.trans) 302 husband.add_family_handle(self.current_family.get_handle()) 303 self.db.commit_person(husband,self.trans) 304 LOG.debug("Marriage:") 305 idx = self.parse_marriage(fields,idx) 306 LOG.debug("Wife:") 307 (idx,wife) = self.parse_person(fields,idx,Person.FEMALE,None) 308 if wife: 309 self.current_family.set_mother_handle(wife.get_handle()) 310 self.db.commit_family(self.current_family,self.trans) 311 wife.add_family_handle(self.current_family.get_handle()) 312 self.db.commit_person(wife,self.trans) 313 return None 314 315 def read_relationship_person(self,line,fields): 316 LOG.debug(r"\Relationships:") 317 (idx,person) = self.parse_person(fields,1,Person.UNKNOWN,None) 318 if person: 319 self.current_relationship_person_handle = person.get_handle() 320 321 def 
read_relation_lines(self): 322 if not self.current_relationship_person_handle: 323 LOG.warning("Unknown person for relationship in line %d!" % self.lineno) 324 return None 325 rel_person = self.db.get_person_from_handle(self.current_relationship_person_handle) 326 while 1: 327 line = self.get_next_line() 328 if line is None or line == "end": 329 break 330 if line == "": 331 continue 332 333 # match relationship type and related person 334 line_re = re.compile("^- ([^:]+): (.*)$") 335 matches = line_re.match(line) 336 if matches: 337 #split related person into fields 338 fields = matches.groups()[1].split(" ") 339 if fields: 340 (idx,asso_p) = self.parse_person(fields,0,Person.UNKNOWN,None) 341 pref = PersonRef() 342 pref.set_relation(matches.groups()[0]) 343 LOG.warning("TODO: Handle association types properly") 344 pref.set_reference_handle(asso_p.get_handle()) 345 rel_person.add_person_ref(pref) 346 self.db.commit_person(rel_person,self.trans) 347 else: 348 LOG.warning("Invalid name of person in line %d" % self.lineno) 349 else: 350 LOG.warning("Invalid relationship in line %d" % self.lineno) 351 break 352 self.current_mode = None 353 return None 354 355 def read_source_line(self,line,fields): 356 if not self.current_family: 357 LOG.warning("Unknown family of child in line %d!" 
% self.lineno) 358 return None 359 source = self.get_or_create_source(self.decode(fields[1])) 360 self.current_family.add_citation(source.get_handle()) 361 self.db.commit_family(self.current_family,self.trans) 362 return None 363 364 def read_witness_line(self,line,fields): 365 LOG.debug("Witness:") 366 if fields[1] == "m:": 367 (idx,wit_p) = self.parse_person(fields,2,Person.MALE,None) 368 elif fields[1] == "f:": 369 (idx,wit_p) = self.parse_person(fields,2,Person.FEMALE,None) 370 else: 371 (idx,wit_p) = self.parse_person(fields,1,None,None) 372 if wit_p: 373 mev = None 374 # search marriage event 375 for evr in self.current_family.get_event_ref_list(): 376 ev = self.db.get_event_from_handle(evr.get_reference_handle()) 377 if ev.get_type() == EventType.MARRIAGE: 378 mev = ev # found. 379 if not mev: # No marriage event found create a new one 380 mev = self.create_event(EventType.MARRIAGE, None, None, None, None) 381 mar_ref = EventRef() 382 mar_ref.set_reference_handle(mev.get_handle()) 383 self.current_family.add_event_ref(mar_ref) 384 wit_ref = EventRef() 385 wit_ref.set_role(EventRoleType(EventRoleType.WITNESS)) 386 wit_ref.set_reference_handle(mev.get_handle()) 387 wit_p.add_event_ref(wit_ref) 388 self.db.commit_person(wit_p,self.trans) 389 return None 390 391 def read_children_lines(self): 392 father_surname = "Dummy" 393 if not self.current_husband_handle: 394 LOG.warning("Unknown father for child in line %d!" % self.lineno) 395 return None 396 husb = self.db.get_person_from_handle(self.current_husband_handle) 397 father_surname = husb.get_primary_name().get_surname() 398 if not self.current_family: 399 LOG.warning("Unknown family of child in line %d!" 
% self.lineno) 400 return None 401 while 1: 402 line = self.get_next_line() 403 if line is None: 404 break 405 if line == "": 406 continue 407 408 fields = line.split(" ") 409 if fields[0] == "-": 410 LOG.debug("Child:") 411 child = None 412 if fields[1] == "h": 413 (idx,child) = self.parse_person(fields,2,Person.MALE,father_surname) 414 elif fields[1] == "f": 415 (idx,child) = self.parse_person(fields,2,Person.FEMALE,father_surname) 416 else: 417 (idx,child) = self.parse_person(fields,1,Person.UNKNOWN,father_surname) 418 419 if child: 420 childref = ChildRef() 421 childref.set_reference_handle(child.get_handle()) 422 self.current_family.add_child_ref( childref) 423 self.db.commit_family(self.current_family,self.trans) 424 child.add_parent_family_handle( self.current_family.get_handle()) 425 if self.current_child_birthplace_handle: 426 birth = None 427 birth_ref = child.get_birth_ref() 428 if birth_ref: 429 birth = self.db.get_event_from_handle(birth_ref.ref) 430 if not birth: 431 birth = self.create_event(EventType.BIRTH) 432 birth_ref = EventRef() 433 birth_ref.set_reference_handle(birth.get_handle()) 434 child.set_birth_ref(birth_ref) 435 birth.set_place_handle(self.current_child_birthplace_handle) 436 self.db.commit_event(birth,self.trans) 437 if self.current_child_source_handle: 438 child.add_citation(self.current_child_source_handle) 439 self.db.commit_person(child,self.trans) 440 else: 441 break 442 self.current_mode = None 443 return None 444 445 446 def read_children_birthplace_line(self,line,fields): 447 cbp = self.get_or_create_place(self.decode(fields[1])) 448 if cbp: 449 self.current_child_birthplace_handle = cbp.get_handle() 450 return None 451 452 def read_children_source_line(self,line,fields): 453 csrc = self.get_or_create_source(self.decode(fields[1])) 454 self.current_child_source_handle = csrc.handle 455 return None 456 457 def read_family_comment(self,line,fields): 458 if not self.current_family: 459 LOG.warning("Unknown family of child in line 
%d!" % self.lineno) 460 return None 461 n = Note() 462 n.set(line) 463 self.db.add_note(n,self.trans) 464 self.current_family.add_note(n.handle) 465 self.db.commit_family(self.current_family,self.trans) 466 return None 467 468 def _read_notes_lines(self, note_tag): 469 note_txt = "" 470 while True: 471 line = self.get_next_line() 472 if line is None: 473 break 474 475 fields = line.split(" ") 476 if fields[0] == "end" and fields[1] == note_tag: 477 break 478 elif fields[0] == "beg": 479 continue 480 else: 481 if note_txt: 482 note_txt = note_txt + "\n" + line 483 else: 484 note_txt = note_txt + line 485 if note_txt: 486 n = Note() 487 n.set(note_txt) 488 self.db.add_note(n,self.trans) 489 return n.handle 490 return None 491 492 def read_person_notes_lines(self,line,fields): 493 (idx,person) = self.parse_person(fields,1,None,None) 494 note_handle = self._read_notes_lines( fields[0]) 495 if note_handle: 496 person.add_note(note_handle) 497 self.db.commit_person(person,self.trans) 498 499 def read_database_notes_lines(self,line,fields): 500 note_handle = self._read_notes_lines( fields[0]) 501 502 def parse_marriage(self,fields,idx): 503 mariageDataRe = re.compile("^[+#-0-9].*$") 504 505 mar_date = None 506 mar_place = None 507 mar_source = None 508 509 sep_date = None 510 div_date = None 511 512 married = 1 513 engaged = 0 514 515 # skip to marriage date in case person contained unmatches tokens 516 #Alex: this failed when fields[idx] was an empty line. Fixed. 
517 #while idx < len(fields) and not fields[idx][0] == "+": 518 while idx < len(fields) and not (fields[idx] and fields[idx][0] == "+"): 519 if fields[idx]: 520 LOG.warning(("parse_marriage(): Unknown field: " + 521 "'%s' in line %d!") % (fields[idx], self.lineno)) 522 idx += 1 523 524 while idx < len(fields) and mariageDataRe.match(fields[idx]): 525 field = fields[idx] 526 idx += 1 527 if field.startswith("+"): 528 field = field[1:] 529 mar_date = self.parse_date(self.decode(field)) 530 LOG.debug(" Married at: %s" % field) 531 elif field.startswith("-"): 532 field = field[1:] 533 div_date = self.parse_date(self.decode(field)) 534 LOG.debug(" Div at: %s" % field) 535 elif field == "#mp" and idx < len(fields): 536 mar_place = self.get_or_create_place(self.decode(fields[idx])) 537 LOG.debug(" Marriage place: %s" % fields[idx]) 538 idx += 1 539 elif field == "#ms" and idx < len(fields): 540 mar_source = self.get_or_create_source(self.decode(fields[idx])) 541 LOG.debug(" Marriage source: %s" % fields[idx]) 542 idx += 1 543 elif field == "#sep" and idx < len(fields): 544 sep_date = self.parse_date(self.decode(fields[idx])) 545 LOG.debug(" Seperated since: %s" % fields[idx]) 546 idx += 1 547 elif field == "#nm": 548 LOG.debug(" Are not married.") 549 married = 0 550 elif field == "#noment": 551 LOG.debug(" Not mentioned.") 552 elif field == "#eng": 553 LOG.debug(" Are engaged.") 554 engaged = 1 555 else: 556 LOG.warning(("parse_marriage(): Unknown field " + 557 "'%s'for mariage in line %d!") % (field, self.lineno)) 558 559 if mar_date or mar_place or mar_source: 560 mar = self.create_event( 561 EventType.MARRIAGE, None, mar_date, mar_place, mar_source) 562 mar_ref = EventRef() 563 mar_ref.set_reference_handle(mar.get_handle()) 564 mar_ref.set_role(EventRoleType.FAMILY) 565 self.current_family.add_event_ref(mar_ref) 566 self.current_family.set_relationship( 567 FamilyRelType(FamilyRelType.MARRIED)) 568 569 if div_date: 570 div = self.create_event( 571 EventType.DIVORCE, 
None, div_date, None, None) 572 div_ref = EventRef() 573 div_ref.set_reference_handle(div.get_handle()) 574 div_ref.set_role(EventRoleType.FAMILY) 575 self.current_family.add_event_ref(div_ref) 576 577 if sep_date or engaged: 578 sep = self.create_event( 579 EventType.ENGAGEMENT, None, sep_date, None, None) 580 sep_ref = EventRef() 581 sep_ref.set_reference_handle(sep.get_handle()) 582 sep_ref.set_role(EventRoleType.FAMILY) 583 self.current_family.add_event_ref(sep_ref) 584 585 if not married: 586 self.current_family.set_relationship( 587 FamilyRelType(FamilyRelType.UNMARRIED)) 588 589 self.db.commit_family(self.current_family,self.trans) 590 return idx 591 592 def parse_person(self,fields,idx,gender,father_surname): 593 594 if not father_surname: 595 if not idx < len(fields): 596 LOG.warning("Missing surname of person in line %d!" % self.lineno) 597 surname ="" 598 else: 599 surname = self.decode(fields[idx]) 600 idx += 1 601 else: 602 surname = father_surname 603 604 if not idx < len(fields): 605 LOG.warning("Missing firstname of person in line %d!" 
% self.lineno) 606 firstname = "" 607 else: 608 firstname = self.decode(fields[idx]) 609 idx += 1 610 if idx < len(fields) and father_surname: 611 noSurnameRe = re.compile(r"^[({\[~><?0-9#].*$") 612 if not noSurnameRe.match(fields[idx]): 613 surname = self.decode(fields[idx]) 614 idx += 1 615 616 LOG.debug("Person: %s %s" % (firstname, surname)) 617 person = self.get_or_create_person(firstname,surname) 618 name = Name() 619 name.set_type( NameType(NameType.BIRTH)) 620 name.set_first_name(firstname) 621 surname_obj = name.get_primary_surname() 622 surname_obj.set_surname(surname) 623 person.set_primary_name(name) 624 if person.get_gender() == Person.UNKNOWN and gender is not None: 625 person.set_gender(gender) 626 self.db.commit_person(person,self.trans) 627 personDataRe = re.compile(r"^[kmes0-9<>~#\[({!].*$") 628 dateRe = re.compile("^[kmes0-9~<>?]+.*$") 629 630 source = None 631 birth_parsed = False 632 birth_date = None 633 birth_place = None 634 birth_source = None 635 636 bapt_date = None 637 bapt_place = None 638 bapt_source = None 639 640 death_date = None 641 death_place = None 642 death_source = None 643 death_cause = None 644 645 crem_date = None 646 bur_date = None 647 bur_place = None 648 bur_source = None 649 650 public_name = None 651 firstname_aliases = [] 652 nick_names = [] 653 name_aliases = [] 654 surname_aliases = [] 655 656 while idx < len(fields) and personDataRe.match(fields[idx]): 657 field = fields[idx] 658 idx += 1 659 if field.startswith('('): 660 LOG.debug("Public Name: %s" % field) 661 public_name = self.decode(field[1:-1]) 662 elif field.startswith('{'): 663 LOG.debug("Firstsname Alias: %s" % field) 664 firstname_aliases.append(self.decode(field[1:-1])) 665 elif field.startswith('['): 666 LOG.debug("Title: %s" % field) 667 titleparts = self.decode(field[1:-1]).split(":") 668 tname = ttitle = tplace = tstart = tend = tnth = None 669 try: 670 tname = titleparts[0] 671 ttitle = titleparts[1] 672 if titleparts[2]: 673 tplace = 
self.get_or_create_place(titleparts[2]) 674 tstart = self.parse_date(titleparts[3]) 675 tend = self.parse_date(titleparts[4]) 676 tnth = titleparts[5] 677 except IndexError: # not all parts are written all the time 678 pass 679 if tnth: # Append title numer to title 680 # TODO for Arabic, should the next comma be translated? 681 ttitle += ", " + tnth 682 title = self.create_event( 683 EventType.NOB_TITLE, ttitle, tstart, tplace) 684 # TODO: Geneweb has a start date and an end date, and therefore 685 # supports stuff like: FROM about 1955 TO between 1998 and 1999 686 # gramps only supports one single date or range. 687 if tname and tname != "*": 688 n = Note() 689 n.set(tname) 690 self.db.add_note(n,self.trans) 691 title.add_note( n.handle) 692 title_ref = EventRef() 693 title_ref.set_reference_handle(title.get_handle()) 694 person.add_event_ref(title_ref) 695 elif field == '#nick' and idx < len(fields): 696 LOG.debug("Nick Name: %s" % fields[idx]) 697 nick_names.append(self.decode(fields[idx])) 698 idx += 1 699 elif field == '#occu' and idx < len(fields): 700 LOG.debug("Occupation: %s" % fields[idx]) 701 occu = self.create_event( 702 EventType.OCCUPATION, self.decode(fields[idx])) 703 occu_ref = EventRef() 704 occu_ref.set_reference_handle(occu.get_handle()) 705 person.add_event_ref(occu_ref) 706 idx += 1 707 elif field == '#alias' and idx < len(fields): 708 LOG.debug("Name Alias: %s" % fields[idx]) 709 name_aliases.append(self.decode(fields[idx])) 710 idx += 1 711 elif field == '#salias' and idx < len(fields): 712 LOG.debug("Surname Alias: %s" % fields[idx]) 713 surname_aliases.append(self.decode(fields[idx])) 714 idx += 1 715 elif field == '#image' and idx < len(fields): 716 LOG.debug("Image: %s" % fields[idx]) 717 idx += 1 718 elif field == '#src' and idx < len(fields): 719 LOG.debug("Source: %s" % fields[idx]) 720 source = self.get_or_create_source(self.decode(fields[idx])) 721 idx += 1 722 elif field == '#bs' and idx < len(fields): 723 LOG.debug("Birth Source: 
%s" % fields[idx]) 724 birth_source = self.get_or_create_source(self.decode(fields[idx])) 725 idx += 1 726 elif field[0] == '!': 727 LOG.debug("Baptize at: %s" % field[1:]) 728 bapt_date = self.parse_date(self.decode(field[1:])) 729 elif field == '#bp' and idx < len(fields): 730 LOG.debug("Birth Place: %s" % fields[idx]) 731 birth_place = self.get_or_create_place(self.decode(fields[idx])) 732 idx += 1 733 elif field == '#pp' and idx < len(fields): 734 LOG.debug("Baptize Place: %s" % fields[idx]) 735 bapt_place = self.get_or_create_place(self.decode(fields[idx])) 736 idx += 1 737 elif field == '#ps' and idx < len(fields): 738 LOG.debug("Baptize Source: %s" % fields[idx]) 739 bapt_source = self.get_or_create_source(self.decode(fields[idx])) 740 idx += 1 741 elif field == '#dp' and idx < len(fields): 742 LOG.debug("Death Place: %s" % fields[idx]) 743 death_place = self.get_or_create_place(self.decode(fields[idx])) 744 idx += 1 745 elif field == '#ds' and idx < len(fields): 746 LOG.debug("Death Source: %s" % fields[idx]) 747 death_source = self.get_or_create_source(self.decode(fields[idx])) 748 idx += 1 749 elif field == '#buri' and idx < len(fields): 750 if fields[idx][0]!='#': # bug in GeneWeb: empty #buri fields 751 LOG.debug("Burial Date: %s" % fields[idx]) 752 bur_date = self.parse_date(self.decode(fields[idx])) 753 idx += 1 754 elif field == '#crem' and idx < len(fields): 755 LOG.debug("Cremention Date: %s" % fields[idx]) 756 crem_date = self.parse_date(self.decode(fields[idx])) 757 idx += 1 758 elif field == '#rp' and idx < len(fields): 759 LOG.debug("Burial Place: %s" % fields[idx]) 760 bur_place = self.get_or_create_place(self.decode(fields[idx])) 761 idx += 1 762 elif field == '#rs' and idx < len(fields): 763 LOG.debug("Burial Source: %s" % fields[idx]) 764 bur_source = self.get_or_create_source(self.decode(fields[idx])) 765 idx += 1 766 elif field == '#apubl': 767 LOG.debug("This is a public record") 768 elif field == '#apriv': 769 LOG.debug("This is a 
private record") 770 person.set_privacy(True) 771 elif field == '#h': 772 LOG.debug("This is a restricted record") 773 #TODO: Gramps does currently not feature this level 774 person.set_privacy(True) 775 elif dateRe.match(field): 776 if not birth_parsed: 777 LOG.debug("Birth Date: %s" % field) 778 birth_date = self.parse_date(self.decode(field)) 779 birth_parsed = True 780 else: 781 LOG.debug("Death Date: %s" % field) 782 death_date = self.parse_date(self.decode(field)) 783 if field == "mj": 784 death_cause = "Died joung" 785 elif field.startswith("k"): 786 death_cause = "Killed" 787 elif field.startswith("m"): 788 death_cause = "Murdered" 789 elif field.startswith("e"): 790 death_cause = "Executed" 791 elif field.startswith("d"): 792 death_cause = "Disappeared" 793 #TODO: Set special death types more properly 794 else: 795 LOG.warning(("parse_person(): Unknown field " + 796 "'%s' for person in line %d!") % (field, self.lineno)) 797 798 if public_name: 799 name = person.get_primary_name() 800 name.set_type(NameType(NameType.BIRTH)) 801 person.add_alternate_name(name) 802 name = Name() 803 name.set_type(NameType(NameType.AKA)) 804 name.set_first_name(public_name) 805 surname_obj = name.get_primary_surname() 806 surname_obj.set_surname(surname) 807 person.set_primary_name(name) 808 809 for aka in nick_names: 810 name = Attribute() 811 name.set_type(AttributeType(AttributeType.NICKNAME)) 812 name.set_value(aka) 813 person.add_attribute(name) 814 815 for aka in firstname_aliases: 816 name = Name() 817 name.set_type(NameType(NameType.AKA)) 818 name.set_first_name(aka) 819 surname_obj = name.get_primary_surname() 820 surname_obj.set_surname(surname) 821 person.add_alternate_name(name) 822 823 for aka in name_aliases: 824 name = Name() 825 name.set_type(NameType(NameType.AKA)) 826 name.set_first_name(aka) 827 surname_obj = name.get_primary_surname() 828 surname_obj.set_surname(surname) 829 person.add_alternate_name(name) 830 831 for aka in surname_aliases: 832 name = 
def parse_date(self, field):
    """Convert a GeneWeb date field into a Gramps ``Date``.

    :param field: the (already decoded) date text from the .gw file.
    :returns: ``None`` when *field* is the literal ``"0"`` or matches
              neither ``_text_parse`` nor ``_date_parse``; otherwise a
              ``Date``.  A ``_text_parse`` match, or a value rejected by
              ``Date.set`` with ``DateError``, is kept as a text-only date.
    """
    if field == "0":
        return None
    date = Date()
    matches = _text_parse.match(field)
    if matches:
        groups = matches.groups()
        date.set_as_text(groups[0])
        date.set_modifier(Date.MOD_TEXTONLY)
        return date

    matches = _date_parse.match(field)
    if not matches:
        return None

    groups = matches.groups()
    mod = _mod_map.get(groups[0], Date.MOD_NONE)
    if groups[3] == "..":
        # "date1..date2": a span.  Only the calendar of the first date
        # (groups[2]) is handed to Date.set(); groups[5] is not used.
        mod = Date.MOD_SPAN
        sub2 = self.sub_date(groups[4])
    else:
        sub2 = (0, 0, 0)
    cal1 = _cal_map.get(groups[2], Date.CAL_GREGORIAN)
    sub1 = self.sub_date(groups[1])
    try:
        date.set(Date.QUAL_NONE, mod, cal1,
                 (sub1[0], sub1[1], sub1[2], 0,
                  sub2[0], sub2[1], sub2[2], 0))
    except DateError as e:
        # TRANSLATORS: leave the {date} and {gw_snippet} untranslated
        # in the format string, but you may re-order them if needed.
        LOG.warning(_(
            "Invalid date {date} in {gw_snippet}, "
            "preserving date as text."
            ).format(date=e.date.__dict__, gw_snippet=field))
        date.set(modifier=Date.MOD_TEXTONLY, text=field)
    return date


def sub_date(self, data):
    """Split a ``/``-separated date into a 3-tuple of ints.

    One component fills only the last slot, two components fill the last
    two, three or more use the first three; missing leading slots are 0.
    The tuple is consumed positionally by ``parse_date``.

    :raises ValueError: if a used component is not an integer.
    """
    vals = data.split('/')
    if len(vals) == 1:
        return (0, 0, int(vals[0]))
    elif len(vals) == 2:
        return (0, int(vals[0]), int(vals[1]))
    else:
        return (int(vals[0]), int(vals[1]), int(vals[2]))


def create_event(self, type, desc=None, date=None, place=None, source=None):
    """Create an ``Event``, add it to the database and commit it.

    Every argument is applied only when truthy.

    :param type: value passed to ``EventType()``.
    :param desc: event description.
    :param date: date object for the event.
    :param place: object whose handle becomes the event's place handle.
    :param source: object whose handle is added as a citation
                   (``get_or_create_source`` returns a ``Citation``).
    :returns: the committed ``Event``.
    """
    # NOTE: ``type`` shadows the builtin, but it is part of the signature
    # and may be passed by keyword, so the name is kept.
    event = Event()
    if type:
        event.set_type(EventType(type))
    if desc:
        event.set_description(desc)
    if date:
        event.set_date_object(date)
    if place:
        event.set_place_handle(place.get_handle())
    if source:
        event.add_citation(source.get_handle())
    self.db.add_event(event, self.trans)
    self.db.commit_event(event, self.trans)
    return event


def get_or_create_person(self, firstname, lastname):
    """Return the person keyed by ``firstname + lastname``.

    A cached person is reused only when neither name part is ``"?"``;
    otherwise a new ``Person`` is added, committed and (re)registered in
    ``self.ikeys`` under that key.
    """
    mykey = firstname + lastname
    if mykey in self.ikeys and firstname != "?" and lastname != "?":
        return self.db.get_person_from_handle(self.ikeys[mykey])

    person = Person()
    self.db.add_person(person, self.trans)
    self.db.commit_person(person, self.trans)
    self.ikeys[mykey] = person.get_handle()
    return person


def get_or_create_place(self, place_name):
    """Return the ``Place`` titled *place_name*, creating it if unknown.

    Handles of created places are cached in ``self.pkeys``.
    """
    if place_name in self.pkeys:
        return self.db.get_place_from_handle(self.pkeys[place_name])

    place = Place()
    place.set_title(place_name)
    self.db.add_place(place, self.trans)
    self.db.commit_place(place, self.trans)
    self.pkeys[place_name] = place.get_handle()
    return place


def get_or_create_source(self, source_name):
    """Return a new ``Citation`` pointing at the source *source_name*.

    The ``Source`` itself is looked up in / added to ``self.skeys``; a
    fresh ``Citation`` is added and committed on every call.
    """
    if source_name in self.skeys:
        source = self.db.get_source_from_handle(self.skeys[source_name])
    else:
        source = Source()
        source.set_title(source_name)
        self.db.add_source(source, self.trans)
        self.db.commit_source(source, self.trans)
        self.skeys[source_name] = source.get_handle()
    citation = Citation()
    citation.set_reference_handle(source.get_handle())
    self.db.add_citation(citation, self.trans)
    self.db.commit_citation(citation, self.trans)
    return citation


def read_fevent_line(self, event):
    """Handle a family-event line and the event lines that follow it.

    Lines whose first five characters are not a key of ``fevents_map``
    are ignored.  After the scan of the family's existing event
    references, following lines are consumed until one is neither a
    known family event nor a ``wit:`` line; ``self.current_mode`` is then
    reset to ``None``.
    """
    if fevents_map.get(event[0:5]) is None:
        return  # need to fix custom event types not in the map

    fev = None
    # get events for the current family
    # NOTE(review): the nesting below is the only one in which the
    # ``continue`` is legal (it must sit inside the ``for``).  As written,
    # an already-present event of this type returns immediately and
    # ``fev`` is therefore always None at the ``if not fev`` test --
    # confirm this is the intended behaviour.
    for evr in self.current_family.get_event_ref_list():
        ev = self.db.get_event_from_handle(evr.get_reference_handle())
        if ev.get_type() == fevents_map.get(event[0:5]):
            fev = ev  # found. Need to also check EventRef role
            return
        if not fev:  # No event found create a new one
            if evr.get_role() != EventRoleType(EventRoleType.FAMILY):
                continue
            else:
                LOG.info((ev.get_type(), self.current_family.handle))
                self.new_gwplus_fevent(event)
    while True:
        line = self.get_next_line()
        if line and line[0:5] in fevents_map:
            self.new_gwplus_fevent(line)
        elif line and line[0:4] == "wit:":
            continue
        else:
            self.current_mode = None
            #self.db.commit_family(self.current_family,self.trans)
            break


def read_pevent_line(self, event, fields):
    """Handle a person-event line and the event/note lines that follow.

    :param event: the event line; its first five characters select the
                  event type through ``pevents_map``.
    :param fields: split header line; ``fields[1]`` is used as the last
                   name and ``fields[2]`` as the first name.
    """
    name = fields[2] + fields[1]

    try:
        self.person = self.ikeys[name]
        # check key on {ikey}
    except KeyError:
        # Unknown person: keep a readable placeholder for the log output.
        self.person = "(TO_CHECK: %s)" % fields[1:]

    lastname = fields[1]
    firstname = fields[2]
    self.current_person = self.get_or_create_person(firstname, lastname)

    if pevents_map.get(event[0:5]) is None:
        return  # need to fix custom event types not in the map

    self.current_event = None
    # get events for the current person
    for evr in self.current_person.get_event_ref_list():
        ev = self.db.get_event_from_handle(evr.get_reference_handle())
        if ev.get_type() == pevents_map.get(event[0:5]):
            self.current_event = ev  # found. Need to also check EventRef role
    if not self.current_event:  # No event found create a new one
        self.current_event = self.new_gwplus_pevent(event)
    while True:
        line = self.get_next_line()
        if line and line[0:5] in pevents_map:
            self.current_mode = "person_event"
            self.current_event = self.new_gwplus_pevent(line)
        elif line and line[0:4] == "note":
            n = Note()
            n.set(line[5:])
            self.db.add_note(n, self.trans)
            if self.current_event:
                self.current_event.add_note(n.handle)
                self.db.commit_event(self.current_event, self.trans)
            else:
                # Was a stray print(); keep the information in the log.
                LOG.debug("note %s", n.handle)
        else:
            self.current_mode = None
            #self.db.commit_person(self.current_person,self.trans)
            break


def _new_gwplus_event(self, line, event_type):
    """Build and commit the event described by a gwplus event line.

    The date is parsed from everything after the first six characters;
    the token following ``#p`` names the place and the token following
    ``#s`` the source.  A trailing ``#p``/``#s`` without a value is
    ignored instead of raising ``IndexError``.
    """
    date = self.parse_date(self.decode(line[6:]))
    place = source = None
    data = line.split()
    # Pair every token with its successor so a marker at the very end of
    # the line simply has no value to read.
    for part, value in zip(data, data[1:]):
        if part == "#p":
            place = self.get_or_create_place(self.decode(value))
        elif part == "#s":
            source = self.get_or_create_source(self.decode(value))
    event = self.create_event(event_type, None, date, place, source)
    LOG.debug("new event %s", event.handle)
    return event


def new_gwplus_fevent(self, line):
    """Create the family event described by *line*.

    The event is referenced from ``self.current_family``, stored in
    ``self.current_event`` and returned.
    """
    event_type = fevents_map.get(line[0:5])
    LOG.info((line, event_type))
    self.current_event = self._new_gwplus_event(line, event_type)
    nev_ref = EventRef()
    nev_ref.set_reference_handle(self.current_event.get_handle())
    self.current_family.add_event_ref(nev_ref)
    self.db.commit_family(self.current_family, self.trans)
    return self.current_event


def new_gwplus_pevent(self, line):
    """Create the person event described by *line*.

    The event is referenced from ``self.current_person``, stored in
    ``self.current_event`` and returned.
    """
    event_type = pevents_map.get(line[0:5])
    LOG.info((self.person, line, event_type))
    self.current_event = self._new_gwplus_event(line, event_type)
    nev_ref = EventRef()
    nev_ref.set_reference_handle(self.current_event.get_handle())
    self.current_person.add_event_ref(nev_ref)
    self.db.commit_person(self.current_person, self.trans)
    return self.current_event


def decode(self, s):
    """Undo GeneWeb text escaping in *s*.

    Underscores become spaces, then numeric character references
    (``&#65;`` / ``&#x41;``) and named entities found in
    ``name2codepoint`` (``&eacute;``) are replaced by their characters.
    References that cannot be converted (non-numeric digits, code point
    out of range, unknown entity name) are left unchanged.
    """
    s = s.replace('_', ' ')

    def _charref(match):
        base = 16 if match.group(2) else 10
        try:
            return chr(int(match.group(3), base))
        except (ValueError, OverflowError):
            # int() rejects the digits or chr() rejects the code point:
            # keep the original text rather than abort the import.
            return match.group(0)

    def _entity(match):
        codepoint = name2codepoint.get(match.group(2))
        if codepoint is None:
            return match.group(0)
        return chr(codepoint)

    # Each pass substitutes every match exactly once, in place.
    s = re.sub(r'(&#)(x?)([0-9a-zA-Z]+)(;)', _charref, s)
    # replace named entities
    s = re.sub(r'(&)([a-zA-Z]+)(;)', _entity, s)
    return s


def debug(self, txt):
    """Forward *txt* to the module logger at DEBUG level."""
    LOG.debug(txt)