1# 2# Gramps - a GTK+/GNOME based genealogy program 3# 4# Copyright (C) 2000-2007 Donald N. Allingham 5# Copyright (C) 2009-2010 Gary Burton 6# Copyright (C) 2010 Nick Hall 7# Copyright (C) 2011 Tim G L Lyons 8# Copyright (C) 2016 Paul R. Culley 9# 10# This program is free software; you can redistribute it and/or modify 11# it under the terms of the GNU General Public License as published by 12# the Free Software Foundation; either version 2 of the License, or 13# (at your option) any later version. 14# 15# This program is distributed in the hope that it will be useful, 16# but WITHOUT ANY WARRANTY; without even the implied warranty of 17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18# GNU General Public License for more details. 19# 20# You should have received a copy of the GNU General Public License 21# along with this program; if not, write to the Free Software 22# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 23# 24 25""" 26Import from GEDCOM 27 28The GEDCOM file format is defined by the GEDCOM 5.5 Specification, found 29at http://www.familysearch.org/GEDCOM/GEDCOM55.EXE 30 31The basic structure is a line with three attributes: 32 33<LEVEL> <TOKEN> <DATA> 34 35Because of this structure, it does not lend itself to more traditional 36parsing techniques, such as LALR. The LEVEL token implies too much to be 37useful in this context. While this makes parsing more difficult, it 38does provide one very beneficial feature for GEDCOM: Error recoverability. 39GEDCOM is a poorly implemented standard, primarily because it is a poor 40standard to begin with. 41 42Most commercial applications that implement GEDCOM output add custom 43extensions, and feel free to violate the existing structure. If one were 44cynical, one might believe that the commercial programs were trying to 45make it difficult to transfer your data to another application. 46 47This parser takes a different approach to parsing a GEDCOM file. 
The first 48state, Lexer, reads lines from the file, and does some basic lexical 49analysis on each line (actually several lines, since it automatically 50combines CONT and CONC tagged lines). Each logical line returned to this 51parser contains: 52 53 Level, Token, Token text, Data, and line number. 54 55The Data field is typically text, but in some cases, it may be an integer 56value representing an enumerated type or a Gramps object (in the case of 57dates). 58 59The parser works on the current level. Each context and level has an 60associated table (dictionary) of functions indexed by the corresponding 61TOKEN. When a token is found, we index into the table to find the function 62associated with the token. If no token is found, a function that skips the 63line and all subordinate lines (lines with a higher level number) is called. If a function is 64found, then we call that function, which in turn processes the line, and 65all tokens at the lower level. 66 67For example: 68 691 BIRT 70 2 DATE 1 JAN 2000 71 2 UKNOWN TAG 72 3 NOTE DATA 73 74The function parsing the individual at level 1, would encounter the BIRT tag. 75It would look up the BIRT token in the table to see if a function is defined 76for this TOKEN, and pass control to this function. This function would then 77start parsing level 2. It would encounter the DATE tag, look up the 78corresponding function in the level 2 table, and pass control to its 79associated function. This function would terminate, and return control back to 80the level 2 parser, which would then encounter the "UKNOWN" tag. Since this is 81not a valid token, it would not be in the table, and a function would be called that skips 82all lines until the next level 2 token is found (in this case, skipping the 83"3 NOTE DATA" line). 
84""" 85 86#------------------------------------------------------------------------- 87# 88# standard python modules 89# 90#------------------------------------------------------------------------- 91import os 92import re 93import time 94# from xml.parsers.expat import ParserCreate 95from collections import defaultdict, OrderedDict 96import string 97import mimetypes 98from io import StringIO, TextIOWrapper 99from urllib.parse import urlparse 100 101#------------------------------------------------------------------------ 102# 103# Set up logging 104# 105#------------------------------------------------------------------------ 106import logging 107LOG = logging.getLogger(".libgedcom") 108 109#------------------------------------------------------------------------- 110# 111# Gramps modules 112# 113#------------------------------------------------------------------------- 114from gramps.gen.const import GRAMPS_LOCALE as glocale 115_ = glocale.translation.gettext 116from gramps.gen.errors import GedcomError 117from gramps.gen.lib import ( 118 Address, Attribute, AttributeType, ChildRef, 119 ChildRefType, Citation, Date, Event, EventRef, EventRoleType, 120 EventType, Family, FamilyRelType, LdsOrd, Location, Media, 121 MediaRef, Name, NameType, Note, NoteType, Person, PersonRef, Place, 122 RepoRef, Repository, RepositoryType, Researcher, 123 Source, SourceMediaType, SrcAttribute, 124 Surname, Tag, Url, UrlType, PlaceType, PlaceRef, PlaceName) 125from gramps.gen.db import DbTxn 126from gramps.gen.updatecallback import UpdateCallback 127from gramps.gen.utils.file import media_path 128from gramps.gen.utils.id import create_id 129from gramps.gen.utils.lds import TEMPLES 130from gramps.gen.utils.unknown import make_unknown, create_explanation_note 131from gramps.gen.datehandler._dateparser import DateParser 132from gramps.gen.db.dbconst import EVENT_KEY 133from gramps.gen.lib.const import IDENTICAL 134from gramps.gen.lib import (StyledText, StyledTextTag, StyledTextTagType) 
135from gramps.gen.lib.urlbase import UrlBase 136from gramps.plugins.lib.libplaceimport import PlaceImport 137from gramps.gen.display.place import displayer as _pd 138from gramps.gen.utils.grampslocale import GrampsLocale 139 140#------------------------------------------------------------------------- 141# 142# constants 143# 144#------------------------------------------------------------------------- 145TOKEN_UNKNOWN = 0 146TOKEN_ABBR = 1 147TOKEN_ADDR = 2 148TOKEN_ADOP = 3 149TOKEN_ADR1 = 4 150TOKEN_ADR2 = 5 151TOKEN_AFN = 6 152TOKEN_IGNORE = 7 153TOKEN_REFN = 8 154TOKEN__AKA = 9 155TOKEN_ALIA = 11 156TOKEN_ANCI = 12 157TOKEN_ASSO = 13 158TOKEN_AUTH = 14 159TOKEN_BAPL = 15 160TOKEN_BIRT = 16 161TOKEN__CAT = 17 162TOKEN_CAUS = 18 163TOKEN_CHAN = 19 164TOKEN_CHAR = 20 165TOKEN_CHIL = 21 166TOKEN_CITY = 22 167TOKEN__COMM = 23 168TOKEN_CONC = 24 169TOKEN_CONT = 25 170TOKEN_COPR = 26 171TOKEN_CORP = 27 172TOKEN_CTRY = 28 173TOKEN_DATA = 29 174TOKEN_DATE = 30 175TOKEN_DEAT = 32 176TOKEN_DESI = 33 177TOKEN_DEST = 34 178TOKEN_ENDL = 35 179TOKEN_EVEN = 36 180TOKEN_FAM = 38 181TOKEN_FAMC = 39 182TOKEN_FAMS = 40 183TOKEN_FILE = 41 184TOKEN_FORM = 42 185TOKEN__FREL = 43 186TOKEN_GEDC = 44 187TOKEN_GIVN = 45 188TOKEN__GODP = 46 189TOKEN_HUSB = 47 190TOKEN_INDI = 48 191TOKEN_LABL = 49 192TOKEN_LANG = 50 193TOKEN__LOC = 51 194TOKEN__MARNM = 52 195TOKEN__MREL = 53 196TOKEN__NAME = 54 197TOKEN_NAME = 55 198TOKEN_NCHI = 56 199TOKEN_NICK = 57 200TOKEN_NOTE = 58 201TOKEN_NPFX = 59 202TOKEN_NSFX = 60 203TOKEN_OBJE = 61 204TOKEN_OFFI = 62 205TOKEN_PAGE = 63 206TOKEN_PEDI = 64 207TOKEN_PERI = 65 208TOKEN_PHON = 66 209TOKEN_PLAC = 67 210TOKEN_POST = 68 211TOKEN__PRIMARY = 69 212TOKEN__PRIV = 70 213TOKEN_PUBL = 71 214TOKEN_QUAY = 72 215TOKEN_RELI = 74 216TOKEN_REPO = 75 217TOKEN_RESI = 76 218TOKEN_RFN = 77 219TOKEN_RIN = 78 220TOKEN__SCHEMA = 79 221TOKEN_SEX = 80 222TOKEN_SLGC = 81 223TOKEN_SLGS = 82 224TOKEN_SOUR = 83 225TOKEN_SPFX = 84 226TOKEN_STAE = 85 227TOKEN__STAT = 86 
228TOKEN_STAT = 87 229TOKEN_SUBM = 88 230TOKEN_SUBN = 89 231TOKEN_SURN = 90 232TOKEN_TAXT = 91 233TOKEN_TEMP = 92 234TOKEN_TEXT = 93 235TOKEN_TIME = 94 236TOKEN_TITL = 95 237TOKEN__TODO = 96 238TOKEN_TRLR = 97 239TOKEN_TYPE = 98 240TOKEN__UID = 99 241TOKEN_VERS = 100 242TOKEN_WIFE = 101 243TOKEN__WITN = 102 244TOKEN__WTN = 103 245TOKEN_AGNC = 104 246TOKEN_HEAD = 105 247TOKEN_CALN = 106 248TOKEN_MEDI = 107 249TOKEN_RELA = 108 250TOKEN__LKD = 109 251TOKEN_BLOB = 110 252TOKEN_CONL = 111 253TOKEN_AGE = 112 254TOKEN_RESN = 114 255TOKEN_ID = 115 256TOKEN_GEVENT = 116 257TOKEN_RNOTE = 117 258TOKEN_GATTR = 118 259TOKEN_ATTR = 119 260TOKEN_MAP = 120 261TOKEN_LATI = 121 262TOKEN_LONG = 122 263TOKEN_FACT = 123 264TOKEN_EMAIL = 124 265TOKEN_WWW = 125 266TOKEN_FAX = 126 267TOKEN_ROLE = 127 268TOKEN__MAR = 128 269TOKEN__MARN = 129 270TOKEN__ADPN = 130 271TOKEN__FSFTID = 131 272TOKEN__PHOTO = 132 273TOKEN__LINK = 133 274TOKEN__PRIM = 134 275TOKEN__JUST = 135 276TOKEN__TEXT = 136 277TOKEN__DATE = 137 278 279TOKENS = { 280 "_ADPN" : TOKEN__ADPN, 281 "_AKA" : TOKEN__AKA, 282 "_AKAN" : TOKEN__AKA, 283 "_ALIA" : TOKEN_ALIA, 284 "_ANCES_ORDRE" : TOKEN_IGNORE, 285 "_CAT" : TOKEN_IGNORE, 286 "_CHUR" : TOKEN_IGNORE, 287 "_COMM" : TOKEN__COMM, 288 "_DATE" : TOKEN__DATE, 289 "_DATE2" : TOKEN_IGNORE, 290 "_DETAIL" : TOKEN_IGNORE, 291 "_EMAIL" : TOKEN_EMAIL, 292 "_E-MAIL" : TOKEN_EMAIL, 293 "_FREL" : TOKEN__FREL, 294 "_FSFTID" : TOKEN__FSFTID, 295 "_GODP" : TOKEN__GODP, 296 "_ITALIC" : TOKEN_IGNORE, 297 "_JUST" : TOKEN__JUST, # FTM Citation Quality Justification 298 "_LEVEL" : TOKEN_IGNORE, 299 "_LINK" : TOKEN__LINK, 300 "_LKD" : TOKEN__LKD, 301 "_LOC" : TOKEN__LOC, 302 "_MAR" : TOKEN__MAR, 303 "_MARN" : TOKEN__MARN, 304 "_MARNM" : TOKEN__MARNM, 305 "_MASTER" : TOKEN_IGNORE, 306 "_MEDI" : TOKEN_MEDI, 307 "_MREL" : TOKEN__MREL, 308 "_NAME" : TOKEN__NAME, 309 "_PAREN" : TOKEN_IGNORE, 310 "_PHOTO" : TOKEN__PHOTO, 311 "_PLACE" : TOKEN_IGNORE, 312 "_PREF" : TOKEN__PRIMARY, 313 "_PRIM" : 
TOKEN__PRIM, 314 "_PRIMARY" : TOKEN__PRIMARY, 315 "_PRIV" : TOKEN__PRIV, 316 "_PUBLISHER" : TOKEN_IGNORE, 317 "_SCBK" : TOKEN_IGNORE, 318 "_SCHEMA" : TOKEN__SCHEMA, 319 "_SSHOW" : TOKEN_IGNORE, 320 "_STAT" : TOKEN__STAT, 321 "_TEXT" : TOKEN__TEXT, 322 "_TODO" : TOKEN__TODO, 323 "_TYPE" : TOKEN_TYPE, 324 "_UID" : TOKEN__UID, 325 "_URL" : TOKEN_WWW, 326 "_WITN" : TOKEN__WITN, 327 "_WTN" : TOKEN__WTN, 328 "ABBR" : TOKEN_ABBR, 329 "ABBREVIATION" : TOKEN_ABBR, 330 "ADDR" : TOKEN_ADDR, 331 "ADDRESS" : TOKEN_ADDR, 332 "ADDRESS1" : TOKEN_ADR1, 333 "ADDRESS2" : TOKEN_ADR2, 334 "ADOP" : TOKEN_ADOP, 335 "ADOPT" : TOKEN_ADOP, 336 "ADR1" : TOKEN_ADR1, 337 "ADR2" : TOKEN_ADR2, 338 "AFN" : TOKEN_AFN, 339 "AGE" : TOKEN_AGE, 340 "AGENCY" : TOKEN_IGNORE, 341 "AGNC" : TOKEN_AGNC, 342 "AKA" : TOKEN__AKA, 343 "ALIA" : TOKEN_ALIA, 344 "ALIAS" : TOKEN_ALIA, 345 "ANCI" : TOKEN_ANCI, 346 "ASSO" : TOKEN_ASSO, 347 "ASSOCIATES" : TOKEN_ASSO, 348 "AUTH" : TOKEN_AUTH, 349 "AUTHOR" : TOKEN_AUTH, 350 "BAPL" : TOKEN_BAPL, 351 "BAPTISM-LDS" : TOKEN_BAPL, 352 "BIRT" : TOKEN_BIRT, 353 "BIRTH" : TOKEN_BIRT, 354 "BLOB" : TOKEN_BLOB, 355 "CALL_NUMBER" : TOKEN_CALN, 356 "CALN" : TOKEN_CALN, 357 "CAUS" : TOKEN_CAUS, 358 "CAUSE" : TOKEN_CAUS, 359 "CHAN" : TOKEN_CHAN, 360 "CHANGE" : TOKEN_CHAN, 361 "CHAR" : TOKEN_CHAR, 362 "CHARACTER" : TOKEN_CHAR, 363 "CHIL" : TOKEN_CHIL, 364 "CHILD" : TOKEN_CHIL, 365 "CHILDREN_COUNT" : TOKEN_NCHI, 366 "CITY" : TOKEN_CITY, 367 "CONC" : TOKEN_CONC, 368 "CONCATENATION" : TOKEN_CONC, 369 "CONCATENTATE" : TOKEN_CONC, 370 "CONL" : TOKEN_CONL, 371 "CONT" : TOKEN_CONT, 372 "CONTINUATION" : TOKEN_CONT, 373 "CONTINUED" : TOKEN_CONT, 374 "COPR" : TOKEN_COPR, 375 "COPYRIGHT" : TOKEN_COPR, 376 "CORP" : TOKEN_CORP, 377 "CORPORATION" : TOKEN_CORP, 378 "COUNTRY" : TOKEN_CTRY, 379 "CTRY" : TOKEN_CTRY, 380 "DATA" : TOKEN_DATA, 381 "DATE" : TOKEN_DATE, 382 "DEAT" : TOKEN_DEAT, 383 "DEATH" : TOKEN_DEAT, 384 "DESI" : TOKEN_DESI, 385 "DEST" : TOKEN_DEST, 386 "DESTINATION" : TOKEN_DEST, 387 
"EMAI" : TOKEN_EMAIL, 388 "EMAIL" : TOKEN_EMAIL, 389 "ENDL" : TOKEN_ENDL, 390 "ENDOWMENT" : TOKEN_ENDL, 391 "EVEN" : TOKEN_EVEN, 392 "EVENT" : TOKEN_EVEN, 393 "FACT" : TOKEN_FACT, 394 "FAM" : TOKEN_FAM, 395 "FAMC" : TOKEN_FAMC, 396 "FAMILY" : TOKEN_FAM, 397 "FAMILY_CHILD" : TOKEN_FAMC, 398 "FAMILY_SPOUSE" : TOKEN_FAMS, 399 "FAMS" : TOKEN_FAMS, 400 "FAX" : TOKEN_FAX, 401 "FILE" : TOKEN_FILE, 402 "FORM" : TOKEN_FORM, 403 "GEDC" : TOKEN_GEDC, 404 "GEDCOM" : TOKEN_GEDC, 405 "GIVEN_NAME" : TOKEN_GIVN, 406 "GIVN" : TOKEN_GIVN, 407 "HEAD" : TOKEN_HEAD, 408 "HEADER" : TOKEN_HEAD, 409 "HUSB" : TOKEN_HUSB, 410 "HUSBAND" : TOKEN_HUSB, 411 "INDI" : TOKEN_INDI, 412 "INDIVIDUAL" : TOKEN_INDI, 413 "LABEL" : TOKEN_LABL, 414 "LABL" : TOKEN_LABL, 415 "LANG" : TOKEN_LANG, 416 "LATI" : TOKEN_LATI, 417 "LONG" : TOKEN_LONG, 418 "MAP" : TOKEN_MAP, 419 "MEDI" : TOKEN_MEDI, 420 "MEDIA" : TOKEN_MEDI, 421 "NAME" : TOKEN_NAME, 422 "NAME_PREFIX" : TOKEN_NPFX, 423 "NAME_SUFFIX" : TOKEN_NSFX, 424 "NCHI" : TOKEN_NCHI, 425 "NICK" : TOKEN_NICK, 426 "NICKNAME" : TOKEN_NICK, 427 "NOTE" : TOKEN_NOTE, 428 "NPFX" : TOKEN_NPFX, 429 "NSFX" : TOKEN_NSFX, 430 "OBJE" : TOKEN_OBJE, 431 "OBJECT" : TOKEN_OBJE, 432 "OFFI" : TOKEN_OFFI, 433 "PAGE" : TOKEN_PAGE, 434 "PEDI" : TOKEN_PEDI, 435 "PEDIGREE" : TOKEN_PEDI, 436 "PERI" : TOKEN_PERI, 437 "PHON" : TOKEN_PHON, 438 "PHONE" : TOKEN_PHON, 439 "PHONE_NUMBER" : TOKEN_PHON, 440 "PLAC" : TOKEN_PLAC, 441 "PLACE" : TOKEN_PLAC, 442 "POST" : TOKEN_POST, 443 "POSTAL_CODE" : TOKEN_POST, 444 "PUBL" : TOKEN_PUBL, 445 "PUBLICATION" : TOKEN_PUBL, 446 "QUALITY_OF_DATA" : TOKEN_QUAY, 447 "QUAY" : TOKEN_QUAY, 448 "REFERENCE" : TOKEN_REFN, 449 "REFN" : TOKEN_REFN, 450 "RELA" : TOKEN_RELA, 451 "RELI" : TOKEN_RELI, 452 "RELIGION" : TOKEN_RELI, 453 "REPO" : TOKEN_REPO, 454 "REPOSITORY" : TOKEN_REPO, 455 "RESN" : TOKEN_RESN, 456 "RFN" : TOKEN_RFN, 457 "RIN" : TOKEN_RIN, 458 "ROLE" : TOKEN_ROLE, 459 "SCHEMA" : TOKEN__SCHEMA, 460 "SEX" : TOKEN_SEX, 461 "SLGC" : TOKEN_SLGC, 462 "SLGS" : 
TOKEN_SLGS, 463 "SOUR" : TOKEN_SOUR, 464 "SOURCE" : TOKEN_SOUR, 465 "SPFX" : TOKEN_SPFX, 466 "STAE" : TOKEN_STAE, 467 "STAT" : TOKEN_STAT, 468 "STATE" : TOKEN_STAE, 469 "STATUS" : TOKEN_STAT, 470 "SUBM" : TOKEN_SUBM, 471 "SUBMISSION" : TOKEN_SUBN, 472 "SUBMITTER" : TOKEN_SUBM, 473 "SUBN" : TOKEN_SUBN, 474 "SURN" : TOKEN_SURN, 475 "SURN_PREFIX" : TOKEN_SPFX, 476 "SURNAME" : TOKEN_SURN, 477 "TAXT" : TOKEN_TAXT, 478 "TEMP" : TOKEN_TEMP, 479 "TEMPLE" : TOKEN_TEMP, 480 "TEXT" : TOKEN_TEXT, 481 "TIME" : TOKEN_TIME, 482 "TITL" : TOKEN_TITL, 483 "TITLE" : TOKEN_TITL, 484 "TRAILER" : TOKEN_TRLR, 485 "TRLR" : TOKEN_TRLR, 486 "TYPE" : TOKEN_TYPE, 487 "URL" : TOKEN_WWW, 488 "VERS" : TOKEN_VERS, 489 "VERSION" : TOKEN_VERS, 490 "WIFE" : TOKEN_WIFE, 491 "WWW" : TOKEN_WWW, 492} 493 494ADOPT_NONE = 0 495ADOPT_EVENT = 1 496ADOPT_FTW = 2 497ADOPT_LEGACY = 3 498ADOPT_PEDI = 4 499ADOPT_STD = 5 500CONC_OK = 0 501CONC_BROKEN = 1 502ALT_NAME_NONE = 0 503ALT_NAME_STD = 1 504ALT_NAME_ALIAS = 2 505ALT_NAME_AKA = 3 506ALT_NAME_EVENT_AKA = 4 507ALT_NAME_UALIAS = 5 508CALENDAR_NO = 0 509CALENDAR_YES = 1 510OBJE_NO = 0 511OBJE_YES = 1 512PREFIX_NO = 0 513PREFIX_YES = 1 514RESIDENCE_ADDR = 0 515RESIDENCE_PLAC = 1 516SOURCE_REFS_NO = 0 517SOURCE_REFS_YES = 1 518 519TYPE_BIRTH = ChildRefType() 520TYPE_ADOPT = ChildRefType(ChildRefType.ADOPTED) 521TYPE_FOSTER = ChildRefType(ChildRefType.FOSTER) 522 523RELATION_TYPES = ( 524 ChildRefType.BIRTH, 525 ChildRefType.UNKNOWN, 526 ChildRefType.NONE) 527 528PEDIGREE_TYPES = { 529 'birth' : ChildRefType(), 530 'natural': ChildRefType(), 531 'step' : ChildRefType(ChildRefType.STEPCHILD), 532 'adopted': TYPE_ADOPT, 533 'foster' : TYPE_FOSTER, } 534 535FTW_BAD_PLACE = [ 536 EventType.OCCUPATION, 537 EventType.RELIGION, 538 EventType.DEGREE, ] 539 540MEDIA_MAP = { 541 'audio' : SourceMediaType.AUDIO, 542 'book' : SourceMediaType.BOOK, 543 'card' : SourceMediaType.CARD, 544 'electronic' : SourceMediaType.ELECTRONIC, 545 'fiche' : SourceMediaType.FICHE, 546 
'microfiche' : SourceMediaType.FICHE, 547 'microfilm' : SourceMediaType.FICHE, 548 'film' : SourceMediaType.FILM, 549 'magazine' : SourceMediaType.MAGAZINE, 550 'manuscript' : SourceMediaType.MANUSCRIPT, 551 'map' : SourceMediaType.MAP, 552 'newspaper' : SourceMediaType.NEWSPAPER, 553 'photo' : SourceMediaType.PHOTO, 554 'tombstone' : SourceMediaType.TOMBSTONE, 555 'grave' : SourceMediaType.TOMBSTONE, 556 'video' : SourceMediaType.VIDEO, 557} 558 559OBJ_NOTETYPE = { 560 "Attribute" : NoteType.ATTRIBUTE, 561 "Address" : NoteType.ADDRESS, 562 "Citation" : NoteType.CITATION, 563 "Event" : NoteType.EVENT, 564 "Family" : NoteType.FAMILY, 565 "LdsOrd" : NoteType.LDS, 566 "Media" : NoteType.MEDIA, 567 "Name" : NoteType.GENERAL, 568 "Place" : NoteType.PLACE, 569 "Person" : NoteType.PERSON, 570 "Repository" : NoteType.REPO, 571 "RepoRef" : NoteType.REPOREF, 572 "Source" : NoteType.SOURCE, 573 "PersonRef" : NoteType.ASSOCIATION, 574} 575 576#------------------------------------------------------------------------- 577# 578# Integer to GEDCOM tag mappings for constants 579# 580#------------------------------------------------------------------------- 581CALENDAR_MAP_GEDCOM2XML = { 582 "FRENCH R" : Date.CAL_FRENCH, 583 "JULIAN" : Date.CAL_JULIAN, 584 "HEBREW" : Date.CAL_HEBREW, 585} 586 587QUALITY_MAP = { 588 'CAL' : Date.QUAL_CALCULATED, 589 'INT' : Date.QUAL_CALCULATED, 590 'EST' : Date.QUAL_ESTIMATED, 591} 592 593SEX_MAP = { 594 'F' : Person.FEMALE, 595 'M' : Person.MALE, 596} 597 598FAMILYCONSTANTEVENTS = { 599 EventType.ANNULMENT : "ANUL", 600 EventType.DIV_FILING : "DIVF", 601 EventType.DIVORCE : "DIV", 602 EventType.CENSUS : "CENS", 603 EventType.ENGAGEMENT : "ENGA", 604 EventType.MARR_BANNS : "MARB", 605 EventType.MARR_CONTR : "MARC", 606 EventType.MARR_LIC : "MARL", 607 EventType.MARR_SETTL : "MARS", 608 EventType.MARRIAGE : "MARR" 609} 610 611PERSONALCONSTANTEVENTS = { 612 EventType.ADOPT : "ADOP", 613 EventType.ADULT_CHRISTEN : "CHRA", 614 EventType.BIRTH : "BIRT", 
615 EventType.DEATH : "DEAT", 616 EventType.BAPTISM : "BAPM", 617 EventType.BAR_MITZVAH : "BARM", 618 EventType.BAS_MITZVAH : "BASM", 619 EventType.BLESS : "BLES", 620 EventType.BURIAL : "BURI", 621 # EventType.CAUSE_DEATH : "CAUS", Not legal Gedcom since v5.0 622 EventType.ORDINATION : "ORDN", 623 EventType.CENSUS : "CENS", 624 EventType.CHRISTEN : "CHR", 625 EventType.CONFIRMATION : "CONF", 626 EventType.CREMATION : "CREM", 627 EventType.DEGREE : "_DEG", 628 EventType.DIV_FILING : "DIVF", 629 EventType.EDUCATION : "EDUC", 630 EventType.ELECTED : "_ELEC", # FTM custom tag 631 EventType.EMIGRATION : "EMIG", 632 EventType.FIRST_COMMUN : "FCOM", 633 EventType.GRADUATION : "GRAD", 634 EventType.MED_INFO : "_MDCL", 635 EventType.MILITARY_SERV : "_MILT", 636 EventType.NATURALIZATION : "NATU", 637 EventType.NOB_TITLE : "TITL", 638 EventType.NUM_MARRIAGES : "NMR", 639 EventType.IMMIGRATION : "IMMI", 640 EventType.OCCUPATION : "OCCU", 641 EventType.PROBATE : "PROB", 642 EventType.PROPERTY : "PROP", 643 EventType.RELIGION : "RELI", 644 EventType.RESIDENCE : "RESI", 645 EventType.RETIREMENT : "RETI", 646 EventType.WILL : "WILL", 647} 648 649FAMILYCONSTANTATTRIBUTES = { 650 AttributeType.NUM_CHILD : "NCHI", 651} 652 653PERSONALCONSTANTATTRIBUTES = { 654 AttributeType.CASTE : "CAST", 655 AttributeType.DESCRIPTION : "DSCR", 656 AttributeType.ID : "IDNO", 657 AttributeType.NATIONAL : "NATI", 658 AttributeType.NUM_CHILD : "NCHI", 659 AttributeType.SSN : "SSN", 660} 661 662#------------------------------------------------------------------------- 663# 664# Gedcom to int constants 665# 666#------------------------------------------------------------------------- 667LDS_STATUS = { 668 "BIC" : LdsOrd.STATUS_BIC, 669 "CANCELED" : LdsOrd.STATUS_CANCELED, 670 "CHILD" : LdsOrd.STATUS_CHILD, 671 "CLEARED" : LdsOrd.STATUS_CLEARED, 672 "COMPLETED": LdsOrd.STATUS_COMPLETED, 673 "DNS" : LdsOrd.STATUS_DNS, 674 "INFANT" : LdsOrd.STATUS_INFANT, 675 "PRE-1970" : LdsOrd.STATUS_PRE_1970, 676 
"QUALIFIED": LdsOrd.STATUS_QUALIFIED, 677 "DNS/CAN" : LdsOrd.STATUS_DNS_CAN, 678 "STILLBORN": LdsOrd.STATUS_STILLBORN, 679 "SUBMITTED": LdsOrd.STATUS_SUBMITTED, 680 "UNCLEARED": LdsOrd.STATUS_UNCLEARED, 681} 682# ------------------------------------------------------------------------- 683# 684# Custom event friendly names. These are non-standard GEDCOM "NEW_TAG" 685# tags that start with an '_' i.e. "_DNA". FTM has several of these, other 686# programs may have more. If a tag with this format is encountered it is 687# checked in this table for a "friendly" name translation and thereafter is 688# displayed and exported as such. If the tag is NOT in this table and not 689# otherwise handled by the code, the tag itself is used for display and 690# export. For example "_XYZ" is not in the table and will be displayed as 691# "_XYZ" and exported as an EVEN.TYPE=_XYZ 692# As Custom entries, they do not appear in Gramps Events add choice unless 693# already imported via GEDCOM. 694# 695# ------------------------------------------------------------------------- 696CUSTOMEVENTTAGS = { 697 "_CIRC" : _("Circumcision"), 698 "_COML" : _("Common Law Marriage"), 699 "_DEST" : _("Destination"), 700 "_DNA" : _("DNA"), 701 "_DCAUSE" : _("Cause of Death"), 702 "_EMPLOY" : _("Employment"), 703 "_EXCM" : _("Excommunication"), 704 "_EYC" : _("Eye Color"), 705 "_FUN" : _("Funeral"), 706 "_HEIG" : _("Height"), 707 "_INIT" : _("Initiatory (LDS)"), 708 "_MILTID" : _("Military ID"), 709 "_MISN" : _("Mission (LDS)"), 710 "_NAMS" : _("Namesake"), 711 "_ORDI" : _("Ordinance"), 712 "_ORIG" : _("Origin"), 713 "_SEPR" : _("Separation"), # Applies to Families 714 "_WEIG" : _("Weight"), 715} 716# table for skipping illegal control chars in GEDCOM import 717# Only 09, 0A, 0D are allowed. 
# Code point -> None table of the control characters that are illegal in
# GEDCOM text (everything below 0x20 except TAB, LF and CR).
# NOTE(review): presumably handed to str.translate() to drop these
# characters; confirm against the readers that use it.
STRIP_DICT = dict.fromkeys(list(range(9)) + list(range(11, 13)) +
                           list(range(14, 32)))
# The C1 Control characters are not treated in Latin-1 (ISO-8859-1) as
# undefined, but if they have been used, the file is probably supposed to be
# cp1252
# DEL is 0x7F and the C1 block is 0x80-0x9F inclusive, so the range must stop
# at 0xA0; stopping at 0x9F silently left the last C1 code point (0x9F) out.
DEL_AND_C1 = dict.fromkeys(range(0x7F, 0xA0))

#-------------------------------------------------------------------------
#
# GEDCOM events to Gramps events conversion
#
#-------------------------------------------------------------------------
# The *CONSTANT* tables map a Gramps type to its GEDCOM tag; the loops below
# build the reverse (tag -> Gramps type) lookups. Note that in each loop
# "__val" is the Gramps type and "__key" is the GEDCOM tag. Family events are
# merged last, so a tag present in both tables ends up with the family value.
GED_TO_GRAMPS_EVENT = {}
for __val, __key in PERSONALCONSTANTEVENTS.items():
    if __key != "":
        GED_TO_GRAMPS_EVENT[__key] = __val

for __val, __key in FAMILYCONSTANTEVENTS.items():
    if __key != "":
        GED_TO_GRAMPS_EVENT[__key] = __val

GED_TO_GRAMPS_ATTR = {}
for __val, __key in PERSONALCONSTANTATTRIBUTES.items():
    if __key != "":
        GED_TO_GRAMPS_ATTR[__key] = __val

#-------------------------------------------------------------------------
#
# GEDCOM Date Constants
#
#-------------------------------------------------------------------------
# Month abbreviation tables; index 0 is an empty placeholder so that a month
# number can be used directly as the list index.
HMONTH = [
    "", "TSH", "CSH", "KSL", "TVT", "SHV", "ADR",
    "ADS", "NSN", "IYR", "SVN", "TMZ", "AAV", "ELL"]

FMONTH = [
    "", "VEND", "BRUM", "FRIM", "NIVO", "PLUV", "VENT",
    "GERM", "FLOR", "PRAI", "MESS", "THER", "FRUC", "COMP"]

MONTH = [
    "", "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
    "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"]

# calendar -> (month table, GEDCOM calendar escape)
CALENDAR_MAP = {
    Date.CAL_HEBREW : (HMONTH, '@#DHEBREW@'),
    Date.CAL_FRENCH : (FMONTH, '@#DFRENCH R@'),
    Date.CAL_JULIAN : (MONTH, '@#DJULIAN@'),
    Date.CAL_SWEDISH : (MONTH, '@#DUNKNOWN@'),
}

# calendar -> suffix appended to a date string before it is parsed
CALENDAR_MAP_PARSESTRING = {
    Date.CAL_HEBREW : ' (h)',
    Date.CAL_FRENCH : ' (f)',
    Date.CAL_JULIAN : ' (j)',
    Date.CAL_SWEDISH : ' (s)',
}

#how wrong calendar use is shown
CALENDAR_MAP_WRONGSTRING = {
    Date.CAL_HEBREW : ' <hebrew>',
    Date.CAL_FRENCH : ' <french rep>',
    Date.CAL_JULIAN : ' <julian>',
    Date.CAL_SWEDISH : ' <swedish>',
}

DATE_MODIFIER = {
    Date.MOD_ABOUT : "ABT",
    Date.MOD_BEFORE : "BEF",
    Date.MOD_AFTER : "AFT",
    #Date.MOD_INTERPRETED : "INT",
}

DATE_QUALITY = {
    Date.QUAL_CALCULATED : "CAL",
    Date.QUAL_ESTIMATED : "EST",
}

#-------------------------------------------------------------------------
#
# regular expressions
#
#-------------------------------------------------------------------------
# Whole-line patterns: "<level> @<xref>@ NOTE|INDI <rest>" and CONT/CONC lines
NOTE_RE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+NOTE(.*)$")
CONT_RE = re.compile(r"\s*\d+\s+CONT\s?(.*)$")
CONC_RE = re.compile(r"\s*\d+\s+CONC\s?(.*)$")
PERSON_RE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+INDI(.*)$")
# Date patterns: quality prefix, single date with a calendar escape, and
# BET..AND / FROM..TO pairs where both, only the second, or only the first
# part carries a "@#D<calendar>@" escape.
MOD = re.compile(r"\s*(INT|EST|CAL)\s+(.*)$")
CAL = re.compile(r"\s*(ABT|BEF|AFT)?\s*@#D?([^@]+)@\s*(.*)$")
RANGE = re.compile(
    r"\s*BET\s+@#D?([^@]+)@\s*(.*)\s+AND\s+@#D?([^@]+)@\s*(.*)$")
RANGE1 = re.compile(r"\s*BET\s+\s*(.*)\s+AND\s+@#D?([^@]+)@\s*(.*)$")
RANGE2 = re.compile(r"\s*BET\s+@#D?([^@]+)@\s*(.*)\s+AND\s+\s*(.*)$")
SPAN = re.compile(r"\s*FROM\s+@#D?([^@]+)@\s*(.*)\s+TO\s+@#D?([^@]+)@\s*(.*)$")
SPAN1 = re.compile(r"\s*FROM\s+\s*(.*)\s+TO\s+@#D?([^@]+)@\s*(.*)$")
SPAN2 = re.compile(r"\s*FROM\s+@#D?([^@]+)@\s*(.*)\s+TO\s+\s*(.*)$")
# Name patterns built around the GEDCOM "/surname/" delimiters
NAME_RE = re.compile(r"/?([^/]*)(/([^/]*)(/([^/]*))?)?")
SURNAME_RE = re.compile(r"/([^/]*)/([^/]*)")


#-----------------------------------------------------------------------
#
# GedcomDateParser
#
#-----------------------------------------------------------------------
class GedcomDateParser(DateParser):
    """
    DateParser variant for GEDCOM dates.

    It pins the month names to the English three-letter abbreviations and
    uses a fixed en_US locale object instead of the user's locale.
    """
    # lower-case English month abbreviation -> month number
    month_to_int = {
        'jan' : 1, 'feb' : 2, 'mar' : 3, 'apr' : 4,
        'may' : 5, 'jun' : 6, 'jul' : 7, 'aug' : 8,
        'sep' : 9, 'oct' : 10, 'nov' : 11, 'dec' : 12, }

    _locale = GrampsLocale(lang='en_US') # no register_datehandler here

    def dhformat_changed(self):
        """
        Set the numeric date format to month/day/year.

        Kept as a method so that a subclass can override it.
        NOTE(review): presumably called by the DateParser base class when
        the date-handler format is (re)initialised; confirm there.
        """
        self.dhformat = "%m/%d/%y"
#-------------------------------------------------------------------------
#
# Lexer - serves as the lexical analysis engine
#
#-------------------------------------------------------------------------
class Lexer:
    """
    Low level line reading and early parsing.

    Lines are read from the input a few at a time, split into
    (level, token, line_value, tag, line_number) tuples and queued in
    current_list, newest entry at index 0 and oldest at the end. CONT and
    CONC lines are never queued on their own: their text is folded into the
    most recently queued line. Lines that cannot be split are reported
    through the message callback and skipped.
    """
    def __init__(self, ifile, __add_msg):
        """
        @param ifile: object whose readline() returns the next line of text,
                      or an empty string at end of file
        @param __add_msg: callable taking one message string, used to report
                          lines that had to be ignored
        """
        self.ifile = ifile
        self.current_list = []
        self.eof = False
        self.cnv = None
        self.cnt = 0
        self.index = 0
        self.func_map = {TOKEN_CONT : self.__fix_token_cont,
                         TOKEN_CONC : self.__fix_token_conc}
        self.__add_msg = __add_msg

    def readline(self):
        """
        Return the oldest queued line as a GedLine.

        The queue is topped up first whenever it holds at most one entry,
        so that CONT/CONC lines following the entry about to be returned
        have already been merged into it.

        @return: a GedLine, or None at end of file or when the GedLine could
                 not be built
        """
        if len(self.current_list) <= 1 and not self.eof:
            self.__readahead()
        if not self.current_list:
            # Nothing left to hand out: ordinary end of file, not an error
            return None
        try:
            return GedLine(self.current_list.pop())
        except Exception:
            # Deliberate best effort: a line that cannot be converted is
            # logged and reported to the caller as "no line". Unlike a bare
            # except this lets KeyboardInterrupt/SystemExit through.
            LOG.debug('Error in reading Gedcom line', exc_info=True)
            return None

    def __fix_token_cont(self, data):
        """ Append the text of a CONT line to the newest queued line,
            separated by a newline """
        (level, token, value, tag, lineno) = self.current_list[0]
        self.current_list[0] = (level, token, value + '\n' + data[2],
                                tag, lineno)

    def __fix_token_conc(self, data):
        """ Append the text of a CONC line to the newest queued line """
        (level, token, value, tag, lineno) = self.current_list[0]
        if tag.startswith('@') and len(value) == 4:
            # This deals with lines of the form
            # 0 @<XREF:NOTE>@ NOTE
            #   1 CONC <SUBMITTER TEXT>
            # The previous line holds only the record tag and no data, so
            # add a space to separate the new text from the tag. This
            # prevents the first letter of the new text being lost later
            # in _GedcomParse.__parse_record
            # The xref test keeps an ordinary four character value (e.g.
            # "Text" continued by "ure") from getting a space CONC must
            # not add.
            new_value = value + ' ' + data[2]
        else:
            new_value = value + data[2]
        self.current_list[0] = (level, token, new_value, tag, lineno)

    @staticmethod
    def __split_line(line):
        """
        Split one physical GEDCOM line into its parts.

        @param line: the raw line, terminator included
        @return: (level, tag, line_value); for a record line the tag is the
                 "@xref@" identifier and line_value starts with the record
                 name
        @raise ValueError: the level is missing or not a number
        @raise IndexError: an xref_id is not closed by a second '@'
        """
        # According to the GEDCOM 5.5 standard,
        # Chapter 1 subsection Grammar "leading whitespace preceding
        # a GEDCOM line should be ignored"
        # We also strip the terminator, which is any combination
        # of carriage_return and line_feed
        line = line.lstrip(' ').rstrip('\n\r')
        # split into level+delim+rest
        (level_text, _sep, rest) = line.partition(' ')
        level = int(level_text)
        # there should only be one space after the level,
        # but we can ignore more
        rest = rest.lstrip(' ')
        if not rest.startswith('@'):
            # plain tag+delim+line_value
            (tag, _sep, line_value) = rest.partition(' ')
            return (level, tag, line_value)

        # xref_id+delim+rest; the xref_id can have spaces in it, so split
        # on the '@' signs: ['', alphanum+pointer_string, rest]
        parts = rest.split('@', 2)
        tag = '@' + parts[1] + '@'
        line_value = parts[2].lstrip()
        # Ignore meaningless @IDENT@ on CONT or CONC line
        # as noted at http://www.tamurajones.net/IdentCONT.xhtml
        if line_value.startswith(("CONT ", "CONC ")):
            (tag, _sep, line_value) = line_value.partition(' ')
        return (level, tag, line_value)

    def __report_ignored(self, original_line):
        """ Tell the message callback that a physical line was skipped """
        problem = _("Line ignored ")
        text = original_line.rstrip('\n\r')
        prob_width = 66
        problem = problem.ljust(prob_width)[0:(prob_width - 1)]
        text = text.replace("\n", "\n".ljust(prob_width + 22))
        # NOTE(review): the copy of this file under review had runs of
        # whitespace collapsed; confirm the spacing inside this format
        # string against the pristine file.
        message = "%s %s" % (problem, text)
        self.__add_msg(message)

    def __readahead(self):
        """
        Read physical lines until five entries are queued or the input is
        exhausted (in which case eof is set).
        """
        while len(self.current_list) < 5:
            line = self.ifile.readline()
            self.index += 1
            if not line:
                self.eof = True
                return

            try:
                (level, tag, line_value) = self.__split_line(line)
            except (ValueError, IndexError):
                self.__report_ignored(line)
                continue

            # Need to un-double '@' See Gedcom 5.5 spec 'any_char'
            line_value = line_value.replace('@@', '@')
            token = TOKENS.get(tag, TOKEN_UNKNOWN)

            func = self.func_map.get(token)
            if func is None:
                # There will normally only be one space between tag and
                # line_value, but in case there is more than one, remove
                # extra spaces (CONC/CONT text is left untouched).
                # Also, Gedcom spec says there should be no spaces at end of
                # line, however some programs put them there (FTM), so let's
                # leave them in place.
                self.current_list.insert(
                    0, (level, token, line_value.lstrip(), tag, self.index))
            elif self.current_list:
                func((level, token, line_value, tag, self.index))
            else:
                # A CONT/CONC before any ordinary line has nothing to be
                # attached to; report it instead of failing on the empty
                # queue.
                self.__report_ignored(line)

    def clean_up(self):
        """
        Break circular references to parsing methods stored in dictionaries
        to aid garbage collection
        """
        self.func_map.clear()
        del self.func_map
993 """ 994 dateobj = Date() 995 # Horrible hack for importing illegal GEDCOM from Apple Macintosh 996 # Classic 'Gene' program 997 text = text.replace('BET ABT', 'EST BET') 998 999 # extract out the MOD line 1000 match = MOD.match(text) 1001 mod = '' 1002 if match: 1003 (mod, text) = match.groups() 1004 qual = QUALITY_MAP.get(mod, Date.QUAL_NONE) 1005 mod += ' ' 1006 else: 1007 qual = Date.QUAL_NONE 1008 1009 # parse the range if we match, if so, return 1010 match = RANGE.match(text) 1011 match1 = RANGE1.match(text) 1012 match2 = RANGE2.match(text) 1013 if match or match1 or match2: 1014 if match: 1015 (cal1, data1, cal2, data2) = match.groups() 1016 elif match1: 1017 cal1 = Date.CAL_GREGORIAN 1018 (data1, cal2, data2) = match1.groups() 1019 elif match2: 1020 cal2 = Date.CAL_GREGORIAN 1021 (cal1, data1, data2) = match2.groups() 1022 cal1 = CALENDAR_MAP_GEDCOM2XML.get(cal1, Date.CAL_GREGORIAN) 1023 cal2 = CALENDAR_MAP_GEDCOM2XML.get(cal2, Date.CAL_GREGORIAN) 1024 if cal1 != cal2: 1025 #not supported by GRAMPS, import as text, we construct a string 1026 # that the parser will not parse as a correct date 1027 return GedLine.__DATE_CNV.parse( 1028 '%sbetween %s%s and %s%s' % 1029 (mod, data1, CALENDAR_MAP_WRONGSTRING.get(cal1, ''), 1030 CALENDAR_MAP_WRONGSTRING.get(cal2, ''), data2)) 1031 1032 #add hebrew, ... 
calendar so that months are recognized 1033 data1 += CALENDAR_MAP_PARSESTRING.get(cal1, '') 1034 data2 += CALENDAR_MAP_PARSESTRING.get(cal2, '') 1035 start = GedLine.__DATE_CNV.parse(data1) 1036 stop = GedLine.__DATE_CNV.parse(data2) 1037 dateobj.set(Date.QUAL_NONE, Date.MOD_RANGE, cal1, 1038 start.get_start_date() + stop.get_start_date()) 1039 dateobj.set_quality(qual) 1040 return dateobj 1041 1042 # parse a span if we match 1043 match = SPAN.match(text) 1044 match1 = SPAN1.match(text) 1045 match2 = SPAN2.match(text) 1046 if match or match1 or match2: 1047 if match: 1048 (cal1, data1, cal2, data2) = match.groups() 1049 elif match1: 1050 cal1 = Date.CAL_GREGORIAN 1051 (data1, cal2, data2) = match1.groups() 1052 elif match2: 1053 cal2 = Date.CAL_GREGORIAN 1054 (cal1, data1, data2) = match2.groups() 1055 cal1 = CALENDAR_MAP_GEDCOM2XML.get(cal1, Date.CAL_GREGORIAN) 1056 cal2 = CALENDAR_MAP_GEDCOM2XML.get(cal2, Date.CAL_GREGORIAN) 1057 if cal1 != cal2: 1058 #not supported by GRAMPS, import as text, we construct a string 1059 # that the parser will not parse as a correct date 1060 return GedLine.__DATE_CNV.parse( 1061 '%sfrom %s%s to %s%s' % 1062 (mod, data1, CALENDAR_MAP_WRONGSTRING.get(cal1, ''), 1063 CALENDAR_MAP_WRONGSTRING.get(cal2, ''), data2)) 1064 #add hebrew, ... 
calendar so that months are recognized 1065 data1 += CALENDAR_MAP_PARSESTRING.get(cal1, '') 1066 data2 += CALENDAR_MAP_PARSESTRING.get(cal2, '') 1067 start = GedLine.__DATE_CNV.parse(data1) 1068 stop = GedLine.__DATE_CNV.parse(data2) 1069 dateobj.set(Date.QUAL_NONE, Date.MOD_SPAN, cal1, 1070 start.get_start_date() + stop.get_start_date()) 1071 dateobj.set_quality(qual) 1072 return dateobj 1073 1074 match = CAL.match(text) 1075 if match: 1076 (abt, call, data) = match.groups() 1077 call = CALENDAR_MAP_GEDCOM2XML.get(call, Date.CAL_GREGORIAN) 1078 data += CALENDAR_MAP_PARSESTRING.get(call, '') 1079 if abt: 1080 dateobj = GedLine.__DATE_CNV.parse("%s %s" % (abt, data)) 1081 else: 1082 dateobj = GedLine.__DATE_CNV.parse(data) 1083 dateobj.set_quality(qual) 1084 return dateobj 1085 dateobj = GedLine.__DATE_CNV.parse(text) 1086 dateobj.set_quality(qual) 1087 return dateobj 1088 1089 def __init__(self, data): 1090 """ 1091 If the level is 0, then this is a top level instance. In this case, 1092 we may find items in the form of: 1093 1094 <LEVEL> @ID@ <ITEM> 1095 1096 If this is not the top level, we check the MAP_DATA array to see if 1097 there is a conversion function for the data. 
1098 """ 1099 self.line = data[4] 1100 self.level = data[0] 1101 self.token = data[1] 1102 self.token_text = data[3].strip() 1103 self.data = str(data[2]) 1104 1105 if self.level == 0: 1106 if (self.token_text and self.token_text[0] == '@' and 1107 self.token_text[-1] == '@'): 1108 self.token = TOKEN_ID 1109 self.token_text = self.token_text[1:-1] 1110 self.data = self.data.strip() 1111 else: 1112 func = _MAP_DATA.get(self.token) 1113 if func: 1114 func(self) 1115 1116 def calc_sex(self): 1117 """ 1118 Converts the data field to a gen.lib token indicating the gender 1119 """ 1120 try: 1121 self.data = SEX_MAP.get(self.data.strip()[0], 1122 Person.UNKNOWN) 1123 except: 1124 self.data = Person.UNKNOWN 1125 1126 def calc_date(self): 1127 """ 1128 Converts the data field to a Date object 1129 """ 1130 self.data = self.__extract_date(self.data) 1131 self.token = TOKEN_DATE 1132 1133 def calc_unknown(self): 1134 """ 1135 Checks to see if the token maps a known GEDCOM event. If so, we 1136 change the type from UNKNOWN to TOKEN_GEVENT (gedcom event), and 1137 the data is assigned to the associated Gramps EventType 1138 """ 1139 token = GED_TO_GRAMPS_EVENT.get(self.token_text) 1140 if token: 1141 event = Event() 1142 event.set_description(self.data) 1143 event.set_type(token) 1144 self.token = TOKEN_GEVENT 1145 self.data = event 1146 else: 1147 token = GED_TO_GRAMPS_ATTR.get(self.token_text) 1148 if token: 1149 attr = Attribute() 1150 attr.set_value(self.data) 1151 attr.set_type(token) 1152 self.token = TOKEN_ATTR 1153 self.data = attr 1154 1155 def calc_note(self): 1156 """ look for a note xref @N0001@ """ 1157 gid = self.data.strip() 1158 if len(gid) > 2 and gid[0] == '@' and gid[-1] == '@': 1159 self.token = TOKEN_RNOTE 1160 self.data = gid[1:-1] 1161 1162 def calc_nchi(self): 1163 """ set attribute for number of children """ 1164 attr = Attribute() 1165 attr.set_value(self.data) 1166 attr.set_type(AttributeType.NUM_CHILD) 1167 self.data = attr 1168 self.token = 
class BaseReader:
    """
    Common base of the character level readers.

    Holds the file object, the encoding label and the callback used to
    report problems; subclasses supply readline().
    """
    def __init__(self, ifile, encoding, __add_msg):
        self.__add_msg = __add_msg
        self.enc = encoding
        self.ifile = ifile

    def reset(self):
        """ rewind the file to its start """
        self.ifile.seek(0)

    def readline(self):
        """ Read a single line (to be provided by the subclass) """
        raise NotImplementedError()

    def report_error(self, problem, line):
        """
        Format a problem description together with the offending line and
        pass the result to the message callback.
        """
        width = 66
        # pad the description to the column width, keep width - 1 characters
        label = problem.ljust(width)[:width - 1]
        # newlines inside the line are followed by padding so that the
        # continuation text is indented
        continuation = "\n".ljust(width + 22)
        shown = line.rstrip('\n\r').replace("\n", continuation)
        self.__add_msg("%s %s" % (label, shown))


class UTF8Reader(BaseReader):
    """ The main UTF-8 reader, uses Python for char handling """
    def __init__(self, ifile, __add_msg, enc):
        BaseReader.__init__(self, ifile, enc, __add_msg)
        self.reset()
        # 'UTF_8_SIG' selects the codec that swallows a leading BOM
        codec = 'utf_8_sig' if enc == 'UTF_8_SIG' else 'utf_8'
        self.ifile = TextIOWrapper(ifile, encoding=codec,
                                   errors='replace', newline=None)

    def readline(self):
        """ Read one decoded line with the STRIP_DICT translation applied """
        return self.ifile.readline().translate(STRIP_DICT)
class UTF16Reader(BaseReader):
    """ The main UTF-16 reader, uses Python for char handling """
    def __init__(self, ifile, __add_msg):
        BaseReader.__init__(self, ifile, 'UTF16', __add_msg)
        wrapped = TextIOWrapper(ifile, encoding='utf_16',
                                errors='replace', newline=None)
        self.ifile = wrapped
        # rewind through the wrapper, not the raw file
        self.reset()

    def readline(self):
        """ Read one decoded line with the STRIP_DICT translation applied """
        return self.ifile.readline().translate(STRIP_DICT)


class AnsiReader(BaseReader):
    """ The main ANSI (latin1) reader, uses Python for char handling """
    def __init__(self, ifile, __add_msg):
        BaseReader.__init__(self, ifile, 'latin1', __add_msg)
        wrapped = TextIOWrapper(ifile, encoding='latin1',
                                errors='replace', newline=None)
        self.ifile = wrapped

    def readline(self):
        """
        Read one decoded line; a line that the DEL_AND_C1 translation
        alters is reported before the STRIP_DICT translation is applied.
        """
        raw = self.ifile.readline()
        if raw != raw.translate(DEL_AND_C1):
            self.report_error("DEL or C1 control chars in line did you mean "
                              "CHAR cp1252??", raw)
        return raw.translate(STRIP_DICT)


class CP1252Reader(BaseReader):
    """ The extra credit CP1252 reader, uses Python for char handling """
    def __init__(self, ifile, __add_msg):
        BaseReader.__init__(self, ifile, 'cp1252', __add_msg)
        wrapped = TextIOWrapper(ifile, encoding='cp1252',
                                errors='replace', newline=None)
        self.ifile = wrapped

    def readline(self):
        """ Read one decoded line with the STRIP_DICT translation applied """
        return self.ifile.readline().translate(STRIP_DICT)
class AnselReader(BaseReader):
    """
    ANSEL to Unicode Conversion

    ANSEL references:
    http://lcweb2.loc.gov/diglib/codetables/45.html
    http://www.gymel.com/charsets/ANSEL.html

    list of ANSEL codes that replicate ASCII
    note that DEL (127=0x7F) is a control char
    Note: spec allows control-chars that Gramps probably doesn't use
    but 10=0x0A _is_ needed (!)
    ---
    Also: there are two additional control chars 0x98,0x9c (unicode same)
    which we also ignore for now (start/end of string (or sort sequence)
    ---
    ?: should we allow TAB, as a Gramps extension?
    """
    # frozensets: these are only ever used for membership tests
    __printable_ascii = frozenset(map(chr, range(32, 127)))  # up thru 126
    #                                   LF  CR  Esc GS  RS  US
    __use_ASCII = frozenset(map(chr, [10, 13, 27, 29, 30, 31])) | \
        __printable_ascii

    # mappings of single byte ANSEL codes to unicode
    __onebyte = {
        b'\xA1' : '\u0141', b'\xA2' : '\u00d8', b'\xA3' : '\u0110',
        b'\xA4' : '\u00de', b'\xA5' : '\u00c6', b'\xA6' : '\u0152',
        b'\xA7' : '\u02b9', b'\xA8' : '\u00b7', b'\xA9' : '\u266d',
        b'\xAA' : '\u00ae', b'\xAB' : '\u00b1', b'\xAC' : '\u01a0',
        b'\xAD' : '\u01af', b'\xAE' : '\u02bc', b'\xB0' : '\u02bb',
        b'\xB1' : '\u0142', b'\xB2' : '\u00f8', b'\xB3' : '\u0111',
        b'\xB4' : '\u00fe', b'\xB5' : '\u00e6', b'\xB6' : '\u0153',
        b'\xB7' : '\u02ba', b'\xB8' : '\u0131', b'\xB9' : '\u00a3',
        b'\xBA' : '\u00f0', b'\xBC' : '\u01a1', b'\xBD' : '\u01b0',
        b'\xBE' : '\u25a1', b'\xBF' : '\u25a0',
        b'\xC0' : '\u00b0', b'\xC1' : '\u2113', b'\xC2' : '\u2117',
        b'\xC3' : '\u00a9', b'\xC4' : '\u266f', b'\xC5' : '\u00bf',
        b'\xC6' : '\u00a1', b'\xC7' : '\u00df', b'\xC8' : '\u20ac',
        b'\xCD' : '\u0065', b'\xCE' : '\u006f', b'\xCF' : '\u00df', }

    # combining forms (in ANSEL, they precede the modified ASCII character
    # whereas the unicode combining term follows the character modified
    # Note: unicode allows multiple modifiers, but ANSEL may not (TDB?),
    # so we ignore multiple combining forms in this module
    #  8d & 8e are zero-width joiner (ZWJ), and zero-width non-joiner ZWNJ
    #  (strange things) probably not commonly found in our needs, unless one
    #   starts writing persian (or???) poetry in ANSEL
    __acombiners = {
        b'\x8D' : '\u200d', b'\x8E' : '\u200c', b'\xE0' : '\u0309',
        b'\xE1' : '\u0300', b'\xE2' : '\u0301', b'\xE3' : '\u0302',
        b'\xE4' : '\u0303', b'\xE5' : '\u0304', b'\xE6' : '\u0306',
        b'\xE7' : '\u0307', b'\xE8' : '\u0308', b'\xE9' : '\u030c',
        b'\xEA' : '\u030a', b'\xEB' : '\ufe20', b'\xEC' : '\ufe21',
        b'\xED' : '\u0315', b'\xEE' : '\u030b', b'\xEF' : '\u0310',
        b'\xF0' : '\u0327', b'\xF1' : '\u0328', b'\xF2' : '\u0323',
        b'\xF3' : '\u0324', b'\xF4' : '\u0325', b'\xF5' : '\u0333',
        b'\xF6' : '\u0332', b'\xF7' : '\u0326', b'\xF8' : '\u031c',
        b'\xF9' : '\u032e', b'\xFA' : '\ufe22', b'\xFB' : '\ufe23',
        b'\xFC' : '\u0338',
        b'\xFE' : '\u0313', }

    # mappings of two byte (precomposed forms) ANSEL codes to unicode
    __twobyte = {
        b'\xE0\x41' : '\u1ea2', b'\xE0\x45' : '\u1eba',
        b'\xE0\x49' : '\u1ec8', b'\xE0\x4F' : '\u1ece',
        b'\xE0\x55' : '\u1ee6', b'\xE0\x59' : '\u1ef6',
        b'\xE0\x61' : '\u1ea3', b'\xE0\x65' : '\u1ebb',
        b'\xE0\x69' : '\u1ec9', b'\xE0\x6F' : '\u1ecf',
        b'\xE0\x75' : '\u1ee7', b'\xE0\x79' : '\u1ef7',
        b'\xE1\x41' : '\u00c0', b'\xE1\x45' : '\u00c8',
        b'\xE1\x49' : '\u00cc', b'\xE1\x4F' : '\u00d2',
        b'\xE1\x55' : '\u00d9', b'\xE1\x57' : '\u1e80',
        b'\xE1\x59' : '\u1ef2', b'\xE1\x61' : '\u00e0',
        b'\xE1\x65' : '\u00e8', b'\xE1\x69' : '\u00ec',
        b'\xE1\x6F' : '\u00f2', b'\xE1\x75' : '\u00f9',
        b'\xE1\x77' : '\u1e81', b'\xE1\x79' : '\u1ef3',
        b'\xE2\x41' : '\u00c1', b'\xE2\x43' : '\u0106',
        b'\xE2\x45' : '\u00c9', b'\xE2\x47' : '\u01f4',
        b'\xE2\x49' : '\u00cd', b'\xE2\x4B' : '\u1e30',
        b'\xE2\x4C' : '\u0139', b'\xE2\x4D' : '\u1e3e',
        b'\xE2\x4E' : '\u0143', b'\xE2\x4F' : '\u00d3',
        b'\xE2\x50' : '\u1e54', b'\xE2\x52' : '\u0154',
        b'\xE2\x53' : '\u015a', b'\xE2\x55' : '\u00da',
        b'\xE2\x57' : '\u1e82', b'\xE2\x59' : '\u00dd',
        b'\xE2\x5A' : '\u0179', b'\xE2\x61' : '\u00e1',
        b'\xE2\x63' : '\u0107', b'\xE2\x65' : '\u00e9',
        b'\xE2\x67' : '\u01f5', b'\xE2\x69' : '\u00ed',
        b'\xE2\x6B' : '\u1e31', b'\xE2\x6C' : '\u013a',
        b'\xE2\x6D' : '\u1e3f', b'\xE2\x6E' : '\u0144',
        b'\xE2\x6F' : '\u00f3', b'\xE2\x70' : '\u1e55',
        b'\xE2\x72' : '\u0155', b'\xE2\x73' : '\u015b',
        b'\xE2\x75' : '\u00fa', b'\xE2\x77' : '\u1e83',
        b'\xE2\x79' : '\u00fd', b'\xE2\x7A' : '\u017a',
        b'\xE2\xA5' : '\u01fc', b'\xE2\xB5' : '\u01fd',
        b'\xE3\x41' : '\u00c2', b'\xE3\x43' : '\u0108',
        b'\xE3\x45' : '\u00ca', b'\xE3\x47' : '\u011c',
        b'\xE3\x48' : '\u0124', b'\xE3\x49' : '\u00ce',
        b'\xE3\x4A' : '\u0134', b'\xE3\x4F' : '\u00d4',
        b'\xE3\x53' : '\u015c', b'\xE3\x55' : '\u00db',
        b'\xE3\x57' : '\u0174', b'\xE3\x59' : '\u0176',
        b'\xE3\x5A' : '\u1e90', b'\xE3\x61' : '\u00e2',
        b'\xE3\x63' : '\u0109', b'\xE3\x65' : '\u00ea',
        b'\xE3\x67' : '\u011d', b'\xE3\x68' : '\u0125',
        b'\xE3\x69' : '\u00ee', b'\xE3\x6A' : '\u0135',
        b'\xE3\x6F' : '\u00f4', b'\xE3\x73' : '\u015d',
        b'\xE3\x75' : '\u00fb', b'\xE3\x77' : '\u0175',
        b'\xE3\x79' : '\u0177', b'\xE3\x7A' : '\u1e91',
        b'\xE4\x41' : '\u00c3', b'\xE4\x45' : '\u1ebc',
        b'\xE4\x49' : '\u0128', b'\xE4\x4E' : '\u00d1',
        b'\xE4\x4F' : '\u00d5', b'\xE4\x55' : '\u0168',
        b'\xE4\x56' : '\u1e7c', b'\xE4\x59' : '\u1ef8',
        b'\xE4\x61' : '\u00e3', b'\xE4\x65' : '\u1ebd',
        b'\xE4\x69' : '\u0129', b'\xE4\x6E' : '\u00f1',
        b'\xE4\x6F' : '\u00f5', b'\xE4\x75' : '\u0169',
        b'\xE4\x76' : '\u1e7d', b'\xE4\x79' : '\u1ef9',
        b'\xE5\x41' : '\u0100', b'\xE5\x45' : '\u0112',
        b'\xE5\x47' : '\u1e20', b'\xE5\x49' : '\u012a',
        b'\xE5\x4F' : '\u014c', b'\xE5\x55' : '\u016a',
        b'\xE5\x61' : '\u0101', b'\xE5\x65' : '\u0113',
        b'\xE5\x67' : '\u1e21', b'\xE5\x69' : '\u012b',
        b'\xE5\x6F' : '\u014d', b'\xE5\x75' : '\u016b',
        b'\xE5\xA5' : '\u01e2', b'\xE5\xB5' : '\u01e3',
        b'\xE6\x41' : '\u0102', b'\xE6\x45' : '\u0114',
        b'\xE6\x47' : '\u011e', b'\xE6\x49' : '\u012c',
        b'\xE6\x4F' : '\u014e', b'\xE6\x55' : '\u016c',
        b'\xE6\x61' : '\u0103', b'\xE6\x65' : '\u0115',
        b'\xE6\x67' : '\u011f', b'\xE6\x69' : '\u012d',
        b'\xE6\x6F' : '\u014f', b'\xE6\x75' : '\u016d',
        b'\xE7\x42' : '\u1e02', b'\xE7\x43' : '\u010a',
        b'\xE7\x44' : '\u1e0a', b'\xE7\x45' : '\u0116',
        b'\xE7\x46' : '\u1e1e', b'\xE7\x47' : '\u0120',
        b'\xE7\x48' : '\u1e22', b'\xE7\x49' : '\u0130',
        b'\xE7\x4D' : '\u1e40', b'\xE7\x4E' : '\u1e44',
        b'\xE7\x50' : '\u1e56', b'\xE7\x52' : '\u1e58',
        b'\xE7\x53' : '\u1e60', b'\xE7\x54' : '\u1e6a',
        b'\xE7\x57' : '\u1e86', b'\xE7\x58' : '\u1e8a',
        b'\xE7\x59' : '\u1e8e', b'\xE7\x5A' : '\u017b',
        b'\xE7\x62' : '\u1e03', b'\xE7\x63' : '\u010b',
        b'\xE7\x64' : '\u1e0b', b'\xE7\x65' : '\u0117',
        b'\xE7\x66' : '\u1e1f', b'\xE7\x67' : '\u0121',
        b'\xE7\x68' : '\u1e23', b'\xE7\x6D' : '\u1e41',
        b'\xE7\x6E' : '\u1e45', b'\xE7\x70' : '\u1e57',
        b'\xE7\x72' : '\u1e59', b'\xE7\x73' : '\u1e61',
        b'\xE7\x74' : '\u1e6b', b'\xE7\x77' : '\u1e87',
        b'\xE7\x78' : '\u1e8b', b'\xE7\x79' : '\u1e8f',
        b'\xE7\x7A' : '\u017c', b'\xE8\x41' : '\u00c4',
        b'\xE8\x45' : '\u00cb', b'\xE8\x48' : '\u1e26',
        b'\xE8\x49' : '\u00cf', b'\xE8\x4F' : '\u00d6',
        b'\xE8\x55' : '\u00dc', b'\xE8\x57' : '\u1e84',
        b'\xE8\x58' : '\u1e8c', b'\xE8\x59' : '\u0178',
        b'\xE8\x61' : '\u00e4', b'\xE8\x65' : '\u00eb',
        b'\xE8\x68' : '\u1e27', b'\xE8\x69' : '\u00ef',
        b'\xE8\x6F' : '\u00f6', b'\xE8\x74' : '\u1e97',
        b'\xE8\x75' : '\u00fc', b'\xE8\x77' : '\u1e85',
        b'\xE8\x78' : '\u1e8d', b'\xE8\x79' : '\u00ff',
        b'\xE9\x41' : '\u01cd', b'\xE9\x43' : '\u010c',
        b'\xE9\x44' : '\u010e', b'\xE9\x45' : '\u011a',
        b'\xE9\x47' : '\u01e6', b'\xE9\x49' : '\u01cf',
        b'\xE9\x4B' : '\u01e8', b'\xE9\x4C' : '\u013d',
        b'\xE9\x4E' : '\u0147', b'\xE9\x4F' : '\u01d1',
        b'\xE9\x52' : '\u0158', b'\xE9\x53' : '\u0160',
        b'\xE9\x54' : '\u0164', b'\xE9\x55' : '\u01d3',
        b'\xE9\x5A' : '\u017d', b'\xE9\x61' : '\u01ce',
        b'\xE9\x63' : '\u010d', b'\xE9\x64' : '\u010f',
        b'\xE9\x65' : '\u011b', b'\xE9\x67' : '\u01e7',
        b'\xE9\x69' : '\u01d0', b'\xE9\x6A' : '\u01f0',
        b'\xE9\x6B' : '\u01e9', b'\xE9\x6C' : '\u013e',
        b'\xE9\x6E' : '\u0148', b'\xE9\x6F' : '\u01d2',
        b'\xE9\x72' : '\u0159', b'\xE9\x73' : '\u0161',
        b'\xE9\x74' : '\u0165', b'\xE9\x75' : '\u01d4',
        b'\xE9\x7A' : '\u017e', b'\xEA\x41' : '\u00c5',
        b'\xEA\x61' : '\u00e5', b'\xEA\x75' : '\u016f',
        b'\xEA\x77' : '\u1e98', b'\xEA\x79' : '\u1e99',
        b'\xEA\xAD' : '\u016e', b'\xEE\x4F' : '\u0150',
        b'\xEE\x55' : '\u0170', b'\xEE\x6F' : '\u0151',
        b'\xEE\x75' : '\u0171', b'\xF0\x20' : '\u00b8',
        b'\xF0\x43' : '\u00c7', b'\xF0\x44' : '\u1e10',
        b'\xF0\x47' : '\u0122', b'\xF0\x48' : '\u1e28',
        b'\xF0\x4B' : '\u0136', b'\xF0\x4C' : '\u013b',
        b'\xF0\x4E' : '\u0145', b'\xF0\x52' : '\u0156',
        b'\xF0\x53' : '\u015e', b'\xF0\x54' : '\u0162',
        b'\xF0\x63' : '\u00e7', b'\xF0\x64' : '\u1e11',
        b'\xF0\x67' : '\u0123', b'\xF0\x68' : '\u1e29',
        b'\xF0\x6B' : '\u0137', b'\xF0\x6C' : '\u013c',
        b'\xF0\x6E' : '\u0146', b'\xF0\x72' : '\u0157',
        b'\xF0\x73' : '\u015f', b'\xF0\x74' : '\u0163',
        b'\xF1\x41' : '\u0104', b'\xF1\x45' : '\u0118',
        b'\xF1\x49' : '\u012e', b'\xF1\x4F' : '\u01ea',
        b'\xF1\x55' : '\u0172', b'\xF1\x61' : '\u0105',
        b'\xF1\x65' : '\u0119', b'\xF1\x69' : '\u012f',
        b'\xF1\x6F' : '\u01eb', b'\xF1\x75' : '\u0173',
        b'\xF2\x41' : '\u1ea0', b'\xF2\x42' : '\u1e04',
        b'\xF2\x44' : '\u1e0c', b'\xF2\x45' : '\u1eb8',
        b'\xF2\x48' : '\u1e24', b'\xF2\x49' : '\u1eca',
        b'\xF2\x4B' : '\u1e32', b'\xF2\x4C' : '\u1e36',
        b'\xF2\x4D' : '\u1e42', b'\xF2\x4E' : '\u1e46',
        b'\xF2\x4F' : '\u1ecc', b'\xF2\x52' : '\u1e5a',
        b'\xF2\x53' : '\u1e62', b'\xF2\x54' : '\u1e6c',
        b'\xF2\x55' : '\u1ee4', b'\xF2\x56' : '\u1e7e',
        b'\xF2\x57' : '\u1e88', b'\xF2\x59' : '\u1ef4',
        b'\xF2\x5A' : '\u1e92', b'\xF2\x61' : '\u1ea1',
        b'\xF2\x62' : '\u1e05', b'\xF2\x64' : '\u1e0d',
        b'\xF2\x65' : '\u1eb9', b'\xF2\x68' : '\u1e25',
        b'\xF2\x69' : '\u1ecb', b'\xF2\x6B' : '\u1e33',
        b'\xF2\x6C' : '\u1e37', b'\xF2\x6D' : '\u1e43',
        b'\xF2\x6E' : '\u1e47', b'\xF2\x6F' : '\u1ecd',
        b'\xF2\x72' : '\u1e5b', b'\xF2\x73' : '\u1e63',
        b'\xF2\x74' : '\u1e6d', b'\xF2\x75' : '\u1ee5',
        b'\xF2\x76' : '\u1e7f', b'\xF2\x77' : '\u1e89',
        b'\xF2\x79' : '\u1ef5', b'\xF2\x7A' : '\u1e93',
        b'\xF3\x55' : '\u1e72', b'\xF3\x75' : '\u1e73',
        b'\xF4\x41' : '\u1e00', b'\xF4\x61' : '\u1e01',
        b'\xF9\x48' : '\u1e2a', b'\xF9\x68' : '\u1e2b', }

    def __ansel_to_unicode(self, text):
        """
        Convert an ANSEL encoded text to unicode

        @param text: the raw bytes of one line
        @return: the decoded string. Disallowed control characters are
            replaced by a space, unknown high bytes by U+FFFD, and a
            combining byte that is not followed by a printable ASCII
            character is dropped. All such bytes are listed in a single
            message passed to report_error().
        """
        parts = []
        error = ""
        pos = 0
        size = len(text)
        while pos < size:
            byte = text[pos]
            if byte < 128:
                char = chr(byte)
                if char in AnselReader.__use_ASCII:
                    head = char
                else:
                    # substitute space for disallowed (control) chars
                    error += " (%#X)" % byte
                    head = ' '
                pos += 1
            else:
                pair = text[pos:pos + 2]
                single = text[pos:pos + 1]
                if pair in AnselReader.__twobyte:
                    head = AnselReader.__twobyte[pair]
                    pos += 2
                elif single in AnselReader.__onebyte:
                    head = AnselReader.__onebyte[single]
                    pos += 1
                elif single in AnselReader.__acombiners:
                    cmb = AnselReader.__acombiners[single]
                    # always consume the combiner
                    pos += 1
                    # the combiner may be the very last byte of the input,
                    # so check that a base character is actually present
                    if pos < size and text[pos] < 128 and \
                            chr(text[pos]) in AnselReader.__printable_ascii:
                        # unicode: combiner follows base-char
                        head = chr(text[pos]) + cmb
                        # consume next as well
                        pos += 1
                    else:
                        # just drop the unexpected combiner, and report the
                        # combiner itself rather than whatever follows it
                        error += " (%#X)" % byte
                        continue
                else:
                    error += " (%#X)" % byte
                    head = '\ufffd'  # "Replacement Char"
                    pos += 1
            parts.append(head)
        ans = "".join(parts)

        if error:
            # e.g. Illegal character (0xAB) (0xCB)... 1 NOTE xyz?pqr?lmn
            self.report_error(_("Illegal character%s") % error, ans)
        return ans

    def __init__(self, ifile, __add_msg):
        BaseReader.__init__(self, ifile, "ANSEL", __add_msg)
        # In theory, we should have been able to skip the encode/decode from
        # ascii.  But this way allows us to use pythons universal newline
        self.ifile = TextIOWrapper(ifile, encoding='ascii',
                                   errors='surrogateescape', newline=None)

    def readline(self):
        """
        Read one line, turn it back into the original bytes (the
        surrogateescape round trip is lossless) and decode it from ANSEL.
        """
        line = self.ifile.readline()
        linebytes = line.encode(encoding='ascii',
                                errors='surrogateescape')
        return self.__ansel_to_unicode(linebytes)
class CurrentState:
    """
    Bag of state variables shared by the parsing functions.

    Reading an attribute that was never set yields None instead of raising
    AttributeError.
    """
    def __init__(self, person=None, level=0, event=None, event_ref=None):
        """
        Set every known state variable to its starting value.
        """
        initial = (
            ('name_cnt', 0),
            ('person', person),
            ('family', None),
            ('level', level),
            ('event', event),
            ('event_ref', event_ref),
            ('source_ref', None),
            ('citation', None),
            ('note', None),
            ('lds_ord', None),
            ('msg', ""),
            ('primary', False),      # _PRIMARY tag on an INDI.FAMC tag
            ('filename', ""),
            ('title', ""),
            ('addr', None),
            ('res', None),
            ('source', None),
            ('ftype', None),
            ('pf', None),            # method for parsing places
            ('location', None),
            ('place_fields', None),  # method for parsing places
            ('ref', None),           # PersonRef
            ('handle', None),
            ('form', ""),            # Multimedia format
            ('frel', None),          # Child relation to father
            ('mrel', None),
            ('repo', None),
            ('attr', None),
            ('obj', None),
            ('name', ""),
            ('ignore', False),
            ('repo_ref', None),
            ('place', None),
            ('media', None),
            ('photo', ""),           # Person primary photo
            ('prim', None),          # Photo is primary
        )
        for attr_name, value in initial:
            setattr(self, attr_name, value)

    def __getattr__(self, name):
        """
        Fallback lookup: return the stored value, or None if there is none.
        """
        return vars(self).get(name)

    def __setattr__(self, name, value):
        """
        Store the value under the given attribute name.
        """
        vars(self)[name] = value
class PlaceParser:
    """
    Parses GEDCOM PLAC.FORM statements and uses the result to map the comma
    separated parts of a place text onto Location values.
    """

    __field_map = {
        'addr'          : Location.set_street,
        'subdivision'   : Location.set_street,
        'addr1'         : Location.set_street,
        'adr1'          : Location.set_street,
        'street'        : Location.set_street,
        'addr2'         : Location.set_locality,
        'adr2'          : Location.set_locality,
        'locality'      : Location.set_locality,
        'neighborhood'  : Location.set_locality,
        'city'          : Location.set_city,
        'town'          : Location.set_city,
        'village'       : Location.set_city,
        'county'        : Location.set_county,
        'country'       : Location.set_country,
        'state'         : Location.set_state,
        'state/province': Location.set_state,
        'region'        : Location.set_state,
        'province'      : Location.set_state,
        'area code'     : Location.set_postal_code,
        'post code'     : Location.set_postal_code,
        'zip code'      : Location.set_postal_code, }

    def __init__(self, line=None):
        self.parse_function = []
        if line:
            self.parse_form(line)

    def parse_form(self, line):
        """
        Turn the comma separated field names of a PLAC.FORM line into a
        list of Location setters, looked up via __field_map. Names that
        are not in the map get a setter that does nothing, so positions
        stay aligned.
        """
        def ignore_field(_location, _value):
            """ stand-in for FORM entries without a Location setter """
            return None

        for field in line.data.split(','):
            setter = self.__field_map.get(field.lower().strip(),
                                          ignore_field)
            self.parse_function.append(setter)

    def load_place(self, place_import, place, text):
        """
        Split a place text at the commas and feed each part to the setter
        that the FORM statement assigned to its position. Nothing is done
        when the number of parts differs from the number of FORM fields.

        The place name and type are taken from the first non-empty entry of
        the location tuple. If the place already has a handle the location
        is stored at once; otherwise it is returned for storage later.
        """
        parts = [part.strip() for part in text.split(',')]
        if len(parts) != len(self.parse_function):
            return None

        loc = Location()
        for setter, part in zip(self.parse_function, parts):
            setter(loc, part)

        location = (loc.get_street(),
                    loc.get_locality(),
                    loc.get_parish(),
                    loc.get_city(),
                    loc.get_county(),
                    loc.get_state(),
                    loc.get_country())

        # position and value of the first non-empty entry, if any
        first_set = next(((level, value)
                          for level, value in enumerate(location) if value),
                         None)
        if first_set is not None:
            level, name = first_set
            type_num = 7 - level
        else:
            name = place.title
            type_num = PlaceType.UNKNOWN

        place.name.set_value(name)
        place.set_type(PlaceType(type_num))
        place.set_code(loc.get_postal_code())

        if not place.handle:
            # return for storage later
            return location
        # handle is available, store immediately
        place_import.store_location(location, place.handle)
        return None
class IdFinder:
    """
    Hands out IDs that are not yet in use.
    """
    def __init__(self, keys, prefix):
        """
        Remember the IDs already taken and the format string for new ones.
        """
        self.prefix = prefix
        self.index = 0
        self.ids = set(keys)

    def find_next(self):
        """
        Return the next free ID, built by applying the prefix format to a
        running counter; the returned ID is recorded as taken.

        @return: Returns the next available index
        @rtype: str
        """
        while True:
            candidate = self.prefix % self.index
            self.index += 1
            if candidate not in self.ids:
                self.ids.add(candidate)
                return candidate
class IdMapper:
    """
    This class provides methods to keep track of the correspondence between
    Gedcom xrefs (@P1023@) and Gramps IDs.

    has_gid(gid) tells whether a Gramps ID is already in use, find_next()
    supplies a fresh Gramps ID, and id2user_format(gid) normalises an ID to
    the user's ID format.
    """
    def __init__(self, has_gid, find_next, id2user_format):
        self.has_gid = has_gid
        self.find_next = find_next
        self.id2user_format = id2user_format
        self.swap = {}
        # Mirror of self.swap.values(), so that "is this ID already the
        # target of a swap" is a set lookup instead of a scan of all values
        # for every new xref.
        self._targets = set()

    def _is_target(self, value):
        """ Return True if value is already the target of a swap """
        if len(self._targets) != len(self.swap):
            # self.swap is handed out by map(); if entries were added or
            # removed there, rebuild the mirror before trusting it
            self._targets = set(self.swap.values())
        return value in self._targets

    def _next_unused(self):
        """ Return the next Gramps ID that is not the target of a swap """
        new_val = self.find_next()
        while self._is_target(new_val):
            new_val = self.find_next()
        return new_val

    def __getitem__(self, gid):
        """
        Return the Gramps ID for the given xref, assigning one on first use.
        An empty gid requests a new, unused Gramps ID.
        """
        if gid == "":
            # We need to find the next gramps ID provided it is not already
            # the target of a swap
            # NOTE(review): the original indentation was lost; this assumes
            # IDs handed out for an empty gid are not recorded in self.swap
            return self._next_unused()

        # remove any @ signs
        gid = self.clean(gid)
        if gid in self.swap:
            return self.swap[gid]

        # now standardise the format
        formatted_gid = self.id2user_format(gid)
        # I1 and I0001 will both format as I0001. If we had already
        # encountered I1, it would be in self.swap, so we would already
        # have found it. If we had already encountered I0001 and we are
        # now looking for I1, it wouldn't be in self.swap, and we now
        # find that I0001 is in use, so we have to create a new id.
        if self.has_gid(formatted_gid) or self._is_target(formatted_gid):
            new_val = self._next_unused()
        else:
            new_val = formatted_gid
        # we need to distinguish between I1 and I0001, so we record the map
        # from the original format
        self.swap[gid] = new_val
        self._targets.add(new_val)
        return new_val

    def clean(self, gid):
        """ remove '@' from start and end of xref """
        temp = gid.strip()
        if len(temp) > 1 and temp[0] == '@' and temp[-1] == '@':
            temp = temp[1:-1]
        return temp

    def map(self):
        """ return the xref to GID translation map """
        return self.swap
1834 1835 """ 1836 intid = table.get(gramps_id) 1837 if not intid: 1838 intid = create_id() 1839 table[gramps_id] = intid 1840 return intid 1841 1842 @staticmethod 1843 def __parse_name_personal(text): 1844 """ 1845 Parses a GEDCOM NAME value into an Name structure 1846 """ 1847 name = Name() 1848 1849 match = SURNAME_RE.match(text) 1850 if match: 1851 #/surname/ extra, we assume extra is given name 1852 names = match.groups() 1853 name.set_first_name(names[1].strip()) 1854 surn = Surname() 1855 surn.set_surname(names[0].strip()) 1856 surn.set_primary() 1857 name.set_surname_list([surn]) 1858 else: 1859 try: 1860 names = NAME_RE.match(text).groups() 1861 # given /surname/ extra, we assume extra is suffix 1862 name.set_first_name(names[0].strip()) 1863 surn = Surname() 1864 surn.set_surname(names[2].strip()) 1865 surn.set_primary() 1866 name.set_surname_list([surn]) 1867 name.set_suffix(names[4].strip()) 1868 except: 1869 # something strange, set as first name 1870 name.set_first_name(text.strip()) 1871 return name 1872 1873 def __init__(self, dbase, ifile, filename, user, stage_one, 1874 default_source, default_tag_format=None): 1875 UpdateCallback.__init__(self, user.callback) 1876 self.user = user 1877 self.set_total(stage_one.get_line_count()) 1878 self.repo2id = {} 1879 self.trans = None 1880 self.errors = [] 1881 self.number_of_errors = 0 1882 self.maxpeople = stage_one.get_person_count() 1883 self.dbase = dbase 1884 self.import_researcher = self.dbase.get_total() == 0 1885 event_ids = [] 1886 for event in dbase.iter_events(): 1887 event_ids.append(event.gramps_id) 1888 self.emapper = IdFinder(event_ids, dbase.event_prefix) 1889 self.famc_map = stage_one.get_famc_map() 1890 self.fams_map = stage_one.get_fams_map() 1891 1892 self.place_parser = PlaceParser() 1893 self.inline_srcs = OrderedDict() 1894 self.media_map = {} 1895 self.note_type_map = {} 1896 self.genby = "" 1897 self.genvers = "" 1898 self.subm = "" 1899 self.use_def_src = default_source 1900 
self.func_list = [] 1901 if self.use_def_src: 1902 self.def_src = Source() 1903 fname = os.path.basename(filename).split('\\')[-1] 1904 self.def_src.set_title(_("Import from GEDCOM (%s)") % fname) 1905 if default_tag_format: 1906 name = time.strftime(default_tag_format) 1907 tag = self.dbase.get_tag_from_name(name) 1908 if tag: 1909 self.default_tag = tag 1910 else: 1911 self.default_tag = Tag() 1912 self.default_tag.set_name(name) 1913 else: 1914 self.default_tag = None 1915 self.dir_path = os.path.dirname(filename) 1916 self.is_ftw = False 1917 self.addr_is_detail = False 1918 self.groups = None 1919 self.want_parse_warnings = True 1920 1921 self.pid_map = IdMapper( 1922 self.dbase.has_person_gramps_id, 1923 self.dbase.find_next_person_gramps_id, 1924 self.dbase.id2user_format) 1925 self.fid_map = IdMapper( 1926 self.dbase.has_family_gramps_id, 1927 self.dbase.find_next_family_gramps_id, 1928 self.dbase.fid2user_format) 1929 self.sid_map = IdMapper( 1930 self.dbase.has_source_gramps_id, 1931 self.dbase.find_next_source_gramps_id, 1932 self.dbase.sid2user_format) 1933 self.oid_map = IdMapper( 1934 self.dbase.has_media_gramps_id, 1935 self.dbase.find_next_media_gramps_id, 1936 self.dbase.oid2user_format) 1937 self.rid_map = IdMapper( 1938 self.dbase.has_repository_gramps_id, 1939 self.dbase.find_next_repository_gramps_id, 1940 self.dbase.rid2user_format) 1941 self.nid_map = IdMapper( 1942 self.dbase.has_note_gramps_id, 1943 self.dbase.find_next_note_gramps_id, 1944 self.dbase.nid2user_format) 1945 1946 self.gid2id = {} 1947 self.oid2id = {} 1948 self.sid2id = {} 1949 self.lid2id = {} 1950 self.fid2id = {} 1951 self.rid2id = {} 1952 self.nid2id = {} 1953 1954 self.place_import = PlaceImport(self.dbase) 1955 1956 # 1957 # Parse table for <<SUBMITTER_RECORD>> below the level 0 SUBM tag 1958 # 1959 # n @<XREF:SUBM>@ SUBM {1:1} 1960 # +1 NAME <SUBMITTER_NAME> {1:1} 1961 # +1 <<ADDRESS_STRUCTURE>> {0:1} 1962 # +1 <<MULTIMEDIA_LINK>> {0:M} 1963 # +1 LANG 
<LANGUAGE_PREFERENCE> {0:3} 1964 # +1 <<NOTE_STRUCTURE>> {0:M} 1965 # +1 RFN <SUBMITTER_REGISTERED_RFN> {0:1} 1966 # +1 RIN <AUTOMATED_RECORD_ID> {0:1} 1967 # +1 <<CHANGE_DATE>> {0:1} 1968 1969 # (N.B. GEDCOM allows multiple SUBMitter records) 1970 self.subm_parse_tbl = { 1971 # +1 NAME <SUBMITTER_NAME> 1972 TOKEN_NAME : self.__subm_name, 1973 # +1 <<ADDRESS_STRUCTURE>> 1974 TOKEN_ADDR : self.__subm_addr, 1975 TOKEN_PHON : self.__subm_phon, 1976 TOKEN_EMAIL : self.__subm_email, 1977 TOKEN_WWW : self.__repo_www, 1978 TOKEN_FAX : self.__repo_fax, 1979 # +1 <<MULTIMEDIA_LINK>> 1980 # +1 LANG <LANGUAGE_PREFERENCE> 1981 # +1 <<NOTE_STRUCTURE>> 1982 TOKEN_NOTE : self.__repo_note, 1983 TOKEN_RNOTE : self.__repo_note, 1984 # +1 RFN <SUBMITTER_REGISTERED_RFN> 1985 # +1 RIN <AUTOMATED_RECORD_ID> 1986 # +1 <<CHANGE_DATE>> 1987 TOKEN_CHAN : self.__repo_chan, } 1988 self.func_list.append(self.subm_parse_tbl) 1989 1990 # 1991 # Parse table for <<INDIVIDUAL_RECORD>> below the level 0 INDI tag 1992 # 1993 # n @<XREF:INDI>@ INDI {1:1} 1994 # +1 RESN <RESTRICTION_NOTICE> {0:1} 1995 # +1 <<PERSONAL_NAME_STRUCTURE>> {0:M} 1996 # +1 SEX <SEX_VALUE> {0:1} 1997 # +1 <<INDIVIDUAL_EVENT_STRUCTURE>> {0:M} 1998 # +1 <<INDIVIDUAL_ATTRIBUTE_STRUCTURE>> {0:M} 1999 # +1 <<LDS_INDIVIDUAL_ORDINANCE>> {0:M} 2000 # +1 <<CHILD_TO_FAMILY_LINK>> {0:M} 2001 # +1 <<SPOUSE_TO_FAMILY_LINK>> {0:M} 2002 # +1 SUBM @<XREF:SUBM>@ {0:M} 2003 # +1 <<ASSOCIATION_STRUCTURE>> {0:M} 2004 # +1 ALIA @<XREF:INDI>@ {0:M} 2005 # +1 ANCI @<XREF:SUBM>@ {0:M} 2006 # +1 DESI @<XREF:SUBM>@ {0:M} 2007 # +1 <<SOURCE_CITATION>> {0:M} 2008 # +1 <<MULTIMEDIA_LINK>> {0:M} 2009 # +1 <<NOTE_STRUCTURE>> {0:M} 2010 # +1 RFN <PERMANENT_RECORD_FILE_NUMBER> {0:1} 2011 # +1 AFN <ANCESTRAL_FILE_NUMBER> {0:1} 2012 # +1 REFN <USER_REFERENCE_NUMBER> {0:M} 2013 # +2 TYPE <USER_REFERENCE_TYPE> {0:1} 2014 # +1 RIN <AUTOMATED_RECORD_ID> {0:1} 2015 # +1 <<CHANGE_DATE>> {0:1} 2016 2017 self.indi_parse_tbl = { 2018 # +1 RESN <RESTRICTION_NOTICE> {0:1} 
2019 TOKEN_RESN : self.__person_resn, 2020 # +1 <<PERSONAL_NAME_STRUCTURE>> {0:M} 2021 TOKEN_NAME : self.__person_name, 2022 # +1 SEX <SEX_VALUE> {0:1} 2023 TOKEN_SEX : self.__person_sex, 2024 # +1 <<INDIVIDUAL_EVENT_STRUCTURE>> {0:M} 2025 TOKEN_EVEN : self.__person_even, 2026 TOKEN_GEVENT: self.__person_std_event, 2027 TOKEN_BIRT : self.__person_birt, 2028 TOKEN_RELI : self.__person_reli, 2029 TOKEN_ADOP : self.__person_adop, 2030 TOKEN_DEAT : self.__person_deat, 2031 # +1 <<INDIVIDUAL_ATTRIBUTE_STRUCTURE>> {0:M} 2032 # +1 AFN <ANCESTRAL_FILE_NUMBER> {0:1} 2033 TOKEN_ATTR : self.__person_std_attr, 2034 TOKEN_FACT : self.__person_fact, 2035 #+1 <<LDS_INDIVIDUAL_ORDINANCE>> {0:M} 2036 TOKEN_BAPL : self.__person_bapl, 2037 TOKEN_CONL : self.__person_conl, 2038 TOKEN_ENDL : self.__person_endl, 2039 TOKEN_SLGC : self.__person_slgc, 2040 #+1 <<CHILD_TO_FAMILY_LINK>> {0:M} 2041 TOKEN_FAMC : self.__person_famc, 2042 # +1 <<SPOUSE_TO_FAMILY_LINK>> {0:M} 2043 TOKEN_FAMS : self.__person_fams, 2044 # +1 SUBM @<XREF:SUBM>@ {0:M} 2045 TOKEN_SUBM : self.__skip_record, 2046 # +1 <<ASSOCIATION_STRUCTURE>> {0:M} 2047 TOKEN_ASSO : self.__person_asso, 2048 # +1 ALIA @<XREF:INDI>@ {0:M} 2049 TOKEN_ALIA : self.__person_alt_name, 2050 # +1 ANCI @<XREF:SUBM>@ {0:M} 2051 TOKEN_ANCI : self.__skip_record, 2052 # +1 DESI @<XREF:SUBM>@ {0:M} 2053 TOKEN_DESI : self.__skip_record, 2054 # +1 <<SOURCE_CITATION>> {0:M} 2055 TOKEN_SOUR : self.__person_sour, 2056 # +1 <<MULTIMEDIA_LINK>> {0:M} 2057 TOKEN_OBJE : self.__person_object, 2058 # +1 <<NOTE_STRUCTURE>> {0:M} 2059 TOKEN_NOTE : self.__person_note, 2060 TOKEN_RNOTE : self.__person_note, 2061 TOKEN__COMM : self.__person_note, 2062 # +1 RFN <PERMANENT_RECORD_FILE_NUMBER> {0:1} 2063 TOKEN_RFN : self.__person_attr, 2064 # +1 REFN <USER_REFERENCE_NUMBER> {0:M} 2065 # +2 TYPE <USER_REFERENCE_TYPE> {0:1} 2066 TOKEN_REFN : self.__person_refn, 2067 # TYPE should be below REFN, but will work here anyway 2068 TOKEN_TYPE : self.__person_attr, 2069 # +1 
RIN <AUTOMATED_RECORD_ID> {0:1} 2070 TOKEN_RIN : self.__person_attr, 2071 # +1 <<CHANGE_DATE>> {0:1} 2072 TOKEN_CHAN : self.__person_chan, 2073 # The following tags are not part of Gedcom spec but are commonly 2074 # found here anyway 2075 TOKEN_ADDR : self.__person_addr, 2076 TOKEN_PHON : self.__person_phon, 2077 TOKEN_FAX : self.__person_fax, 2078 TOKEN_EMAIL : self.__person_email, 2079 TOKEN_WWW : self.__person_www, 2080 TOKEN__TODO : self.__skip_record, 2081 TOKEN_TITL : self.__person_titl, 2082 TOKEN__PHOTO: self.__person_photo, } 2083 self.func_list.append(self.indi_parse_tbl) 2084 2085 self.name_parse_tbl = { 2086 # +1 NPFX <NAME_PIECE_PREFIX> {0:1} 2087 TOKEN_NPFX : self.__name_npfx, 2088 # +1 GIVN <NAME_PIECE_GIVEN> {0:1} 2089 TOKEN_GIVN : self.__name_givn, 2090 # NICK <NAME_PIECE_NICKNAME> {0:1} 2091 TOKEN_NICK : self.__name_nick, 2092 # +1 SPFX <NAME_PIECE_SURNAME_PREFIX {0:1} 2093 TOKEN_SPFX : self.__name_spfx, 2094 # +1 SURN <NAME_PIECE_SURNAME> {0:1} 2095 TOKEN_SURN : self.__name_surn, 2096 # +1 NSFX <NAME_PIECE_SUFFIX> {0:1} 2097 TOKEN_NSFX : self.__name_nsfx, 2098 # +1 <<SOURCE_CITATION>> {0:M} 2099 TOKEN_SOUR : self.__name_sour, 2100 # +1 <<NOTE_STRUCTURE>> {0:M} 2101 TOKEN_NOTE : self.__name_note, 2102 TOKEN_RNOTE : self.__name_note, 2103 # Extensions 2104 TOKEN_ALIA : self.__name_alia, 2105 TOKEN__MARNM : self.__name_marnm, 2106 TOKEN__MAR : self.__name_marnm, # Generated by geni.com 2107 TOKEN__MARN : self.__name_marnm, # Gen'd by BROSKEEP 6.1.31 WIN 2108 TOKEN__AKA : self.__name_aka, # PAF and AncestQuest 2109 TOKEN_TYPE : self.__name_type, # This is legal GEDCOM 5.5.1 2110 TOKEN_BIRT : self.__ignore, 2111 TOKEN_DATE : self.__name_date, 2112 # This handles date as a subsidiary of "1 ALIA" which might be used 2113 # by Family Tree Maker and Reunion, and by cheating (handling a 2114 # lower level from the current parse table) handles date as 2115 # subsidiary to "2 _MARN", "2 _AKAN" and "2 _ADPN" which has been 2116 # found in Brother's keeper. 
2117 TOKEN__ADPN : self.__name_adpn, } 2118 self.func_list.append(self.name_parse_tbl) 2119 2120 # 2121 # Parse table for <<REPOSITORY_RECORD>> below the level 0 REPO tag 2122 # 2123 # n @<XREF:REPO>@ REPO {1:1} 2124 # +1 NAME <NAME_OF_REPOSITORY> {0:1} 2125 # +1 <<ADDRESS_STRUCTURE>> {0:1} 2126 # +1 <<NOTE_STRUCTURE>> {0:M} 2127 # +1 REFN <USER_REFERENCE_NUMBER> {0:M} 2128 # +2 TYPE <USER_REFERENCE_TYPE> {0:1} 2129 # +1 RIN <AUTOMATED_RECORD_ID> {0:1} 2130 # +1 <<CHANGE_DATE>> {0:1} 2131 2132 self.repo_parse_tbl = { 2133 TOKEN_NAME : self.__repo_name, 2134 TOKEN_ADDR : self.__repo_addr, 2135 TOKEN_RIN : self.__ignore, 2136 TOKEN_NOTE : self.__repo_note, 2137 TOKEN_RNOTE : self.__repo_note, 2138 TOKEN_CHAN : self.__repo_chan, 2139 TOKEN_PHON : self.__repo_phon, 2140 TOKEN_EMAIL : self.__repo_email, 2141 TOKEN_WWW : self.__repo_www, 2142 TOKEN_FAX : self.__repo_fax, } 2143 self.func_list.append(self.repo_parse_tbl) 2144 2145 self.event_parse_tbl = { 2146 # n TYPE <EVENT_DESCRIPTOR> {0:1} 2147 TOKEN_TYPE : self.__event_type, 2148 # n DATE <DATE_VALUE> {0:1} p.*/* 2149 TOKEN_DATE : self.__event_date, 2150 # n <<PLACE_STRUCTURE>> {0:1} p.* 2151 TOKEN_PLAC : self.__event_place, 2152 # n <<ADDRESS_STRUCTURE>> {0:1} p.* 2153 TOKEN_ADDR : self.__event_addr, 2154 # n AGE <AGE_AT_EVENT> {0:1} p.* 2155 TOKEN_AGE : self.__event_age, 2156 # n AGNC <RESPONSIBLE_AGENCY> {0:1} p.* 2157 TOKEN_AGNC : self.__event_agnc, 2158 # n CAUS <CAUSE_OF_EVENT> {0:1} p.* 2159 TOKEN_CAUS : self.__event_cause, 2160 # n <<SOURCE_CITATION>> {0:M} p.* 2161 TOKEN_SOUR : self.__event_source, 2162 # n <<MULTIMEDIA_LINK>> {0:M} p.*, * 2163 TOKEN_OBJE : self.__event_object, 2164 # n <<NOTE_STRUCTURE>> {0:M} p. 
2165 TOKEN_NOTE : self.__event_inline_note, 2166 TOKEN_RNOTE : self.__event_note, 2167 # Other 2168 TOKEN__PRIV : self.__event_privacy, 2169 TOKEN_OFFI : self.__event_note, 2170 TOKEN_PHON : self.__event_phon, 2171 TOKEN__GODP : self.__event_witness, 2172 TOKEN__WITN : self.__event_witness, 2173 TOKEN__WTN : self.__event_witness, 2174 TOKEN_RELI : self.__ignore, 2175 # Not legal, but inserted by PhpGedView 2176 TOKEN_TIME : self.__event_time, 2177 TOKEN_ASSO : self.__ignore, 2178 TOKEN_IGNORE : self.__ignore, 2179 TOKEN_STAT : self.__ignore, 2180 TOKEN_TEMP : self.__ignore, 2181 TOKEN_HUSB : self.__event_husb, 2182 TOKEN_WIFE : self.__event_wife, 2183 TOKEN_FAMC : self.__person_birth_famc, 2184 # Not legal, but inserted by Ultimate Family Tree 2185 TOKEN_CHAN : self.__ignore, 2186 TOKEN_QUAY : self.__ignore, 2187 # Not legal, but inserted by FamilyTreeBuilder 2188 TOKEN_RIN : self.__event_rin, 2189 TOKEN_ATTR : self.__event_attr, # FTB for _UID 2190 TOKEN_EMAIL : self.__event_email, # FTB for RESI events 2191 TOKEN_WWW : self.__event_www, # FTB for RESI events 2192 TOKEN_FAX : self.__event_fax, # legal... 
2193 } 2194 self.func_list.append(self.event_parse_tbl) 2195 2196 self.adopt_parse_tbl = { 2197 TOKEN_TYPE : self.__event_type, 2198 TOKEN__PRIV : self.__event_privacy, 2199 TOKEN_DATE : self.__event_date, 2200 TOKEN_SOUR : self.__event_source, 2201 TOKEN_PLAC : self.__event_place, 2202 TOKEN_ADDR : self.__event_addr, 2203 TOKEN_PHON : self.__event_phon, 2204 TOKEN_CAUS : self.__event_cause, 2205 TOKEN_AGNC : self.__event_agnc, 2206 TOKEN_AGE : self.__event_age, 2207 TOKEN_NOTE : self.__event_note, 2208 TOKEN_RNOTE : self.__event_note, 2209 TOKEN_OFFI : self.__event_note, 2210 TOKEN__GODP : self.__event_witness, 2211 TOKEN__WITN : self.__event_witness, 2212 TOKEN__WTN : self.__event_witness, 2213 TOKEN_RELI : self.__ignore, 2214 TOKEN_TIME : self.__ignore, 2215 TOKEN_ASSO : self.__ignore, 2216 TOKEN_IGNORE : self.__ignore, 2217 TOKEN_STAT : self.__ignore, 2218 TOKEN_TEMP : self.__ignore, 2219 TOKEN_OBJE : self.__event_object, 2220 TOKEN_FAMC : self.__person_adopt_famc, 2221 # Not legal, but inserted by Ultimate Family Tree 2222 TOKEN_CHAN : self.__ignore, 2223 TOKEN_QUAY : self.__ignore, 2224 } 2225 self.func_list.append(self.adopt_parse_tbl) 2226 2227 self.famc_parse_tbl = { 2228 # n FAMC @<XREF:FAM>@ {1:1} 2229 # +1 PEDI <PEDIGREE_LINKAGE_TYPE> {0:1} p.* 2230 TOKEN_PEDI : self.__person_famc_pedi, 2231 # +1 _FREL <Father PEDIGREE_LINKAGE_TYPE> {0:1} non-standard 2232 TOKEN__FREL : self.__person_famc_frel, 2233 # +1 _MREL <Mother PEDIGREE_LINKAGE_TYPE> {0:1} non-standard 2234 TOKEN__MREL : self.__person_famc_mrel, 2235 # +1 <<NOTE_STRUCTURE>> {0:M} p.* 2236 TOKEN_NOTE : self.__person_famc_note, 2237 TOKEN_RNOTE : self.__person_famc_note, 2238 # Extras 2239 TOKEN__PRIMARY: self.__person_famc_primary, 2240 TOKEN_SOUR : self.__person_famc_sour, 2241 # GEDit 2242 TOKEN_STAT : self.__ignore, 2243 } 2244 self.func_list.append(self.famc_parse_tbl) 2245 2246 self.person_fact_parse_tbl = { 2247 TOKEN_TYPE : self.__person_fact_type, 2248 TOKEN_SOUR : 
self.__person_attr_source, 2249 TOKEN_NOTE : self.__person_attr_note, 2250 TOKEN_RNOTE : self.__person_attr_note, 2251 } 2252 self.func_list.append(self.person_fact_parse_tbl) 2253 2254 self.person_attr_parse_tbl = { 2255 TOKEN_TYPE : self.__person_attr_type, 2256 TOKEN_CAUS : self.__ignore, 2257 TOKEN_DATE : self.__ignore, 2258 TOKEN_TIME : self.__ignore, 2259 TOKEN_ADDR : self.__ignore, 2260 TOKEN_IGNORE : self.__ignore, 2261 TOKEN_STAT : self.__ignore, 2262 TOKEN_TEMP : self.__ignore, 2263 TOKEN_OBJE : self.__ignore, 2264 TOKEN_SOUR : self.__person_attr_source, 2265 TOKEN_PLAC : self.__person_attr_place, 2266 TOKEN_NOTE : self.__person_attr_note, 2267 TOKEN_RNOTE : self.__person_attr_note, 2268 } 2269 self.func_list.append(self.person_attr_parse_tbl) 2270 2271 self.lds_parse_tbl = { 2272 TOKEN_TEMP : self.__lds_temple, 2273 TOKEN_DATE : self.__lds_date, 2274 TOKEN_FAMC : self.__lds_famc, 2275 TOKEN_FORM : self.__lds_form, 2276 TOKEN_PLAC : self.__lds_plac, 2277 TOKEN_SOUR : self.__lds_sour, 2278 TOKEN_NOTE : self.__lds_note, 2279 TOKEN_RNOTE : self.__lds_note, 2280 TOKEN_STAT : self.__lds_stat, 2281 } 2282 self.func_list.append(self.lds_parse_tbl) 2283 2284 self.asso_parse_tbl = { 2285 TOKEN_RELA : self.__person_asso_rela, 2286 TOKEN_SOUR : self.__person_asso_sour, 2287 TOKEN_NOTE : self.__person_asso_note, 2288 TOKEN_RNOTE : self.__person_asso_note, 2289 } 2290 self.func_list.append(self.asso_parse_tbl) 2291 2292 self.citation_parse_tbl = { 2293 TOKEN_PAGE : self.__citation_page, 2294 TOKEN_DATE : self.__citation_date, 2295 TOKEN_DATA : self.__citation_data, 2296 TOKEN_OBJE : self.__citation_obje, 2297 TOKEN_REFN : self.__citation_refn, 2298 TOKEN_EVEN : self.__citation_even, 2299 TOKEN_IGNORE : self.__ignore, 2300 TOKEN__LKD : self.__ignore, 2301 TOKEN_QUAY : self.__citation_quay, 2302 TOKEN_NOTE : self.__citation_note, 2303 TOKEN_RNOTE : self.__citation_note, 2304 TOKEN_TEXT : self.__citation_data_text, 2305 TOKEN__LINK : self.__citation_link, 2306 
TOKEN__JUST : self.__citation__just, 2307 } 2308 self.func_list.append(self.citation_parse_tbl) 2309 2310 self.media_parse_tbl = { 2311 TOKEN_FORM : self.__media_ref_form, 2312 TOKEN_MEDI : self.__media_ref_medi, # v5.5.1 2313 TOKEN_TITL : self.__media_ref_titl, 2314 TOKEN_FILE : self.__media_ref_file, 2315 TOKEN_NOTE : self.__obje_note, # illegal, but often there 2316 TOKEN_RNOTE : self.__obje_note, # illegal, but often there 2317 TOKEN__PRIM : self.__media_ref_prim, # LFT etc. 2318 TOKEN_IGNORE : self.__ignore, 2319 } 2320 self.func_list.append(self.media_parse_tbl) 2321 2322 self.parse_loc_tbl = { 2323 TOKEN_ADR1 : self.__location_adr1, 2324 TOKEN_ADR2 : self.__location_adr2, 2325 TOKEN_CITY : self.__location_city, 2326 TOKEN_STAE : self.__location_stae, 2327 TOKEN_POST : self.__location_post, 2328 TOKEN_CTRY : self.__location_ctry, 2329 # Not legal GEDCOM - not clear why these are included at this level 2330 TOKEN_ADDR : self.__ignore, 2331 TOKEN_DATE : self.__ignore, # there is nowhere to put a date 2332 TOKEN_NOTE : self.__location_note, 2333 TOKEN_RNOTE : self.__location_note, 2334 TOKEN__LOC : self.__ignore, 2335 TOKEN__NAME : self.__ignore, 2336 TOKEN_PHON : self.__location_phone, 2337 TOKEN_IGNORE : self.__ignore, 2338 } 2339 self.func_list.append(self.parse_loc_tbl) 2340 2341 # 2342 # Parse table for <<FAM_RECORD>> below the level 0 FAM tag 2343 # 2344 # n @<XREF:FAM>@ FAM {1:1} 2345 # +1 <<FAMILY_EVENT_STRUCTURE>> {0:M} 2346 # +1 HUSB @<XREF:INDI>@ {0:1} 2347 # +1 WIFE @<XREF:INDI>@ {0:1} 2348 # +1 CHIL @<XREF:INDI>@ {0:M} 2349 # +1 NCHI <COUNT_OF_CHILDREN> {0:1} 2350 # +1 SUBM @<XREF:SUBM>@ {0:M} 2351 # +1 <<LDS_SPOUSE_SEALING>> {0:M} 2352 # +1 <<SOURCE_CITATION>> {0:M} 2353 # +1 <<MULTIMEDIA_LINK>> {0:M} 2354 # +1 <<NOTE_STRUCTURE>> {0:M} 2355 # +1 REFN <USER_REFERENCE_NUMBER> {0:M} 2356 # +1 RIN <AUTOMATED_RECORD_ID> {0:1} 2357 # +1 <<CHANGE_DATE>> {0:1} 2358 2359 self.family_func = { 2360 # +1 <<FAMILY_EVENT_STRUCTURE>> {0:M} 2361 TOKEN_GEVENT : 
self.__family_std_event, 2362 TOKEN_EVEN : self.__fam_even, 2363 # +1 HUSB @<XREF:INDI>@ {0:1} 2364 TOKEN_HUSB : self.__family_husb, 2365 # +1 WIFE @<XREF:INDI>@ {0:1} 2366 TOKEN_WIFE : self.__family_wife, 2367 # +1 CHIL @<XREF:INDI>@ {0:M} 2368 TOKEN_CHIL : self.__family_chil, 2369 # +1 NCHI <COUNT_OF_CHILDREN> {0:1} 2370 # +1 SUBM @<XREF:SUBM>@ {0:M} 2371 # +1 <<LDS_SPOUSE_SEALING>> {0:M} 2372 TOKEN_SLGS : self.__family_slgs, 2373 # +1 <<SOURCE_CITATION>> {0:M} 2374 TOKEN_SOUR : self.__family_source, 2375 # +1 <<MULTIMEDIA_LINK>> {0:M} 2376 TOKEN_OBJE : self.__family_object, 2377 # +1 <<NOTE_STRUCTURE>> {0:M} 2378 TOKEN__COMM : self.__family_comm, 2379 TOKEN_NOTE : self.__family_note, 2380 TOKEN_RNOTE : self.__family_note, 2381 # +1 REFN <USER_REFERENCE_NUMBER> {0:M} 2382 TOKEN_REFN : self.__family_refn, 2383 # TYPE should be below REFN, but will work here anyway 2384 TOKEN_TYPE : self.__family_cust_attr, 2385 # +1 RIN <AUTOMATED_RECORD_ID> {0:1} 2386 # +1 <<CHANGE_DATE>> {0:1} 2387 TOKEN_CHAN : self.__family_chan, 2388 TOKEN_ENDL : self.__ignore, 2389 TOKEN_ADDR : self.__ignore, 2390 TOKEN_RIN : self.__family_cust_attr, 2391 TOKEN_SUBM : self.__ignore, 2392 TOKEN_ATTR : self.__family_attr, 2393 } 2394 self.func_list.append(self.family_func) 2395 2396 self.family_rel_tbl = { 2397 TOKEN__FREL : self.__family_frel, 2398 TOKEN__MREL : self.__family_mrel, 2399 TOKEN_ADOP : self.__family_adopt, 2400 TOKEN__STAT : self.__family_stat, 2401 } 2402 self.func_list.append(self.family_rel_tbl) 2403 2404 # 2405 # Parse table for <<SOURCE_RECORD>> below the level 0 SOUR tag 2406 # 2407 # n @<XREF:SOUR>@ SOUR {1:1} 2408 # +1 DATA {0:1} 2409 # +2 EVEN <EVENTS_RECORDED> {0:M} 2410 # +3 DATE <DATE_PERIOD> {0:1} 2411 # +3 PLAC <SOURCE_JURISDICTION_PLACE> {0:1} 2412 # +2 AGNC <RESPONSIBLE_AGENCY> {0:1} 2413 # +2 <<NOTE_STRUCTURE>> {0:M} 2414 # +1 AUTH <SOURCE_ORIGINATOR> {0:1} 2415 # +2 [CONT|CONC] <SOURCE_ORIGINATOR> {0:M} 2416 # +1 TITL <SOURCE_DESCRIPTIVE_TITLE> {0:1} 2417 # +2 
[CONT|CONC] <SOURCE_DESCRIPTIVE_TITLE> {0:M} 2418 # +1 ABBR <SOURCE_FILED_BY_ENTRY> {0:1} 2419 # +1 PUBL <SOURCE_PUBLICATION_FACTS> {0:1} 2420 # +2 [CONT|CONC] <SOURCE_PUBLICATION_FACTS> {0:M} 2421 # +1 TEXT <TEXT_FROM_SOURCE> {0:1} 2422 # +2 [CONT|CONC] <TEXT_FROM_SOURCE> {0:M} 2423 # +1 <<SOURCE_REPOSITORY_CITATION>> {0:1} 2424 # +1 <<MULTIMEDIA_LINK>> {0:M} 2425 # +1 <<NOTE_STRUCTURE>> {0:M} 2426 # +1 REFN <USER_REFERENCE_NUMBER> {0:M} 2427 # +2 TYPE <USER_REFERENCE_TYPE> {0:1} 2428 # +1 RIN <AUTOMATED_RECORD_ID> {0:1} 2429 # +1 <<CHANGE_DATE>> {0:1} 2430 2431 self.source_func = { 2432 TOKEN_TITL : self.__source_title, 2433 TOKEN_TAXT : self.__source_taxt_peri, 2434 TOKEN_PERI : self.__source_taxt_peri, 2435 TOKEN_AUTH : self.__source_auth, 2436 TOKEN_PUBL : self.__source_publ, 2437 TOKEN_NOTE : self.__source_note, 2438 TOKEN_RNOTE : self.__source_note, 2439 TOKEN_TEXT : self.__source_text, 2440 TOKEN_ABBR : self.__source_abbr, 2441 TOKEN_REFN : self.__source_attr, 2442 TOKEN_RIN : self.__source_attr, 2443 TOKEN_REPO : self.__source_repo, 2444 TOKEN_OBJE : self.__source_object, 2445 TOKEN_CHAN : self.__source_chan, 2446 TOKEN_MEDI : self.__source_attr, 2447 TOKEN__NAME : self.__source_attr, 2448 TOKEN_DATA : self.__ignore, 2449 # TYPE should be below REFN, but will work here anyway 2450 TOKEN_TYPE : self.__source_attr, 2451 TOKEN_CALN : self.__ignore, 2452 # not legal, but Ultimate Family Tree does this 2453 TOKEN_DATE : self.__ignore, 2454 TOKEN_IGNORE : self.__ignore, 2455 } 2456 self.func_list.append(self.source_func) 2457 2458 # 2459 # Parse table for <<MULTIMEDIA_RECORD>> below the level 0 OBJE tag 2460 # 2461 # n @XREF:OBJE@ OBJE {1:1} # v5.5 layout 2462 # +1 FILE <MULTIMEDIA_FILE_REFN> {1:1} # de-facto extension 2463 # +1 FORM <MULTIMEDIA_FORMAT> {1:1} 2464 # +1 TITL <DESCRIPTIVE_TITLE> {0:1} 2465 # +1 <<NOTE_STRUCTURE>> {0:M} 2466 # +1 BLOB {1:1} # Deprecated, no support 2467 # +2 CONT <ENCODED_MULTIMEDIA_LINE> {1:M} 2468 # +1 OBJE @<XREF:OBJE>@ /* chain 
*/ {0:1} # Deprecated, no support 2469 # +1 REFN <USER_REFERENCE_NUMBER> {0:M} 2470 # +2 TYPE <USER_REFERENCE_TYPE> {0:1} 2471 # +1 RIN <AUTOMATED_RECORD_ID> {0:1} 2472 # +1 <<CHANGE_DATE>> {0:1} 2473 # 2474 # n @XREF:OBJE@ OBJE {1:1} # v5.5.1 layout 2475 # +1 FILE <MULTIMEDIA_FILE_REFN> {1:M} # multi files, no support 2476 # +2 FORM <MULTIMEDIA_FORMAT> {1:1} 2477 # +3 TYPE <SOURCE_MEDIA_TYPE> {0:1} 2478 # +2 TITL <DESCRIPTIVE_TITLE> {0:1} 2479 # +2 DATE <mm/dd/yyy hh:mn:ss AM> {0:1} # FTM extension 2480 # +2 TEXT <COMMENT, by user or exif> {0:1} # FTM extension 2481 # +1 REFN <USER_REFERENCE_NUMBER> {0:M} 2482 # +2 TYPE <USER_REFERENCE_TYPE> {0:1} 2483 # +1 RIN <AUTOMATED_RECORD_ID> {0:1} 2484 # +1 <<NOTE_STRUCTURE>> {0:M} 2485 # +1 <<SOURCE_CITATION>> {0:M} 2486 # +1 <<CHANGE_DATE>> {0:1} 2487 2488 self.obje_func = { 2489 TOKEN_FORM : self.__obje_form, 2490 TOKEN_TYPE : self.__obje_type, # v5.5.1 2491 TOKEN_TITL : self.__obje_title, 2492 TOKEN_FILE : self.__obje_file, # de-facto extension 2493 TOKEN_TEXT : self.__obje_text, # FTM extension 2494 TOKEN__TEXT : self.__obje_text, # FTM 2017 extension 2495 TOKEN_DATE : self.__obje_date, # FTM extension 2496 TOKEN__DATE : self.__obje_date, # FTM 2017 extension 2497 TOKEN_NOTE : self.__obje_note, 2498 TOKEN_RNOTE : self.__obje_note, 2499 TOKEN_SOUR : self.__obje_sour, 2500 TOKEN_BLOB : self.__ignore, # v5.5.1 deprecated 2501 TOKEN_REFN : self.__obje_refn, 2502 TOKEN_RIN : self.__obje_rin, 2503 TOKEN_CHAN : self.__obje_chan, 2504 } 2505 self.func_list.append(self.obje_func) 2506 2507 self.parse_addr_tbl = { 2508 TOKEN_DATE : self.__address_date, 2509 TOKEN_ADR1 : self.__address_adr1, 2510 TOKEN_ADR2 : self.__address_adr2, 2511 TOKEN_CITY : self.__address_city, 2512 TOKEN_STAE : self.__address_state, 2513 TOKEN_POST : self.__address_post, 2514 TOKEN_CTRY : self.__address_country, 2515 TOKEN_PHON : self.__ignore, 2516 TOKEN_SOUR : self.__address_sour, 2517 TOKEN_NOTE : self.__address_note, 2518 TOKEN_RNOTE : 
self.__address_note, 2519 TOKEN__LOC : self.__ignore, 2520 TOKEN__NAME : self.__ignore, 2521 TOKEN_IGNORE : self.__ignore, 2522 TOKEN_TYPE : self.__ignore, 2523 TOKEN_CAUS : self.__ignore, 2524 } 2525 self.func_list.append(self.parse_addr_tbl) 2526 2527 self.event_cause_tbl = { 2528 TOKEN_SOUR : self.__event_cause_source, 2529 } 2530 self.func_list.append(self.event_cause_tbl) 2531 2532 self.event_place_map = { 2533 TOKEN_NOTE : self.__event_place_note, 2534 TOKEN_RNOTE : self.__event_place_note, 2535 TOKEN_FORM : self.__event_place_form, 2536 # Not legal. 2537 TOKEN_OBJE : self.__event_place_object, 2538 TOKEN_SOUR : self.__event_place_sour, 2539 TOKEN__LOC : self.__ignore, 2540 TOKEN_MAP : self.__place_map, 2541 # Not legal, but generated by Ultimate Family Tree 2542 TOKEN_QUAY : self.__ignore, 2543 } 2544 self.func_list.append(self.event_place_map) 2545 2546 self.place_map_tbl = { 2547 TOKEN_LATI : self.__place_lati, 2548 TOKEN_LONG : self.__place_long, 2549 } 2550 self.func_list.append(self.place_map_tbl) 2551 2552 self.repo_ref_tbl = { 2553 TOKEN_CALN : self.__repo_ref_call, 2554 TOKEN_NOTE : self.__repo_ref_note, 2555 TOKEN_RNOTE : self.__repo_ref_note, 2556 TOKEN_MEDI : self.__repo_ref_medi, 2557 TOKEN_IGNORE : self.__ignore, 2558 } 2559 self.func_list.append(self.repo_ref_tbl) 2560 2561 self.parse_person_adopt = { 2562 TOKEN_ADOP : self.__person_adopt_famc_adopt, 2563 } 2564 self.func_list.append(self.parse_person_adopt) 2565 2566 self.opt_note_tbl = { 2567 TOKEN_RNOTE : self.__optional_note, 2568 TOKEN_NOTE : self.__optional_note, 2569 } 2570 self.func_list.append(self.opt_note_tbl) 2571 2572 self.citation_data_tbl = { 2573 TOKEN_DATE : self.__citation_data_date, 2574 TOKEN_TEXT : self.__citation_data_text, 2575 TOKEN_RNOTE : self.__citation_data_note, 2576 TOKEN_NOTE : self.__citation_data_note, 2577 } 2578 self.func_list.append(self.citation_data_tbl) 2579 2580 self.citation_even_tbl = { 2581 TOKEN_ROLE : self.__citation_even_role, 2582 } 2583 
self.func_list.append(self.citation_even_tbl) 2584 2585 # 2586 # Parse table for <<HEADER>> record below the level 0 HEAD tag 2587 # 2588 # n HEAD {1:1} 2589 # +1 SOUR <APPROVED_SYSTEM_ID> {1:1} 2590 # +2 VERS <VERSION_NUMBER> {0:1} 2591 # +2 NAME <NAME_OF_PRODUCT> {0:1} 2592 # +2 CORP <NAME_OF_BUSINESS> {0:1} 2593 # +3 <<ADDRESS_STRUCTURE>> {0:1} 2594 # +2 DATA <NAME_OF_SOURCE_DATA> {0:1} 2595 # +3 DATE <PUBLICATION_DATE> {0:1} 2596 # +3 COPR <COPYRIGHT_SOURCE_DATA> {0:1} 2597 # +1 DEST <RECEIVING_SYSTEM_NAME> {0:1*} 2598 # +1 DATE <TRANSMISSION_DATE> {0:1} 2599 # +2 TIME <TIME_VALUE> {0:1} 2600 # +1 SUBM @<XREF:SUBM>@ {1:1} 2601 # +1 SUBN @<XREF:SUBN>@ {0:1} 2602 # +1 FILE <FILE_NAME> {0:1} 2603 # +1 COPR <COPYRIGHT_GEDCOM_FILE> {0:1} 2604 # +1 GEDC {1:1} 2605 # +2 VERS <VERSION_NUMBER> {1:1} 2606 # +2 FORM <GEDCOM_FORM> {1:1} 2607 # +1 CHAR <CHARACTER_SET> {1:1} 2608 # +2 VERS <VERSION_NUMBER> {0:1} 2609 # +1 LANG <LANGUAGE_OF_TEXT> {0:1} 2610 # +1 PLAC {0:1} 2611 # +2 FORM <PLACE_HIERARCHY> {1:1} 2612 # +1 NOTE <GEDCOM_CONTENT_DESCRIPTION> {0:1} 2613 # +2 [CONT|CONC] <GEDCOM_CONTENT_DESCRIPTION> {0:M} 2614 2615 # * NOTE: Submissions to the Family History Department for Ancestral 2616 # File submission or for clearing temple ordinances must use a 2617 # DESTination of ANSTFILE or TempleReady. 
2618 2619 self.head_parse_tbl = { 2620 TOKEN_SOUR : self.__header_sour, 2621 TOKEN_NAME : self.__header_sour_name, # This should be below SOUR 2622 TOKEN_VERS : self.__header_sour_vers, # This should be below SOUR 2623 TOKEN_FILE : self.__header_file, 2624 TOKEN_COPR : self.__header_copr, 2625 TOKEN_SUBM : self.__header_subm, 2626 TOKEN_CORP : self.__ignore, # This should be below SOUR 2627 TOKEN_DATA : self.__ignore, # This should be below SOUR 2628 TOKEN_SUBN : self.__header_subn, 2629 TOKEN_LANG : self.__header_lang, 2630 TOKEN_TIME : self.__ignore, # This should be below DATE 2631 TOKEN_DEST : self.__header_dest, 2632 TOKEN_CHAR : self.__header_char, 2633 TOKEN_GEDC : self.__header_gedc, 2634 TOKEN_PLAC : self.__header_plac, 2635 TOKEN_DATE : self.__header_date, 2636 TOKEN_NOTE : self.__header_note, 2637 TOKEN__SCHEMA: self.__ignore, 2638 } 2639 self.func_list.append(self.head_parse_tbl) 2640 2641 self.header_sour_parse_tbl = { 2642 TOKEN_VERS : self.__header_sour_vers, 2643 TOKEN_NAME : self.__header_sour_name, 2644 TOKEN_CORP : self.__header_sour_corp, 2645 TOKEN_DATA : self.__header_sour_data, 2646 } 2647 self.func_list.append(self.header_sour_parse_tbl) 2648 2649 self.header_sour_data = { 2650 TOKEN_DATE : self.__header_sour_date, 2651 TOKEN_COPR : self.__header_sour_copr, 2652 } 2653 self.func_list.append(self.header_sour_data) 2654 2655 self.header_corp_addr = { 2656 TOKEN_ADDR : self.__repo_addr, 2657 TOKEN_PHON : self.__repo_phon, 2658 TOKEN_FAX : self.__repo_fax, 2659 TOKEN_WWW : self.__repo_www, 2660 TOKEN_EMAIL : self.__repo_email, 2661 } 2662 self.func_list.append(self.header_corp_addr) 2663 2664 self.header_subm = { 2665 TOKEN_NAME : self.__header_subm_name, 2666 } 2667 self.func_list.append(self.header_subm) 2668 2669 self.place_form = { 2670 TOKEN_FORM : self.__place_form, 2671 } 2672 self.func_list.append(self.place_form) 2673 2674 # 2675 # Parse table for <<NOTE_RECORD>> below the level 0 NOTE tag 2676 # 2677 # n @<XREF:NOTE>@ NOTE 
<SUBMITTER_TEXT> {1:1} 2678 # +1 [ CONC | CONT] <SUBMITTER_TEXT> {0:M} 2679 # +1 <<SOURCE_CITATION>> {0:M} 2680 # +1 REFN <USER_REFERENCE_NUMBER> {0:M} 2681 # +2 TYPE <USER_REFERENCE_TYPE> {0:1} 2682 # +1 RIN <AUTOMATED_RECORD_ID> {0:1} 2683 # +1 <<CHANGE_DATE>> {0:1} 2684 2685 self.note_parse_tbl = { 2686 TOKEN_SOUR : self.__ignore, 2687 TOKEN_REFN : self.__ignore, 2688 TOKEN_RIN : self.__ignore, 2689 TOKEN_CHAN : self.__note_chan, 2690 } 2691 self.func_list.append(self.note_parse_tbl) 2692 2693 # look for existing place titles, build a map 2694 self.place_names = defaultdict(list) 2695 cursor = dbase.get_place_cursor() 2696 data = next(cursor) 2697 while data: 2698 (handle, val) = data 2699 self.place_names[val[2]].append(handle) 2700 data = next(cursor) 2701 cursor.close() 2702 2703 enc = stage_one.get_encoding() 2704 2705 if enc == "ANSEL": 2706 rdr = AnselReader(ifile, self.__add_msg) 2707 elif enc in ("UTF-8", "UTF8", "UTF_8_SIG"): 2708 rdr = UTF8Reader(ifile, self.__add_msg, enc) 2709 elif enc in ("UTF-16LE", "UTF-16BE", "UTF16", "UNICODE"): 2710 rdr = UTF16Reader(ifile, self.__add_msg) 2711 elif enc in ("CP1252", "WINDOWS-1252"): 2712 rdr = CP1252Reader(ifile, self.__add_msg) 2713 else: 2714 rdr = AnsiReader(ifile, self.__add_msg) 2715 2716 self.lexer = Lexer(rdr, self.__add_msg) 2717 self.filename = filename 2718 self.backoff = False 2719 2720 fullpath = os.path.normpath(os.path.abspath(filename)) 2721 self.geddir = os.path.dirname(fullpath) 2722 2723 self.error_count = 0 2724 amap = PERSONALCONSTANTATTRIBUTES 2725 2726 self.attrs = list(amap.values()) 2727 self.gedattr = dict([key, val] for val, key in amap.items()) 2728 2729 def parse_gedcom_file(self, use_trans=False): 2730 """ 2731 Parses the opened GEDCOM file. 
def __clean_up(self):
    """
    Release the parse tables once the import is finished.

    Every table registered in func_list maps tokens to bound methods of
    this object, so each one is emptied to break those reference cycles and
    help garbage collection. The func_list and update attributes are then
    removed and the lexer is asked to clean up as well.
    """
    for parse_table in self.func_list:
        parse_table.clear()
    del self.func_list
    del self.update
    self.lexer.clean_up()

def __find_person_handle(self, gramps_id):
    """
    Return the database handle associated with the person's Gramps ID,
    resolved through the gid2id map.
    """
    handle = self.__find_from_handle(gramps_id, self.gid2id)
    return handle

def __find_family_handle(self, gramps_id):
    """
    Return the database handle associated with the family's Gramps ID,
    resolved through the fid2id map.
    """
    handle = self.__find_from_handle(gramps_id, self.fid2id)
    return handle

def __find_media_handle(self, gramps_id):
    """
    Return the database handle associated with the media object's Gramps
    ID, resolved through the oid2id map.
    """
    handle = self.__find_from_handle(gramps_id, self.oid2id)
    return handle

def __find_note_handle(self, gramps_id):
    """
    Return the database handle associated with the note's Gramps ID,
    resolved through the nid2id map.
    """
    handle = self.__find_from_handle(gramps_id, self.nid2id)
    return handle

def __find_or_create_person(self, gramps_id):
    """
    Return the person for a Gramps ID, loading it from the database when
    the handle recorded in gid2id is already stored there.

    Otherwise a new Person is returned, carrying the handle obtained from
    __find_from_handle and the given Gramps ID.
    """
    person = Person()
    handle = self.gid2id.get(gramps_id)
    if not self.dbase.has_person_handle(handle):
        # not in the db yet: hand out a handle and label the new object
        handle = self.__find_from_handle(gramps_id, self.gid2id)
        person.set_handle(handle)
        person.set_gramps_id(gramps_id)
        return person
    raw_data = self.dbase.get_raw_person_data(handle)
    person.unserialize(raw_data)
    return person
2838 """ 2839 family = Family() 2840 # Add a counter for reordering the children later: 2841 family.child_ref_count = 0 2842 intid = self.fid2id.get(gramps_id) 2843 if self.dbase.has_family_handle(intid): 2844 family.unserialize(self.dbase.get_raw_family_data(intid)) 2845 else: 2846 intid = self.__find_from_handle(gramps_id, self.fid2id) 2847 family.set_handle(intid) 2848 family.set_gramps_id(gramps_id) 2849 return family 2850 2851 def __find_or_create_media(self, gramps_id): 2852 """ 2853 Finds or creates a media object based on the Gramps ID. If the ID is 2854 already used (is in the db), we return the item in the db. Otherwise, 2855 we create a new media object, assign the handle and Gramps ID. 2856 """ 2857 obj = Media() 2858 intid = self.oid2id.get(gramps_id) 2859 if self.dbase.has_media_handle(intid): 2860 obj.unserialize(self.dbase.get_raw_media_data(intid)) 2861 else: 2862 intid = self.__find_from_handle(gramps_id, self.oid2id) 2863 obj.set_handle(intid) 2864 obj.set_gramps_id(gramps_id) 2865 return obj 2866 2867 def __find_or_create_source(self, gramps_id): 2868 """ 2869 Find or create a source based on the Gramps ID. 2870 2871 If the ID is already used (is in the db), we return the item in the 2872 db. Otherwise, we create a new source, assign the handle and Gramps ID. 2873 2874 """ 2875 obj = Source() 2876 intid = self.sid2id.get(gramps_id) 2877 if self.dbase.has_source_handle(intid): 2878 obj.unserialize(self.dbase.get_raw_source_data(intid)) 2879 else: 2880 intid = self.__find_from_handle(gramps_id, self.sid2id) 2881 obj.set_handle(intid) 2882 obj.set_gramps_id(gramps_id) 2883 return obj 2884 2885 def __find_or_create_repository(self, gramps_id): 2886 """ 2887 Finds or creates a repository based on the Gramps ID. If the ID is 2888 already used (is in the db), we return the item in the db. Otherwise, 2889 we create a new repository, assign the handle and Gramps ID. 
2890 2891 Some GEDCOM "flavors" destroy the specification, and declare the 2892 repository inline instead of in a object. 2893 """ 2894 repository = Repository() 2895 intid = self.rid2id.get(gramps_id) 2896 if self.dbase.has_repository_handle(intid): 2897 repository.unserialize(self.dbase.get_raw_repository_data(intid)) 2898 else: 2899 intid = self.__find_from_handle(gramps_id, self.rid2id) 2900 repository.set_handle(intid) 2901 repository.set_gramps_id(gramps_id) 2902 return repository 2903 2904 def __find_or_create_note(self, gramps_id): 2905 """ 2906 Finds or creates a note based on the Gramps ID. If the ID is 2907 already used (is in the db), we return the item in the db. Otherwise, 2908 we create a new note, assign the handle and Gramps ID. 2909 If no Gramps ID is passed in, we not only make a Note with GID, we 2910 commit it. 2911 """ 2912 note = Note() 2913 if not gramps_id: 2914 need_commit = True 2915 gramps_id = self.dbase.find_next_note_gramps_id() 2916 else: 2917 need_commit = False 2918 2919 intid = self.nid2id.get(gramps_id) 2920 if self.dbase.has_note_handle(intid): 2921 note.unserialize(self.dbase.get_raw_note_data(intid)) 2922 else: 2923 intid = self.__find_from_handle(gramps_id, self.nid2id) 2924 note.set_handle(intid) 2925 note.set_gramps_id(gramps_id) 2926 if need_commit: 2927 self.dbase.add_note(note, self.trans) 2928 return note 2929 2930 def __loc_is_empty(self, location): 2931 """ 2932 Determines whether a location is empty. 2933 2934 @param location: The current location 2935 @type location: gen.lib.Location 2936 @return True of False 2937 """ 2938 if location is None: 2939 return True 2940 elif location.serialize() == self._EMPTY_LOC: 2941 return True 2942 elif location.is_empty(): 2943 return True 2944 return False 2945 2946 def __find_place(self, title, location, placeref_list): 2947 """ 2948 Finds an existing place based on the title and primary location. 
2949 2950 @param title: The place title 2951 @type title: string 2952 @param location: The current location 2953 @type location: gen.lib.Location 2954 @return gen.lib.Place 2955 """ 2956 for place_handle in self.place_names[title]: 2957 place = self.dbase.get_place_from_handle(place_handle) 2958 if place.get_title() == title: 2959 if self.__loc_is_empty(location) and \ 2960 self.__loc_is_empty(self.__get_first_loc(place)) and \ 2961 place.get_placeref_list() == placeref_list: 2962 return place 2963 elif (not self.__loc_is_empty(location) and 2964 not self.__loc_is_empty(self.__get_first_loc(place)) and 2965 self.__get_first_loc(place).is_equivalent(location) == 2966 IDENTICAL) and \ 2967 place.get_placeref_list() == placeref_list: 2968 return place 2969 return None 2970 2971 def __add_place(self, event, sub_state): 2972 """ 2973 Add a new place to an event if not already present, or update a 2974 place. 2975 2976 @param event: The event 2977 @type event: gen.lib.Event 2978 @param substate: The sub-state for PLAC or ADDR elements (i.e. parsed 2979 by event_parse_tbl) 2980 @type sub_state: CurrentState 2981 """ 2982 if sub_state.place: 2983 # see whether this place already exists 2984 place = self.__find_place(sub_state.place.get_title(), 2985 self.__get_first_loc(sub_state.place), 2986 sub_state.place.get_placeref_list()) 2987 if place is None: 2988 place = sub_state.place 2989 place_title = _pd.display(self.dbase, place) 2990 location = sub_state.pf.load_place(self.place_import, place, 2991 place_title) 2992 self.dbase.add_place(place, self.trans) 2993 # if 'location was created, then store it, now that we have a 2994 # handle. 
2995 if location: 2996 self.place_import.store_location(location, place.handle) 2997 self.place_names[place.get_title()].append(place.get_handle()) 2998 event.set_place_handle(place.get_handle()) 2999 else: 3000 place.merge(sub_state.place) 3001 place_title = _pd.display(self.dbase, place) 3002 location = sub_state.pf.load_place(self.place_import, place, 3003 place_title) 3004 self.dbase.commit_place(place, self.trans) 3005 if location: 3006 self.place_import.store_location(location, place.handle) 3007 event.set_place_handle(place.get_handle()) 3008 3009 def __find_file(self, fullname, altpath): 3010 # try to find the media file 3011 fullname = fullname.replace('\\', os.path.sep) 3012 3013 try: 3014 if os.path.isfile(fullname): 3015 return (1, fullname) 3016 except UnicodeEncodeError: 3017 # FIXME: problem possibly caused by umlaut/accented character 3018 # in filename 3019 return (0, fullname) 3020 # strip off Windows drive letter, if present 3021 if len(fullname) > 3 and fullname[1] == ':': 3022 fullname = fullname[2:] 3023 # look where we found the '.ged', using the full path in fullname 3024 other = os.path.join(altpath, fullname) 3025 if os.path.isfile(other): 3026 return (1, other) 3027 # lets try reducing to just where we found '.ged' 3028 other = os.path.join(altpath, os.path.basename(fullname)) 3029 if os.path.isfile(other): 3030 return (1, other) 3031 # lets try using the base path for relative media paths 3032 other = os.path.join(media_path(self.dbase), fullname) 3033 if os.path.isfile(other): 3034 return (1, fullname) 3035 # lets try using the base path for relative media paths with base name 3036 other = os.path.join(media_path(self.dbase), 3037 os.path.basename(fullname)) 3038 if os.path.isfile(other): 3039 return (1, os.path.basename(fullname)) 3040 return (0, fullname) 3041 3042 def __get_next_line(self): 3043 """ 3044 Get the next line for analysis from the lexical analyzer. Return the 3045 same value if the _backup flag is set. 
def __chk_subordinate(self, level, state, token):
    """
    Read the lines subordinate to the current one, looking for a specific
    token. Every subordinate line with a different token is reported as
    not understood.

    @param level: Current level in the file
    @type level: int
    @param state: The current state
    @type state: CurrentState
    @param token: The token to search for
    @type token: int
    @return: the last line read whose token matched, or None when no
             line matched
    """
    matched = None
    ignored = 0
    line = self.__get_next_line()
    while not self.__level_is_finished(line, level):
        if line.token != token:
            self.__add_msg(_("Line ignored as not understood"),
                           line, state)
            ignored += 1
        else:
            matched = line
        line = self.__get_next_line()
    if ignored:
        # This improves formatting when there are long sequences of
        # skipped lines
        self.__add_msg("", None, None)
    return matched
def __skip_subordinate_levels(self, level, state):
    """
    Skip all lines at the specified level or deeper, reporting each one
    as a skipped subordinate line.

    Reading stops at the first line for which __level_is_finished is
    true for the given level.

    @param level: The level at which skipping starts
    @type level: int
    @param state: The current state
    @type state: CurrentState
    """
    skipped = 0
    line = self.__get_next_line()
    while not self.__level_is_finished(line, level):
        self.__add_msg(_("Skipped subordinate line"), line, state)
        skipped += 1
        line = self.__get_next_line()
    if skipped:
        # This improves formatting when there are long sequences of
        # skipped lines
        self.__add_msg("", None, None)
3162 """ 3163 done = text.level < level 3164 if done: 3165 self._backup() 3166 return done 3167 3168 def __add_msg(self, problem, line=None, state=None): 3169 if problem != "": 3170 self.number_of_errors += 1 3171 if line: 3172 prob_width = 66 3173 problem = problem.ljust(prob_width)[0:(prob_width - 1)] 3174 text = str(line.data).replace("\n", "\n".ljust(prob_width + 22)) 3175 message = "%s Line %5d: %s %s %s\n" % (problem, line.line, 3176 line.level, 3177 line.token_text, text) 3178 else: 3179 message = problem + "\n" 3180 if state: 3181 state.msg += message 3182 self.errors.append(message) 3183 3184 def __check_msgs(self, record_name, state, obj): 3185 if state.msg == "": 3186 return 3187 message = _("Records not imported into ") + record_name + ":\n\n" + \ 3188 state.msg 3189 new_note = Note() 3190 tag = StyledTextTag(StyledTextTagType.FONTFACE, 'Monospace', 3191 [(0, len(message))]) 3192 text = StyledText(message, [tag]) 3193 new_note.set_styledtext(text) 3194 new_note.set_handle(create_id()) 3195 gramps_id = self.nid_map[""] 3196 new_note.set_gramps_id(gramps_id) 3197 note_type = NoteType() 3198 note_type.set((NoteType.CUSTOM, _("GEDCOM import"))) 3199 new_note.set_type(note_type) 3200 self.dbase.add_note(new_note, self.trans) 3201 # If possible, attach the note to the relevant object 3202 if obj: 3203 obj.add_note(new_note.get_handle()) 3204 3205 def _backup(self): 3206 """ 3207 Set the _backup flag so that the current line can be accessed by the 3208 next level up. 
3209 """ 3210 self.backoff = True 3211 3212 def __check_xref(self): 3213 3214 def __check(_map, has_gid_func, class_func, commit_func, 3215 gramps_id2handle, msg): 3216 for input_id, gramps_id in _map.map().items(): 3217 # Check whether an object exists for the mapped gramps_id 3218 if not has_gid_func(gramps_id): 3219 _handle = self.__find_from_handle(gramps_id, 3220 gramps_id2handle) 3221 if msg == "FAM": 3222 make_unknown(gramps_id, self.explanation.handle, 3223 class_func, commit_func, self.trans, 3224 db=self.dbase) 3225 self.missing_references += 1 3226 self.__add_msg(_("Error: %(msg)s '%(gramps_id)s'" 3227 " (input as @%(xref)s@) not in input" 3228 " GEDCOM. Record synthesised") % 3229 {'msg' : msg, 'gramps_id' : gramps_id, 3230 'xref' : input_id}) 3231 else: 3232 make_unknown(gramps_id, self.explanation.handle, 3233 class_func, commit_func, self.trans) 3234 self.missing_references += 1 3235 self.__add_msg(_("Error: %(msg)s '%(gramps_id)s'" 3236 " (input as @%(xref)s@) not in input" 3237 " GEDCOM. 
Record with typifying" 3238 " attribute 'Unknown' created") % 3239 {'msg' : msg, 'gramps_id' : gramps_id, 3240 'xref' : input_id}) 3241 3242 self.explanation = create_explanation_note(self.dbase) 3243 3244 self.missing_references = 0 3245 __check(self.pid_map, self.dbase.has_person_gramps_id, 3246 self.__find_or_create_person, self.dbase.commit_person, 3247 self.gid2id, "INDI") 3248 __check(self.fid_map, self.dbase.has_family_gramps_id, 3249 self.__find_or_create_family, self.dbase.commit_family, 3250 self.fid2id, "FAM") 3251 __check(self.sid_map, self.dbase.has_source_gramps_id, 3252 self.__find_or_create_source, self.dbase.commit_source, 3253 self.sid2id, "SOUR") 3254 __check(self.oid_map, self.dbase.has_media_gramps_id, 3255 self.__find_or_create_media, self.dbase.commit_media, 3256 self.oid2id, "OBJE") 3257 __check(self.rid_map, self.dbase.has_repository_gramps_id, 3258 self.__find_or_create_repository, self.dbase.commit_repository, 3259 self.rid2id, "REPO") 3260 __check(self.nid_map, self.dbase.has_note_gramps_id, 3261 self.__find_or_create_note, self.dbase.commit_note, 3262 self.nid2id, "NOTE") 3263 3264 # Check persons membership in referenced families 3265 def __input_fid(gramps_id): 3266 for (key, val) in self.fid_map.map().items(): 3267 if val == gramps_id: 3268 return key 3269 3270 for input_id, gramps_id in self.pid_map.map().items(): 3271 person_handle = self.__find_from_handle(gramps_id, self.gid2id) 3272 person = self.dbase.get_person_from_handle(person_handle) 3273 for family_handle in person.get_family_handle_list(): 3274 family = self.dbase.get_family_from_handle(family_handle) 3275 if family and family.get_father_handle() != person_handle and \ 3276 family.get_mother_handle() != person_handle: 3277 person.remove_family_handle(family_handle) 3278 self.dbase.commit_person(person, self.trans) 3279 self.__add_msg(_("Error: family '%(family)s' (input as" 3280 " @%(orig_family)s@) person %(person)s" 3281 " (input as %(orig_person)s) is not a" 3282 " 
member of the referenced family." 3283 " Family reference removed from person") % 3284 {'family' : family.gramps_id, 3285 'orig_family' : 3286 __input_fid(family.gramps_id), 3287 'person' : person.gramps_id, 3288 'orig_person' : input_id}) 3289 3290 def __input_pid(gramps_id): 3291 for (key, val) in self.pid_map.map().items(): 3292 if val == gramps_id: 3293 return key 3294 3295 for input_id, gramps_id in self.fid_map.map().items(): 3296 family_handle = self.__find_from_handle(gramps_id, self.fid2id) 3297 family = self.dbase.get_family_from_handle(family_handle) 3298 father_handle = family.get_father_handle() 3299 mother_handle = family.get_mother_handle() 3300 3301 if father_handle: 3302 father = self.dbase.get_person_from_handle(father_handle) 3303 if father and \ 3304 family_handle not in father.get_family_handle_list(): 3305 father.add_family_handle(family_handle) 3306 self.dbase.commit_person(father, self.trans) 3307 self.__add_msg("Error: family '%(family)s' (input as" 3308 " @%(orig_family)s@) father '%(father)s'" 3309 " (input as '%(orig_father)s') does not " 3310 "refer back to the family. Reference added." 3311 % {'family' : family.gramps_id, 3312 'orig_family' : input_id, 3313 'father' : father.gramps_id, 3314 'orig_father' : 3315 __input_pid(father.gramps_id)}) 3316 3317 if mother_handle: 3318 mother = self.dbase.get_person_from_handle(mother_handle) 3319 if mother and \ 3320 family_handle not in mother.get_family_handle_list(): 3321 mother.add_family_handle(family_handle) 3322 self.dbase.commit_person(mother, self.trans) 3323 self.__add_msg("Error: family '%(family)s' (input as" 3324 " @%(orig_family)s@) mother '%(mother)s'" 3325 " (input as '%(orig_mother)s') does not " 3326 "refer back to the family. Reference added." 
3327 % {'family' : family.gramps_id, 3328 'orig_family' : input_id, 3329 'mother' : mother.gramps_id, 3330 'orig_mother' : 3331 __input_pid(mother.gramps_id)}) 3332 3333 for child_ref in family.get_child_ref_list(): 3334 child_handle = child_ref.ref 3335 child = self.dbase.get_person_from_handle(child_handle) 3336 if child: 3337 if family_handle not in \ 3338 child.get_parent_family_handle_list(): 3339 # The referenced child has no reference to the family. 3340 # There was a link from the FAM record to the child, 3341 # but no FAMC link from the child to the FAM. 3342 child.add_parent_family_handle(family_handle) 3343 self.dbase.commit_person(child, self.trans) 3344 self.__add_msg("Error: family '%(family)s' (input as" 3345 " @%(orig_family)s@) child '%(child)s'" 3346 " (input as '%(orig_child)s') does not " 3347 "refer back to the family. " 3348 "Reference added." % 3349 {'family' : family.gramps_id, 3350 'orig_family' : input_id, 3351 'child' : child.gramps_id, 3352 'orig_child' : 3353 __input_pid(child.gramps_id)}) 3354 3355 if self.missing_references: 3356 self.dbase.commit_note(self.explanation, self.trans, time.time()) 3357 txt = _("\nThe imported file was not self-contained.\n" 3358 "To correct for that, %(new)d objects were created and\n" 3359 "their typifying attribute was set to 'Unknown'.\n" 3360 "Where possible these 'Unknown' objects are \n" 3361 "referenced by note %(unknown)s.\n" 3362 ) % {'new': self.missing_references, 3363 'unknown': self.explanation.gramps_id} 3364 self.__add_msg(txt) 3365 self.number_of_errors -= 1 3366 3367 def __merge_address(self, free_form_address, addr, line, state): 3368 """ 3369 Merge freeform and structured addrssses. 
3370 n ADDR <ADDRESS_LINE> {0:1} 3371 +1 CONT <ADDRESS_LINE> {0:M} 3372 +1 ADR1 <ADDRESS_LINE1> {0:1} (Street) 3373 +1 ADR2 <ADDRESS_LINE2> {0:1} (Locality) 3374 +1 CITY <ADDRESS_CITY> {0:1} 3375 +1 STAE <ADDRESS_STATE> {0:1} 3376 +1 POST <ADDRESS_POSTAL_CODE> {0:1} 3377 +1 CTRY <ADDRESS_COUNTRY> {0:1} 3378 3379 This is done along the lines suggested by Tamura Jones in 3380 http://www.tamurajones.net/GEDCOMADDR.xhtml as a result of bug 6382. 3381 "When a GEDCOM reader encounters a double address, it should read the 3382 structured address. ... A GEDCOM reader that does verify that the 3383 addresses are the same should issue an error if they are not". 3384 3385 This is called for SUBMitter addresses (__subm_addr), INDIvidual 3386 addresses (__person_addr), REPO addresses and HEADer corp address 3387 (__repo_address) and EVENt addresses (__event_adr). 3388 3389 The structured address (if any) will have been accumulated into an 3390 object of type LocationBase, which will either be a Location, or an 3391 Address object. 3392 3393 If ADDR is provided, but none of ADR1, ADR2, CITY, STAE, or POST (not 3394 CTRY), then Street is set to the freeform address. N.B. this is a 3395 change for Repository addresses and HEADer Corp address where 3396 previously the free-form address was deconstrucated into different 3397 structured components. N.B. PAF provides a free-form address and a 3398 country, so this allows for that case. 3399 3400 If both forms of address are provided, then the structured address is 3401 used, and if the ADDR/CONT contains anything not in the structured 3402 address, a warning is issued. 3403 3404 If just ADR1, ADR2, CITY, STAE, POST or CTRY are provided (this is not 3405 actually legal GEDCOM symtax, but may be possible by GEDCOM extensions) 3406 then just the structrued address is used. 3407 The routine returns a string suitable for a title. 
3408 """ 3409 title = '' 3410 free_form_address = free_form_address.replace('\n', ', ') 3411 if not (addr.get_street() or addr.get_locality() or 3412 addr.get_city() or addr.get_state() or 3413 addr.get_postal_code()): 3414 3415 addr.set_street(free_form_address) 3416 return free_form_address 3417 else: 3418 # structured address provided 3419 addr_list = free_form_address.split(",") 3420 str_list = [] 3421 for func in (addr.get_street(), addr.get_locality(), 3422 addr.get_city(), addr.get_state(), 3423 addr.get_postal_code(), addr.get_country()): 3424 str_list += [i.strip(',' + string.whitespace) 3425 for i in func.split("\n")] 3426 for elmn in addr_list: 3427 if elmn.strip(',' + string.whitespace) not in str_list: 3428 # message means that the element %s was ignored, but 3429 # expressed the wrong way round because the message is 3430 # truncated for output 3431 self.__add_msg(_("ADDR element ignored '%s'" 3432 % elmn), line, state) 3433 # The free-form address ADDR is discarded 3434 # Assemble a title out of structured address 3435 for elmn in str_list: 3436 if elmn: 3437 if title != '': 3438 # TODO for Arabic, should the next comma be translated? 
3439 title += ', ' 3440 title += elmn 3441 return title 3442 3443 def __parse_trailer(self): 3444 """ 3445 Looks for the expected TRLR token 3446 """ 3447 try: 3448 line = self.__get_next_line() 3449 if line and line.token != TOKEN_TRLR: 3450 state = CurrentState() 3451 self.__not_recognized(line, state) 3452 self.__check_msgs(_("TRLR (trailer)"), state, None) 3453 except TypeError: 3454 return 3455 3456 def __parse_submitter(self, line): 3457 """ 3458 Parses the submitter data 3459 3460 n @<XREF:SUBM>@ SUBM 3461 +1 NAME <SUBMITTER_NAME> 3462 +1 <<ADDRESS_STRUCTURE>> 3463 +1 <<MULTIMEDIA_LINK>> 3464 +1 LANG <LANGUAGE_PREFERENCE> 3465 +1 RFN <SUBMITTER_REGISTERED_RFN> 3466 +1 RIN <AUTOMATED_RECORD_ID> 3467 +1 <<CHANGE_DATE>> 3468 """ 3469 researcher = Researcher() 3470 state = CurrentState() 3471 state.res = researcher 3472 state.level = 1 3473 repo = Repository() 3474 state.repo = repo 3475 self.__parse_level(state, self.subm_parse_tbl, self.__undefined) 3476 # If this is the submitter that we were told about in the HEADer, then 3477 # we will need to update the researcher 3478 if line.token_text == self.subm and self.import_researcher: 3479 self.dbase.set_researcher(state.res) 3480 3481 localized_submitter = _("(Submitter):") 3482 if state.res.get_name() == "": 3483 submitter_name = "SUBM %s @%s@" % (localized_submitter, 3484 line.token_text) 3485 else: 3486 submitter_name = "SUBM %s (@%s@) %s" % (localized_submitter, 3487 line.token_text, 3488 state.res.get_name()) 3489 if self.use_def_src: 3490 repo.set_name(submitter_name) 3491 repo.set_handle(create_id()) 3492 repo.set_gramps_id(self.rid_map[""]) 3493 3494 addr = Address() 3495 addr.set_street(state.res.get_address()) 3496 addr.set_locality(state.res.get_locality()) 3497 addr.set_city(state.res.get_city()) 3498 addr.set_state(state.res.get_state()) 3499 addr.set_country(state.res.get_country()) 3500 addr.set_postal_code(state.res.get_postal_code()) 3501 addr.set_county(state.res.get_county()) 3502 
def __parse_record(self):
    """
    Parse the top level (0 level) instances.
    RECORD: =
    [
    n <<FAM_RECORD>> {1:1}
    |
    n <<INDIVIDUAL_RECORD>> {1:1}
    |
    n <<MULTIMEDIA_RECORD>> {1:M}
    |
    n <<NOTE_RECORD>> {1:1}
    |
    n <<REPOSITORY_RECORD>> {1:1}
    |
    n <<SOURCE_RECORD>> {1:1}
    |
    n <<SUBMITTER_RECORD>> {1:1}
    ]

    This also deals with the SUBN (submission) record, of which there
    should be exactly one.

    Each line is dispatched on its token or on the record name held in
    its data. The loop ends, after backing up one line, when the TRLR
    token (or an empty line object) is read.
    """
    while True:
        line = self.__get_next_line()
        # Test the line before touching any of its attributes
        if not line or line.token == TOKEN_TRLR:
            self._backup()
            break
        key = line.data
        if line.token == TOKEN_UNKNOWN:
            state = CurrentState()
            self.__add_msg(_("Unknown tag"), line, state)
            self.__skip_subordinate_levels(1, state)
            self.__check_msgs(_("Top Level"), state, None)
        elif key in ("FAM", "FAMILY"):
            self.__parse_fam(line)
        elif key in ("INDI", "INDIVIDUAL"):
            self.__parse_indi(line)
        elif key in ("OBJE", "OBJECT"):
            self.__parse_obje(line)
        elif key in ("REPO", "REPOSITORY"):
            self.__parse_repo(line)
        elif key in ("SUBM", "SUBMITTER"):
            self.__parse_submitter(line)
        elif key == "SUBN":
            state = CurrentState(level=1)
            self.__parse_submission(line, state)
            self.__check_msgs(_("Top Level"), state, None)
        elif line.token in (TOKEN_SUBM, TOKEN_SUBN, TOKEN_IGNORE):
            state = CurrentState()
            self.__skip_subordinate_levels(1, state)
            self.__check_msgs(_("Top Level"), state, None)
        elif key in ("SOUR", "SOURCE"):
            self.__parse_source(line.token_text, 1)
        elif line.data.startswith(("SOUR ", "SOURCE ")):
            # A source formatted in a single line, for example:
            # 0 @S62@ SOUR This is the title of the source
            # NOTE(review): the id map is keyed with the whole data text
            # here, whereas the multi-line SOUR branch above passes
            # line.token_text -- confirm this is intended.
            source = self.__find_or_create_source(self.sid_map[line.data])
            # The title is everything after the tag and the space that
            # follows it, whichever spelling of the tag was used
            source.set_title(line.data.partition(" ")[2])
            self.dbase.commit_source(source, self.trans)
        elif key[0:4] == "NOTE":
            try:
                line.data = line.data[5:]
            except TypeError:
                # don't think this path is ever taken, but if it is..
                # ensure a message is emitted & subordinates skipped
                line.data = None
            self.__parse_inline_note(line, 1)
        else:
            state = CurrentState()
            self.__not_recognized(line, state)
            self.__check_msgs(_("Top Level"), state, None)
3621 3622 n @XREF:INDI@ INDI {1:1} 3623 +1 RESN <RESTRICTION_NOTICE> {0:1} 3624 +1 <<PERSONAL_NAME_STRUCTURE>> {0:M} 3625 +1 SEX <SEX_VALUE> {0:1} 3626 +1 <<INDIVIDUAL_EVENT_STRUCTURE>> {0:M} 3627 +1 <<INDIVIDUAL_ATTRIBUTE_STRUCTURE>> {0:M} 3628 +1 <<LDS_INDIVIDUAL_ORDINANCE>> {0:M} 3629 +1 <<CHILD_TO_FAMILY_LINK>> {0:M} 3630 +1 <<SPOUSE_TO_FAMILY_LINK>> {0:M} 3631 +1 SUBM @<XREF:SUBM>@ {0:M} 3632 +1 <<ASSOCIATION_STRUCTURE>> {0:M} 3633 +1 ALIA @<XREF:INDI>@ {0:M} 3634 +1 ANCI @<XREF:SUBM>@ {0:M} 3635 +1 DESI @<XREF:SUBM>@ {0:M} 3636 +1 <<SOURCE_CITATION>> {0:M} 3637 +1 <<MULTIMEDIA_LINK>> {0:M} 3638 +1 <<NOTE_STRUCTURE>> {0:M} 3639 +1 RFN <PERMANENT_RECORD_FILE_NUMBER> {0:1} 3640 +1 AFN <ANCESTRAL_FILE_NUMBER> {0:1} 3641 +1 REFN <USER_REFERENCE_NUMBER> {0:M} 3642 +2 TYPE <USER_REFERENCE_TYPE> {0:1} 3643 +1 RIN <AUTOMATED_RECORD_ID> {0:1} 3644 +1 <<CHANGE_DATE>> {0:1} 3645 """ 3646 3647 # find the person 3648 real_id = self.pid_map[line.token_text] 3649 person = self.__find_or_create_person(real_id) 3650 3651 # set up the state for the parsing 3652 state = CurrentState(person=person, level=1) 3653 3654 # do the actual parsing 3655 self.__parse_level(state, self.indi_parse_tbl, self.__person_event) 3656 3657 # Add the default reference if no source has found 3658 self.__add_default_source(person) 3659 3660 # Add a default tag if provided 3661 self.__add_default_tag(person) 3662 3663 # Set up primary photo if present 3664 self.__do_photo(state) 3665 3666 self.__check_msgs(_("INDI (individual) Gramps ID %s") % 3667 person.get_gramps_id(), state, person) 3668 # commit the person to the database 3669 self.dbase.commit_person(person, self.trans, state.person.change) 3670 3671 def __person_sour(self, line, state): 3672 """ 3673 @param line: The current line in GedLine format 3674 @type line: GedLine 3675 @param state: The current state 3676 @type state: CurrentState 3677 """ 3678 citation_handle = self.handle_source(line, state.level, state) 3679 
def __person_attr(self, line, state):
    """
    Store the line as a custom attribute of the current person: the tag
    text becomes the attribute type name and the line data its value.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    custom_attr = Attribute()
    custom_type = (AttributeType.CUSTOM, line.token_text)
    custom_attr.set_type(custom_type)
    custom_attr.set_value(line.data)
    state.person.add_attribute(custom_attr)
def __person_resn(self, line, state):
    """
    Parses the RESN tag, adding it as an attribute.

    The attribute is a custom attribute named 'RESN' whose value is the
    restriction notice carried in the line data.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    attr = Attribute()
    attr.set_type((AttributeType.CUSTOM, 'RESN'))
    # Keep the restriction notice itself; without it the attribute only
    # records that some RESN line was present
    attr.set_value(line.data)
    state.person.add_attribute(attr)
def __parse_alias_name(self, line, state):
    """
    Parse a level 1 alias name and subsidiary levels when called from
    __person_alt_name (when the <NAME_PERSONAL> does not start with @).
    Also parses a level 2 alias name and subsidiary levels when called
    from __name_alias.

    +1 <ALIA> <NAME_PERSONAL> {1:1}
    +2 NPFX <NAME_PIECE_PREFIX> {0:1}
    +2 GIVN <NAME_PIECE_GIVEN> {0:1}
    +2 NICK <NAME_PIECE_NICKNAME> {0:1}
    +2 SPFX <NAME_PIECE_SURNAME_PREFIX> {0:1}
    +2 SURN <NAME_PIECE_SURNAME> {0:1}
    +2 NSFX <NAME_PIECE_SUFFIX> {0:1}
    +2 <<SOURCE_CITATION>> {0:M}
    +3 <<NOTE_STRUCTURE>> {0:M}
    +3 <<MULTIMEDIA_LINK>> {0:M}
    +2 <<NOTE_STRUCTURE>> {0:M}
    where <ALIA> == ALIA | _ALIA | ALIAS

    The name built from the line data is added to the person as an
    alternate name of type AKA; messages produced while parsing the
    subordinate lines are appended to state.msg.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    alias = self.__parse_name_personal(line.data)
    alias.set_type(NameType.AKA)
    state.person.add_alternate_name(alias)

    # The subordinate name pieces are parsed one level deeper, in a
    # state of their own that shares the person and carries the new name
    name_state = CurrentState()
    name_state.person = state.person
    name_state.name = alias
    name_state.level = state.level + 1
    self.__parse_level(name_state, self.name_parse_tbl, self.__undefined)

    state.msg += name_state.msg
The text is in the format 3863 of (according to the GEDCOM Spec): 3864 > <TEXT>|/<TEXT>/|<TEXT>/<TEXT>/|/<TEXT>/<TEXT>|<TEXT>/<TEXT>/<TEXT> 3865 We have encountered some variations that use: 3866 > <TEXT>/ 3867 3868 The basic Name structure is: 3869 3870 n NAME <NAME_PERSONAL> {1:1} 3871 +1 NPFX <NAME_PIECE_PREFIX> {0:1} 3872 +1 GIVN <NAME_PIECE_GIVEN> {0:1} 3873 +1 NICK <NAME_PIECE_NICKNAME> {0:1} 3874 +1 SPFX <NAME_PIECE_SURNAME_PREFIX {0:1} 3875 +1 SURN <NAME_PIECE_SURNAME> {0:1} 3876 +1 NSFX <NAME_PIECE_SUFFIX> {0:1} 3877 +1 <<SOURCE_CITATION>> {0:M} 3878 +1 <<NOTE_STRUCTURE>> {0:M} 3879 3880 @param line: The current line in GedLine format 3881 @type line: GedLine 3882 @param state: The current state 3883 @type state: CurrentState 3884 """ 3885 3886 # build a Name structure from the text 3887 3888 name = self.__parse_name_personal(line.data) 3889 3890 # Add the name as the primary name if this is the first one that 3891 # we have encountered for this person. Assume that if this is the 3892 # first name, that it is a birth name. Otherwise, label it as an 3893 # "Also Known As (AKA)". GEDCOM does not seem to have the concept 3894 # of different name types 3895 3896 if state.name_cnt == 0: 3897 name.set_type(NameType.BIRTH) 3898 state.person.set_primary_name(name) 3899 else: 3900 name.set_type(NameType.AKA) 3901 state.person.add_alternate_name(name) 3902 state.name_cnt += 1 3903 3904 # Create a new state, and parse the remainder of the NAME level 3905 sub_state = CurrentState() 3906 sub_state.person = state.person 3907 sub_state.name = name 3908 sub_state.level = state.level + 1 3909 3910 self.__parse_level(sub_state, self.name_parse_tbl, self.__undefined) 3911 state.msg += sub_state.msg 3912 3913 def __person_sex(self, line, state): 3914 """ 3915 Parses the SEX line of a GEDCOM file. 
It has the format of: 3916 3917 +1 SEX <SEX_VALUE> {0:1} 3918 3919 @param line: The current line in GedLine format 3920 @type line: GedLine 3921 @param state: The current state 3922 @type state: CurrentState 3923 """ 3924 state.person.set_gender(line.data) 3925 3926 def __person_even(self, line, state): 3927 """ 3928 Parses the custom EVEN tag, which has the format of: 3929 3930 n <<EVENT_TYPE>> {1:1} 3931 +1 <<EVENT_DETAIL>> {0:1} p.* 3932 3933 @param line: The current line in GedLine format 3934 @type line: GedLine 3935 @param state: The current state 3936 @type state: CurrentState 3937 """ 3938 event_ref = self.__build_event_pair(state, EventType.CUSTOM, 3939 self.event_parse_tbl, line.data) 3940 state.person.add_event_ref(event_ref) 3941 3942 def __person_std_event(self, line, state): 3943 """ 3944 Parses GEDCOM event types that map to a Gramps standard type. 3945 Additional parsing required is for the event detail: 3946 3947 +1 <<EVENT_DETAIL>> {0:1} p.* 3948 3949 @param line: The current line in GedLine format 3950 @type line: GedLine 3951 @param state: The current state 3952 @type state: CurrentState 3953 """ 3954 3955 event = line.data 3956 event.set_gramps_id(self.emapper.find_next()) 3957 event_ref = EventRef() 3958 self.dbase.add_event(event, self.trans) 3959 3960 sub_state = CurrentState() 3961 sub_state.person = state.person 3962 sub_state.level = state.level + 1 3963 sub_state.event = event 3964 sub_state.event_ref = event_ref 3965 sub_state.pf = self.place_parser 3966 3967 self.__parse_level(sub_state, self.event_parse_tbl, self.__undefined) 3968 state.msg += sub_state.msg 3969 3970 self.__add_place(event, sub_state) 3971 3972 self.dbase.commit_event(event, self.trans) 3973 event_ref.ref = event.handle 3974 state.person.add_event_ref(event_ref) 3975 3976 def __person_reli(self, line, state): 3977 """ 3978 Parses the RELI tag. 
3979 3980 n RELI [Y|<NULL>] {1:1} 3981 +1 <<EVENT_DETAIL>> {0:1} p.* 3982 3983 @param line: The current line in GedLine format 3984 @type line: GedLine 3985 @param state: The current state 3986 @type state: CurrentState 3987 """ 3988 event_ref = self.__build_event_pair(state, EventType.RELIGION, 3989 self.event_parse_tbl, line.data) 3990 state.person.add_event_ref(event_ref) 3991 3992 def __person_birt(self, line, state): 3993 """ 3994 Parses GEDCOM BIRT tag into a Gramps birth event. Additional work 3995 must be done, since additional handling must be done by Gramps to set 3996 this up as a birth reference event. 3997 3998 n BIRT [Y|<NULL>] {1:1} 3999 +1 <<EVENT_DETAIL>> {0:1} p.* 4000 +1 FAMC @<XREF:FAM>@ {0:1} p.* 4001 4002 I'm not sure what value the FAMC actually offers here, since 4003 the FAMC record should handle this. Why it is a valid sub value 4004 is beyond me. 4005 4006 @param line: The current line in GedLine format 4007 @type line: GedLine 4008 @param state: The current state 4009 @type state: CurrentState 4010 """ 4011 event_ref = self.__build_event_pair(state, EventType.BIRTH, 4012 self.event_parse_tbl, line.data) 4013 if state.person.get_birth_ref(): 4014 state.person.add_event_ref(event_ref) 4015 else: 4016 state.person.set_birth_ref(event_ref) 4017 4018 def __person_adop(self, line, state): 4019 """ 4020 Parses GEDCOM ADOP tag, subordinate to the INDI tag. Additinal tags 4021 are needed by the tag, so we pass a different function map. 
4022 4023 n ADOP [Y|<NULL>] {1:1} 4024 +1 <<EVENT_DETAIL>> {0:1} p.* 4025 +1 FAMC @<XREF:FAM>@ {0:1} p.* 4026 +2 ADOP <ADOPTED_BY_WHICH_PARENT> {0:1} 4027 4028 @param line: The current line in GedLine format 4029 @type line: GedLine 4030 @param state: The current state 4031 @type state: CurrentState 4032 """ 4033 event_ref = self.__build_event_pair(state, EventType.ADOPT, 4034 self.adopt_parse_tbl, line.data) 4035 state.person.add_event_ref(event_ref) 4036 4037 def __person_deat(self, line, state): 4038 """ 4039 Parses GEDCOM DEAT tag into a Gramps birth event. Additional work 4040 must be done, since additional handling must be done by Gramps to set 4041 this up as a death reference event. 4042 4043 n DEAT [Y|<NULL>] {1:1} 4044 +1 <<EVENT_DETAIL>> {0:1} p.* 4045 4046 @param line: The current line in GedLine format 4047 @type line: GedLine 4048 @param state: The current state 4049 @type state: CurrentState 4050 """ 4051 event_ref = self.__build_event_pair(state, EventType.DEATH, 4052 self.event_parse_tbl, line.data) 4053 if state.person.get_death_ref(): 4054 state.person.add_event_ref(event_ref) 4055 else: 4056 state.person.set_death_ref(event_ref) 4057 4058 def __person_note(self, line, state): 4059 """ 4060 Parses a note associated with the person 4061 4062 @param line: The current line in GedLine format 4063 @type line: GedLine 4064 @param state: The current state 4065 @type state: CurrentState 4066 """ 4067 self.__parse_note(line, state.person, state) 4068 4069 def __person_rnote(self, line, state): 4070 """ 4071 Parses a note associated with the person 4072 4073 @param line: The current line in GedLine format 4074 @type line: GedLine 4075 @param state: The current state 4076 @type state: CurrentState 4077 """ 4078 self.__parse_note(line, state.person, state) 4079 4080 def __person_addr(self, line, state): 4081 """ 4082 Parses the INDIvidual <ADDRESS_STRUCTURE> 4083 4084 n ADDR <ADDRESS_LINE> {0:1} 4085 +1 CONT <ADDRESS_LINE> {0:M} 4086 +1 ADR1 <ADDRESS_LINE1> 
{0:1} (Street) 4087 +1 ADR2 <ADDRESS_LINE2> {0:1} (Locality) 4088 +1 CITY <ADDRESS_CITY> {0:1} 4089 +1 STAE <ADDRESS_STATE> {0:1} 4090 +1 POST <ADDRESS_POSTAL_CODE> {0:1} 4091 +1 CTRY <ADDRESS_COUNTRY> {0:1} 4092 n PHON <PHONE_NUMBER> {0:3} 4093 4094 @param line: The current line in GedLine format 4095 @type line: GedLine 4096 @param state: The current state 4097 @type state: CurrentState 4098 """ 4099 if self.is_ftw: 4100 self.__person_resi(line, state) 4101 return 4102 free_form = line.data 4103 4104 sub_state = CurrentState(level=state.level + 1) 4105 sub_state.addr = Address() 4106 4107 self.__parse_level(sub_state, self.parse_addr_tbl, self.__ignore) 4108 state.msg += sub_state.msg 4109 4110 self.__merge_address(free_form, sub_state.addr, line, state) 4111 state.person.add_address(sub_state.addr) 4112 4113 def __person_resi(self, line, state): 4114 """ 4115 Parses GEDCOM ADDR tag, subordinate to the INDI tag, when sourced by 4116 FTM. We treat this as a RESI event, because FTM puts standard event 4117 details below the ADDR line. 
4118 4119 n ADDR <ADDRESS_LINE> {0:1} 4120 +1 <<EVENT_DETAIL>> {0:1} p.* 4121 4122 @param line: The current line in GedLine format 4123 @type line: GedLine 4124 @param state: The current state 4125 @type state: CurrentState 4126 """ 4127 self.backoff = True # reprocess the current ADDR line 4128 line.level += 1 # as if it was next level down 4129 event_ref = self.__build_event_pair(state, EventType.RESIDENCE, 4130 self.event_parse_tbl, '') 4131 state.person.add_event_ref(event_ref) 4132 4133 def __person_phon(self, line, state): 4134 """ 4135 n PHON <PHONE_NUMBER> {0:3} 4136 4137 @param line: The current line in GedLine format 4138 @type line: GedLine 4139 @param state: The current state 4140 @type state: CurrentState 4141 """ 4142 url = Url() 4143 url.set_path(line.data) 4144 url.set_type(UrlType(_('Phone'))) 4145 state.person.add_url(url) 4146 4147 def __person_fax(self, line, state): 4148 """ 4149 O INDI 4150 1 FAX <PHONE_NUMBER> {0:3} 4151 4152 @param line: The current line in GedLine format 4153 @type line: GedLine 4154 @param state: The current state 4155 @type state: CurrentState 4156 """ 4157 url = Url() 4158 url.set_path(line.data) 4159 url.set_type(UrlType(_('FAX'))) 4160 state.person.add_url(url) 4161 4162 def __person_email(self, line, state): 4163 """ 4164 O INDI 4165 1 EMAIL <EMAIL> {0:3} 4166 4167 @param line: The current line in GedLine format 4168 @type line: GedLine 4169 @param state: The current state 4170 @type state: CurrentState 4171 """ 4172 url = Url() 4173 url.set_path(line.data) 4174 url.set_type(UrlType(UrlType.EMAIL)) 4175 state.person.add_url(url) 4176 4177 def __person_www(self, line, state): 4178 """ 4179 O INDI 4180 1 WWW <URL> {0:3} 4181 4182 @param line: The current line in GedLine format 4183 @type line: GedLine 4184 @param state: The current state 4185 @type state: CurrentState 4186 """ 4187 url = Url() 4188 url.set_path(line.data) 4189 url.set_type(UrlType(UrlType.WEB_HOME)) 4190 state.person.add_url(url) 4191 4192 def 
__person_titl(self, line, state): 4193 """ 4194 @param line: The current line in GedLine format 4195 @type line: GedLine 4196 @param state: The current state 4197 @type state: CurrentState 4198 """ 4199 event = Event() 4200 event_ref = EventRef() 4201 event.set_gramps_id(self.emapper.find_next()) 4202 event.set_type(EventType.NOB_TITLE) 4203 event.set_description(line.data) 4204 4205 sub_state = CurrentState() 4206 sub_state.person = state.person 4207 sub_state.level = state.level + 1 4208 sub_state.event = event 4209 sub_state.event_ref = event_ref 4210 sub_state.pf = self.place_parser 4211 4212 self.__parse_level(sub_state, self.event_parse_tbl, self.__undefined) 4213 state.msg += sub_state.msg 4214 4215 self.__add_place(event, sub_state) 4216 4217 self.dbase.add_event(event, self.trans) 4218 event_ref.ref = event.handle 4219 state.person.add_event_ref(event_ref) 4220 4221 def __person_attr_plac(self, line, state): 4222 """ 4223 @param line: The current line in GedLine format 4224 @type line: GedLine 4225 @param state: The current state 4226 @type state: CurrentState 4227 """ 4228 if state.attr.get_value() == "": 4229 state.attr.set_value(line.data) 4230 4231 def __name_type(self, line, state): 4232 """ 4233 @param line: The current line in GedLine format 4234 @type line: GedLine 4235 @param state: The current state 4236 @type state: CurrentState 4237 """ 4238 if line.data.upper() in ("_OTHN", "_AKA", "AKA", "AKAN"): 4239 state.name.set_type(NameType.AKA) 4240 elif line.data.upper() in ("_MAR", "_MARN", "_MARNM", "MARRIED"): 4241 state.name.set_type(NameType.MARRIED) 4242 else: 4243 state.name.set_type((NameType.CUSTOM, line.data)) 4244 4245 def __name_date(self, line, state): 4246 """ 4247 @param line: The current line in GedLine format 4248 @type line: GedLine 4249 @param state: The current state 4250 @type state: CurrentState 4251 """ 4252 if state.name: 4253 state.name.set_date_object(line.data) 4254 4255 def __name_note(self, line, state): 4256 """ 4257 
@param line: The current line in GedLine format 4258 @type line: GedLine 4259 @param state: The current state 4260 @type state: CurrentState 4261 """ 4262 self.__parse_note(line, state.name, state) 4263 4264 def __name_alia(self, line, state): 4265 """ 4266 This parses the illegal (ALIA or ALIAS) or non-standard (_ALIA) GEDCOM 4267 tag as a subsidiary of the NAME tag. 4268 4269 n @XREF:INDI@ INDI {1:1} 4270 +1 NAME <NAME_PERSONAL> {1:1} 4271 +2 NPFX <NAME_PIECE_PREFIX> {0:1} 4272 +2 GIVN <NAME_PIECE_GIVEN> {0:1} 4273 +2 NICK <NAME_PIECE_NICKNAME> {0:1} 4274 +2 SPFX <NAME_PIECE_SURNAME_PREFIX> {0:1} 4275 +2 SURN <NAME_PIECE_SURNAME> {0:1} 4276 +2 NSFX <NAME_PIECE_SUFFIX> {0:1} 4277 +2 <ALIA> <NAME_PERSONAL> {1:1} 4278 +3 NPFX <NAME_PIECE_PREFIX> {0:1} 4279 +3 GIVN <NAME_PIECE_GIVEN> {0:1} 4280 +3 NICK <NAME_PIECE_NICKNAME> {0:1} 4281 +3 SPFX <NAME_PIECE_SURNAME_PREFIX> {0:1} 4282 +3 SURN <NAME_PIECE_SURNAME> {0:1} 4283 +3 NSFX <NAME_PIECE_SUFFIX> {0:1} 4284 +3 <<SOURCE_CITATION>> {0:M} 4285 +4 <<NOTE_STRUCTURE>> {0:M} 4286 +4 <<MULTIMEDIA_LINK>> {0:M} 4287 +3 <<NOTE_STRUCTURE>> {0:M} 4288 +2 <<SOURCE_CITATION>> {0:M} 4289 +3 <<NOTE_STRUCTURE>> {0:M} 4290 +3 <<MULTIMEDIA_LINK>> {0:M} 4291 +2 <<NOTE_STRUCTURE>> {0:M} 4292 4293 Note that the subsidiary name structure detail will overwrite the ALIA 4294 name (if the same elements are provided in both), so the names should 4295 match. 4296 4297 "2 _ALIA" is used for example, by PRO-GEN v 3.0a and "2 ALIA" is used 4298 by GTEdit and Brother's keeper 5.2 for windows. It had been supported 4299 in previous versions of Gramps but as it was probably incorrectly coded 4300 as it would only work if the name started with '@'. 
4301 4302 @param line: The current line in GedLine format 4303 @type line: GedLine 4304 @param state: The current state 4305 @type state: CurrentState 4306 """ 4307 self.__parse_alias_name(line, state) 4308 4309 def __name_npfx(self, line, state): 4310 """ 4311 @param line: The current line in GedLine format 4312 @type line: GedLine 4313 @param state: The current state 4314 @type state: CurrentState 4315 """ 4316 state.name.set_title(line.data.strip()) 4317 self.__skip_subordinate_levels(state.level + 1, state) 4318 4319 def __name_givn(self, line, state): 4320 """ 4321 @param line: The current line in GedLine format 4322 @type line: GedLine 4323 @param state: The current state 4324 @type state: CurrentState 4325 """ 4326 state.name.set_first_name(line.data.strip()) 4327 self.__skip_subordinate_levels(state.level + 1, state) 4328 4329 def __name_spfx(self, line, state): 4330 """ 4331 @param line: The current line in GedLine format 4332 @type line: GedLine 4333 @param state: The current state 4334 @type state: CurrentState 4335 """ 4336 spfx = line.data.strip().split(", ")[0] 4337 if state.name.get_surname_list(): 4338 state.name.get_surname_list()[0].set_prefix(spfx) 4339 else: 4340 surn = Surname() 4341 surn.set_prefix(spfx) 4342 surn.set_primary() 4343 state.name.set_surname_list([surn]) 4344 self.__skip_subordinate_levels(state.level + 1, state) 4345 4346 def __name_surn(self, line, state): 4347 """ 4348 @param line: The current line in GedLine format 4349 @type line: GedLine 4350 @param state: The current state 4351 @type state: CurrentState 4352 """ 4353 names = line.data.strip().split(", ") 4354 overwrite = bool(state.name.get_surname_list()) 4355 for name in names: 4356 if overwrite: 4357 state.name.get_surname_list()[0].set_surname(name) 4358 overwrite = False 4359 else: 4360 surn = Surname() 4361 surn.set_surname(name) 4362 surn.set_primary(primary=not state.name.get_surname_list()) 4363 state.name.get_surname_list().append(surn) 4364 
self.__skip_subordinate_levels(state.level + 1, state) 4365 4366 def __name_marnm(self, line, state): 4367 """ 4368 This is non-standard GEDCOM. _MARNM is reported to be used in Ancestral 4369 Quest and Personal Ancestral File 5. This will also handle a usage 4370 which has been found in Brother's Keeper (BROSKEEP VERS 6.1.31 WINDOWS) 4371 as follows: 4372 4373 0 @I203@ INDI 4374 1 NAME John Richard/Doe/ 4375 2 _MARN Some Other Name 4376 3 DATE 27 JUN 1817 4377 4378 @param line: The current line in GedLine format 4379 @type line: GedLine 4380 @param state: The current state 4381 @type state: CurrentState 4382 """ 4383 text = line.data.strip() 4384 data = text.split() 4385 if len(data) == 1: 4386 name = Name(state.person.primary_name) 4387 surn = Surname() 4388 surn.set_surname(data[0].strip()) 4389 surn.set_primary() 4390 name.set_surname_list([surn]) 4391 name.set_type(NameType.MARRIED) 4392 state.person.add_alternate_name(name) 4393 elif len(data) > 1: 4394 name = self.__parse_name_personal(text) 4395 name.set_type(NameType.MARRIED) 4396 state.person.add_alternate_name(name) 4397 4398 def __name_nsfx(self, line, state): 4399 """ 4400 @param line: The current line in GedLine format 4401 @type line: GedLine 4402 @param state: The current state 4403 @type state: CurrentState 4404 """ 4405 if state.name.get_suffix() == "" or \ 4406 state.name.get_suffix() == line.data: 4407 #suffix might be set before when parsing name string 4408 state.name.set_suffix(line.data) 4409 else: 4410 #previously set suffix different, to not loose information, append 4411 state.name.set_suffix(state.name.get_suffix() + ' ' + line.data) 4412 self.__skip_subordinate_levels(state.level + 1, state) 4413 4414 def __name_nick(self, line, state): 4415 """ 4416 @param line: The current line in GedLine format 4417 @type line: GedLine 4418 @param state: The current state 4419 @type state: CurrentState 4420 """ 4421 state.name.set_nick_name(line.data.strip()) 4422 
self.__skip_subordinate_levels(state.level + 1, state) 4423 4424 def __name_aka(self, line, state): 4425 """ 4426 This parses the non-standard GEDCOM tags _AKA or _AKAN as a subsidiary 4427 to the NAME tag, which is reported to have been found in Ancestral 4428 Quest and Personal Ancestral File 4 and 5. Note: example AQ and PAF 4429 files have separate 2 NICK and 2 _AKA lines for the same person. The 4430 NICK will be stored by Gramps in the nick_name field of the name 4431 structure, while the _AKA, if it is a single word, will be stored in 4432 the NICKNAME attribute. If more than one word it is stored as an AKA 4433 alternate name. 4434 4435 This will also handle a usage which has been found in in Brother's 4436 Keeper (BROSKEEP VERS 6.1.31 WINDOWS) as follows: 4437 4438 0 @I203@ INDI 4439 1 NAME John Richard/Doe/ 4440 2 _AKAN Some Other Name 4441 3 DATE 27 JUN 1817 4442 4443 @param line: The current line in GedLine format 4444 @type line: GedLine 4445 @param state: The current state 4446 @type state: CurrentState 4447 """ 4448 lname = line.data.split() 4449 name_len = len(lname) 4450 if name_len == 1: 4451 attr = Attribute() 4452 attr.set_type(AttributeType.NICKNAME) 4453 attr.set_value(line.data) 4454 state.person.add_attribute(attr) 4455 elif name_len == 0: 4456 return 4457 else: 4458 name = Name() 4459 surname = Surname() 4460 surname.set_surname(lname[-1].strip()) 4461 surname.set_primary() 4462 name.set_surname_list([surname]) 4463 name.set_first_name(' '.join(lname[0:name_len - 1])) 4464# name = self.__parse_name_personal(line.data) 4465 name.set_type(NameType.AKA) 4466 state.person.add_alternate_name(name) 4467 4468 def __name_adpn(self, line, state): 4469 """ 4470 @param line: The current line in GedLine format 4471 @type line: GedLine 4472 @param state: The current state 4473 @type state: CurrentState 4474 """ 4475 text = line.data.strip() 4476 data = text.split() 4477 if len(data) == 1: 4478 name = Name(state.person.primary_name) 4479 surn = Surname() 
4480 surn.set_surname(data[0].strip()) 4481 surn.set_primary() 4482 name.set_surname_list([surn]) 4483 name.set_type((NameType.CUSTOM, "Adopted")) 4484 state.person.add_alternate_name(name) 4485 elif len(data) > 1: 4486 name = self.__parse_name_personal(text) 4487 name.set_type((NameType.CUSTOM, "Adopted")) 4488 state.person.add_alternate_name(name) 4489 4490 def __name_sour(self, line, state): 4491 """ 4492 @param line: The current line in GedLine format 4493 @type line: GedLine 4494 @param state: The current state 4495 @type state: CurrentState 4496 """ 4497 citation_handle = self.handle_source(line, state.level, state) 4498 state.name.add_citation(citation_handle) 4499 4500 def __person_std_attr(self, line, state): 4501 """ 4502 Parses an TOKEN that Gramps recognizes as an Attribute 4503 4504 @param line: The current line in GedLine format 4505 @type line: GedLine 4506 @param state: The current state 4507 @type state: CurrentState 4508 """ 4509 sub_state = CurrentState() 4510 sub_state.person = state.person 4511 sub_state.attr = line.data 4512 sub_state.level = state.level + 1 4513 state.person.add_attribute(sub_state.attr) 4514 self.__parse_level(sub_state, self.person_attr_parse_tbl, 4515 self.__ignore) 4516 state.msg += sub_state.msg 4517 4518 def __person_fact(self, line, state): 4519 """ 4520 Parses an TOKEN that Gramps recognizes as an Attribute 4521 4522 @param line: The current line in GedLine format 4523 @type line: GedLine 4524 @param state: The current state 4525 @type state: CurrentState 4526 """ 4527 sub_state = CurrentState() 4528 sub_state.person = state.person 4529 sub_state.attr = Attribute() 4530 sub_state.attr.set_value(line.data) 4531 sub_state.level = state.level + 1 4532 state.person.add_attribute(sub_state.attr) 4533 self.__parse_level(sub_state, self.person_fact_parse_tbl, 4534 self.__ignore) 4535 state.msg += sub_state.msg 4536 4537 def __person_fact_type(self, line, state): 4538 state.attr.set_type(line.data) 4539 4540 def 
__person_bapl(self, line, state): 4541 """ 4542 Parses an BAPL TOKEN, producing a Gramps LdsOrd instance 4543 4544 @param line: The current line in GedLine format 4545 @type line: GedLine 4546 @param state: The current state 4547 @type state: CurrentState 4548 """ 4549 self.build_lds_ord(state, LdsOrd.BAPTISM) 4550 4551 def __person_conl(self, line, state): 4552 """ 4553 Parses an CONL TOKEN, producing a Gramps LdsOrd instance 4554 4555 @param line: The current line in GedLine format 4556 @type line: GedLine 4557 @param state: The current state 4558 @type state: CurrentState 4559 """ 4560 self.build_lds_ord(state, LdsOrd.CONFIRMATION) 4561 4562 def __person_endl(self, line, state): 4563 """ 4564 Parses an ENDL TOKEN, producing a Gramps LdsOrd instance 4565 4566 @param line: The current line in GedLine format 4567 @type line: GedLine 4568 @param state: The current state 4569 @type state: CurrentState 4570 """ 4571 self.build_lds_ord(state, LdsOrd.ENDOWMENT) 4572 4573 def __person_slgc(self, line, state): 4574 """ 4575 Parses an SLGC TOKEN, producing a Gramps LdsOrd instance 4576 4577 @param line: The current line in GedLine format 4578 @type line: GedLine 4579 @param state: The current state 4580 @type state: CurrentState 4581 """ 4582 self.build_lds_ord(state, LdsOrd.SEAL_TO_PARENTS) 4583 4584 def build_lds_ord(self, state, lds_type): 4585 """ 4586 Parses an LDS ordinance, using the type passed to the routine 4587 4588 @param state: The current state 4589 @type state: CurrentState 4590 @param lds_type: The type of the LDS ordinance 4591 @type line: LdsOrd type 4592 """ 4593 sub_state = CurrentState() 4594 sub_state.level = state.level + 1 4595 sub_state.lds_ord = LdsOrd() 4596 sub_state.lds_ord.set_type(lds_type) 4597 sub_state.place = None 4598 sub_state.place_fields = PlaceParser() 4599 sub_state.person = state.person 4600 state.person.lds_ord_list.append(sub_state.lds_ord) 4601 4602 self.__parse_level(sub_state, self.lds_parse_tbl, self.__ignore) 4603 state.msg 
+= sub_state.msg 4604 4605 if sub_state.place: 4606 place_title = _pd.display(self.dbase, sub_state.place) 4607 sub_state.place_fields.load_place(self.place_import, 4608 sub_state.place, 4609 place_title) 4610 4611 def __lds_temple(self, line, state): 4612 """ 4613 Parses the TEMP tag, looking up the code for a match. 4614 4615 @param line: The current line in GedLine format 4616 @type line: GedLine 4617 @param state: The current state 4618 @type state: CurrentState 4619 """ 4620 value = self.__extract_temple(line) 4621 if value: 4622 state.lds_ord.set_temple(value) 4623 4624 def __lds_date(self, line, state): 4625 """ 4626 Parses the DATE tag for the LdsOrd 4627 4628 @param line: The current line in GedLine format 4629 @type line: GedLine 4630 @param state: The current state 4631 @type state: CurrentState 4632 """ 4633 state.lds_ord.set_date_object(line.data) 4634 4635 def __lds_famc(self, line, state): 4636 """ 4637 Parses the FAMC tag attached to the LdsOrd 4638 4639 @param line: The current line in GedLine format 4640 @type line: GedLine 4641 @param state: The current state 4642 @type state: CurrentState 4643 """ 4644 gid = self.fid_map[line.data] 4645 state.lds_ord.set_family_handle(self.__find_family_handle(gid)) 4646 4647 def __lds_form(self, line, state): 4648 """ 4649 Parses the FORM tag thate defines the place structure for a place. 4650 This tag, if found, will override any global place structure. 4651 4652 @param line: The current line in GedLine format 4653 @type line: GedLine 4654 @param state: The current state 4655 @type state: CurrentState 4656 """ 4657 state.pf = PlaceParser(line) 4658 4659 def __lds_plac(self, line, state): 4660 """ 4661 Parses the PLAC tag attached to the LdsOrd. Create a new place if 4662 needed and set the title. 
4663 4664 @param line: The current line in GedLine format 4665 @type line: GedLine 4666 @param state: The current state 4667 @type state: CurrentState 4668 """ 4669 try: 4670 title = line.data 4671 place = self.__find_place(title, None, None) 4672 if place is None: 4673 place = Place() 4674 place.set_title(title) 4675 place.name.set_value(title) 4676 self.dbase.add_place(place, self.trans) 4677 self.place_names[place.get_title()].append(place.get_handle()) 4678 else: 4679 pass 4680 state.lds_ord.set_place_handle(place.handle) 4681 except NameError: 4682 return 4683 4684 def __lds_sour(self, line, state): 4685 """ 4686 Parses the SOUR tag attached to the LdsOrd. 4687 4688 @param line: The current line in GedLine format 4689 @type line: GedLine 4690 @param state: The current state 4691 @type state: CurrentState 4692 """ 4693 citation_handle = self.handle_source(line, state.level, state) 4694 state.lds_ord.add_citation(citation_handle) 4695 4696 def __lds_note(self, line, state): 4697 """ 4698 Parses the NOTE tag attached to the LdsOrd. 4699 4700 @param line: The current line in GedLine format 4701 @type line: GedLine 4702 @param state: The current state 4703 @type state: CurrentState 4704 """ 4705 self.__parse_note(line, state.lds_ord, state) 4706 4707 def __lds_stat(self, line, state): 4708 """ 4709 Parses the STAT (status) tag attached to the LdsOrd. 4710 4711 @param line: The current line in GedLine format 4712 @type line: GedLine 4713 @param state: The current state 4714 @type state: CurrentState 4715 """ 4716 status = LDS_STATUS.get(line.data, LdsOrd.STATUS_NONE) 4717 state.lds_ord.set_status(status) 4718 4719 def __person_famc(self, line, state): 4720 """ 4721 Handles the parsing of the FAMC line, which indicates which family the 4722 person is a child of. 
4723 4724 n FAMC @<XREF:FAM>@ {1:1} 4725 +1 PEDI <PEDIGREE_LINKAGE_TYPE> {0:1} p.* 4726 +1 _FREL <Father relationship type> {0:1} non-standard Extension 4727 +1 _MREL <Mother relationship type> {0:1} non-standard Extension 4728 +1 <<NOTE_STRUCTURE>> {0:M} p.* 4729 4730 @param line: The current line in GedLine format 4731 @type line: GedLine 4732 @param state: The current state 4733 @type state: CurrentState 4734 """ 4735 4736 if not line.data: # handles empty FAMC line 4737 self.__not_recognized(line, state) 4738 return 4739 sub_state = CurrentState() 4740 sub_state.person = state.person 4741 sub_state.level = state.level + 1 4742 sub_state.ftype = None 4743 sub_state.primary = False 4744 4745 gid = self.fid_map[line.data] 4746 handle = self.__find_family_handle(gid) 4747 4748 self.__parse_level(sub_state, self.famc_parse_tbl, self.__undefined) 4749 state.msg += sub_state.msg 4750 4751 # if the handle is not already in the person's parent family list, we 4752 # need to add it to thie list. 4753 4754 flist = state.person.get_parent_family_handle_list() 4755 if handle not in flist: 4756 state.person.add_parent_family_handle(handle) 4757 4758 # search childrefs 4759 family, _new = self.dbase.find_family_from_handle(handle, 4760 self.trans) 4761 family.set_gramps_id(gid) 4762 4763 for ref in family.get_child_ref_list(): 4764 if ref.ref == state.person.handle: 4765 break 4766 else: 4767 ref = ChildRef() 4768 ref.ref = state.person.handle 4769 family.add_child_ref(ref) 4770 if sub_state.ftype: 4771 ref.set_mother_relation(sub_state.ftype) 4772 ref.set_father_relation(sub_state.ftype) 4773 else: 4774 if sub_state.frel: 4775 ref.set_father_relation(sub_state.frel) 4776 if sub_state.mrel: 4777 ref.set_mother_relation(sub_state.mrel) 4778 self.dbase.commit_family(family, self.trans) 4779 4780 def __person_famc_pedi(self, line, state): 4781 """ 4782 Parses the PEDI tag attached to a INDI.FAMC record. 
No values are set 4783 at this point, because we have to do some post processing. Instead, we 4784 assign the ftype field of the state variable. We convert the text from 4785 the line to an index into the PEDIGREE_TYPES dictionary, which will map 4786 to the correct ChildTypeRef. 4787 4788 @param line: The current line in GedLine format 4789 @type line: GedLine 4790 @param state: The current state 4791 @type state: CurrentState 4792 """ 4793 state.ftype = PEDIGREE_TYPES.get(line.data.lower(), 4794 ChildRefType.UNKNOWN) 4795 4796 def __person_famc_frel(self, line, state): 4797 """ 4798 Parses the _FREL tag attached to a INDI.FAMC record. No values are set 4799 at this point, because we have to do some post processing. Instead, we 4800 assign the frel field of the state variable. We convert the text from 4801 the line to an index into the PEDIGREE_TYPES dictionary, which will map 4802 to the correct ChildTypeRef. 4803 4804 @param line: The current line in GedLine format 4805 @type line: GedLine 4806 @param state: The current state 4807 @type state: CurrentState 4808 """ 4809 state.frel = PEDIGREE_TYPES.get(line.data.lower().strip(), None) 4810 if state.frel is None: 4811 state.frel = ChildRefType(line.data.capitalize().strip()) 4812 4813 def __person_famc_mrel(self, line, state): 4814 """ 4815 Parses the _MREL tag attached to a INDI.FAMC record. No values are set 4816 at this point, because we have to do some post processing. Instead, we 4817 assign the mrel field of the state variable. We convert the text from 4818 the line to an index into the PEDIGREE_TYPES dictionary, which will map 4819 to the correct ChildTypeRef. 
4820 4821 @param line: The current line in GedLine format 4822 @type line: GedLine 4823 @param state: The current state 4824 @type state: CurrentState 4825 """ 4826 state.mrel = PEDIGREE_TYPES.get(line.data.lower().strip(), None) 4827 if state.mrel is None: 4828 state.mrel = ChildRefType(line.data.capitalize().strip()) 4829 4830 def __person_famc_note(self, line, state): 4831 """ 4832 Parses the INDI.FAMC.NOTE tag . 4833 4834 @param line: The current line in GedLine format 4835 @type line: GedLine 4836 @param state: The current state 4837 @type state: CurrentState 4838 """ 4839 self.__parse_note(line, state.person, state) 4840 4841 def __person_famc_primary(self, line, state): 4842 """ 4843 Parses the _PRIMARY tag on an INDI.FAMC tag. This value is stored in 4844 the state record to be used later. 4845 4846 @param line: The current line in GedLine format 4847 @type line: GedLine 4848 @param state: The current state 4849 @type state: CurrentState 4850 """ 4851 state.primary = True 4852 4853 def __person_famc_sour(self, line, state): 4854 """ 4855 Parses the SOUR tag on an INDI.FAMC tag. Gramps has no corresponding 4856 record on its family relationship, so we add the source to the Person 4857 record. 4858 4859 @param line: The current line in GedLine format 4860 @type line: GedLine 4861 @param state: The current state 4862 @type state: CurrentState 4863 """ 4864 citation_handle = self.handle_source(line, state.level, state) 4865 state.person.add_citation(citation_handle) 4866 4867 def __person_fams(self, line, state): 4868 """ 4869 Parses the INDI.FAMS record, which indicates the family in which the 4870 person is a spouse. 
4871 4872 n FAMS @<XREF:FAM>@ {1:1} p.* 4873 +1 <<NOTE_STRUCTURE>> {0:M} p.* 4874 4875 @param line: The current line in GedLine format 4876 @type line: GedLine 4877 @param state: The current state 4878 @type state: CurrentState 4879 """ 4880 gid = self.fid_map[line.data] 4881 handle = self.__find_family_handle(gid) 4882 state.person.add_family_handle(handle) 4883 4884 sub_state = CurrentState(level=state.level + 1) 4885 sub_state.obj = state.person 4886 self.__parse_level(sub_state, self.opt_note_tbl, self.__ignore) 4887 state.msg += sub_state.msg 4888 4889 def __person_asso(self, line, state): 4890 """ 4891 Parse the ASSO tag, add the referenced person to the person we 4892 are currently parsing. The GEDCOM spec indicates that valid ASSO tag 4893 is: 4894 4895 n ASSO @<XREF:INDI>@ {0:M} 4896 4897 And the sub tags are: 4898 4899 ASSOCIATION_STRUCTURE:= 4900 +1 RELA <RELATION_IS_DESCRIPTOR> {1:1} 4901 +1 <<NOTE_STRUCTURE>> {0:M} 4902 +1 <<SOURCE_CITATION>> {0:M} 4903 4904 The Gedcom spec notes that the ASSOCIATION_STRUCTURE 4905 can only link to an INDIVIDUAL_RECORD 4906 4907 @param line: The current line in GedLine format 4908 @type line: GedLine 4909 @param state: The current state 4910 @type state: CurrentState 4911 """ 4912 4913 # find the id and person that we are referencing 4914 handle = self.__find_person_handle(self.pid_map[line.data]) 4915 4916 # create a new PersonRef, and assign the handle, add the 4917 # PersonRef to the active person 4918 4919 sub_state = CurrentState() 4920 sub_state.person = state.person 4921 sub_state.level = state.level + 1 4922 sub_state.ref = PersonRef() 4923 sub_state.ref.ref = handle 4924 sub_state.ignore = False 4925 4926 self.__parse_level(sub_state, self.asso_parse_tbl, self.__ignore) 4927 state.msg += sub_state.msg 4928 if not sub_state.ignore: 4929 state.person.add_person_ref(sub_state.ref) 4930 4931 def __person_asso_rela(self, line, state): 4932 """ 4933 Parses the INDI.ASSO.RELA tag. 
4934 4935 @param line: The current line in GedLine format 4936 @type line: GedLine 4937 @param state: The current state 4938 @type state: CurrentState 4939 """ 4940 state.ref.rel = line.data 4941 4942 def __person_asso_sour(self, line, state): 4943 """ 4944 Parses the INDI.ASSO.SOUR tag. 4945 4946 @param line: The current line in GedLine format 4947 @type line: GedLine 4948 @param state: The current state 4949 @type state: CurrentState 4950 """ 4951 state.ref.add_citation(self.handle_source(line, state.level, state)) 4952 4953 def __person_asso_note(self, line, state): 4954 """ 4955 Parses the INDI.ASSO.NOTE tag. 4956 4957 @param line: The current line in GedLine format 4958 @type line: GedLine 4959 @param state: The current state 4960 @type state: CurrentState 4961 """ 4962 self.__parse_note(line, state.ref, state) 4963 4964 #------------------------------------------------------------------- 4965 # 4966 # FAM parsing 4967 # 4968 #------------------------------------------------------------------- 4969 4970 def __parse_fam(self, line): 4971 """ 4972 n @<XREF:FAM>@ FAM {1:1} 4973 +1 <<FAMILY_EVENT_STRUCTURE>> {0:M} 4974 +1 HUSB @<XREF:INDI>@ {0:1} 4975 +1 WIFE @<XREF:INDI>@ {0:1} 4976 +1 CHIL @<XREF:INDI>@ {0:M} 4977 +1 NCHI <COUNT_OF_CHILDREN> {0:1} 4978 +1 SUBM @<XREF:SUBM>@ {0:M} 4979 +1 <<LDS_SPOUSE_SEALING>> {0:M} 4980 +1 <<SOURCE_CITATION>> {0:M} 4981 +1 <<MULTIMEDIA_LINK>> {0:M} 4982 +1 <<NOTE_STRUCTURE>> {0:M} 4983 +1 REFN <USER_REFERENCE_NUMBER> {0:M} 4984 +1 RIN <AUTOMATED_RECORD_ID> {0:1} 4985 +1 <<CHANGE_DATE>> {0:1} 4986 """ 4987 # create a family 4988 4989 gid = self.fid_map[line.token_text] 4990 family = self.__find_or_create_family(gid) 4991 4992 # parse the family 4993 4994 state = CurrentState(level=1) 4995 state.family = family 4996 4997 self.__parse_level(state, self.family_func, self.__family_even) 4998 4999 # handle addresses attached to families 5000 if state.addr is not None: 5001 father_handle = family.get_father_handle() 5002 father = 
self.dbase.get_person_from_handle(father_handle) 5003 if father: 5004 father.add_address(state.addr) 5005 self.dbase.commit_person(father, self.trans) 5006 mother_handle = family.get_mother_handle() 5007 mother = self.dbase.get_person_from_handle(mother_handle) 5008 if mother: 5009 mother.add_address(state.addr) 5010 self.dbase.commit_person(mother, self.trans) 5011 5012 for child_ref in family.get_child_ref_list(): 5013 child_handle = child_ref.ref 5014 child = self.dbase.get_person_from_handle(child_handle) 5015 if child: 5016 child.add_address(state.addr) 5017 self.dbase.commit_person(child, self.trans) 5018 5019 # add default reference if no reference exists 5020 self.__add_default_source(family) 5021 5022 # Add a default tag if provided 5023 self.__add_default_tag(family) 5024 5025 self.__check_msgs(_("FAM (family) Gramps ID %s") % 5026 family.get_gramps_id(), state, family) 5027 # commit family to database 5028 self.dbase.commit_family(family, self.trans, family.change) 5029 5030 def __family_husb(self, line, state): 5031 """ 5032 Parses the husband line of a family 5033 5034 n HUSB @<XREF:INDI>@ {0:1} 5035 5036 @param line: The current line in GedLine format 5037 @type line: GedLine 5038 @param state: The current state 5039 @type state: CurrentState 5040 """ 5041 handle = self.__find_person_handle(self.pid_map[line.data]) 5042 state.family.set_father_handle(handle) 5043 5044 def __family_wife(self, line, state): 5045 """ 5046 Parses the wife line of a family 5047 5048 n WIFE @<XREF:INDI>@ {0:1} 5049 5050 @param line: The current line in GedLine format 5051 @type line: GedLine 5052 @param state: The current state 5053 @type state: CurrentState 5054 """ 5055 handle = self.__find_person_handle(self.pid_map[line.data]) 5056 state.family.set_mother_handle(handle) 5057 5058 def __family_std_event(self, line, state): 5059 """ 5060 Parses GEDCOM event types that map to a Gramps standard type. 
5061 Additional parsing required is for the event detail: 5062 5063 +1 <<EVENT_DETAIL>> {0:1} p.* 5064 5065 @param line: The current line in GedLine format 5066 @type line: GedLine 5067 @param state: The current state 5068 @type state: CurrentState 5069 """ 5070 event = line.data 5071 event.set_gramps_id(self.emapper.find_next()) 5072 event_ref = EventRef() 5073 event_ref.set_role(EventRoleType.FAMILY) 5074 self.dbase.add_event(event, self.trans) 5075 5076 sub_state = CurrentState() 5077 sub_state.person = state.person 5078 sub_state.level = state.level + 1 5079 sub_state.event = event 5080 sub_state.event_ref = event_ref 5081 sub_state.pf = self.place_parser 5082 5083 self.__parse_level(sub_state, self.event_parse_tbl, self.__undefined) 5084 state.msg += sub_state.msg 5085 5086 self.__add_place(event, sub_state) 5087 5088 if event.type == EventType.MARRIAGE: 5089 descr = event.get_description() 5090 if descr == "Civil Union": 5091 state.family.type.set(FamilyRelType.CIVIL_UNION) 5092 event.set_description('') 5093 elif descr == "Unmarried": 5094 state.family.type.set(FamilyRelType.UNMARRIED) 5095 event.set_description('') 5096 else: 5097 state.family.type.set(FamilyRelType.MARRIED) 5098 if descr == "Y": 5099 event.set_description('') 5100 5101 self.dbase.commit_event(event, self.trans) 5102 event_ref.ref = event.handle 5103 state.family.add_event_ref(event_ref) 5104 5105 def __family_even(self, line, state): 5106 """ 5107 Parses GEDCOM event types that map to a Gramps standard type. 5108 Additional parsing required is for the event detail: 5109 5110 +1 <<EVENT_DETAIL>> {0:1} p.* 5111 5112 @param line: The current line in GedLine format 5113 @type line: GedLine 5114 @param state: The current state 5115 @type state: CurrentState 5116 """ 5117 # We can get here when a tag that is not valid in the family_func 5118 # parse table is encountered. The tag may be of the form "_XXX". 
We 5119 # try to convert to a friendly name, if fails use the tag itself as 5120 # the TYPE in a custom event 5121 cust_tag = CUSTOMEVENTTAGS.get(line.token_text, line.token_text) 5122 cust_type = EventType((EventType.CUSTOM, cust_tag)) 5123 event = Event() 5124 event_ref = EventRef() 5125 event_ref.set_role(EventRoleType.FAMILY) 5126 event.set_gramps_id(self.emapper.find_next()) 5127 event.set_type(cust_type) 5128 # in case a description ever shows up 5129 if line.data and line.data != 'Y': 5130 event.set_description(str(line.data)) 5131 self.dbase.add_event(event, self.trans) 5132 5133 sub_state = CurrentState() 5134 sub_state.person = state.person 5135 sub_state.level = state.level + 1 5136 sub_state.event = event 5137 sub_state.event_ref = event_ref 5138 sub_state.pf = self.place_parser 5139 5140 self.__parse_level(sub_state, self.event_parse_tbl, self.__undefined) 5141 state.msg += sub_state.msg 5142 5143 self.__add_place(event, sub_state) 5144 5145 self.dbase.commit_event(event, self.trans) 5146 event_ref.ref = event.handle 5147 state.family.add_event_ref(event_ref) 5148 5149 def __family_chil(self, line, state): 5150 """ 5151 Parses the child line of a family 5152 5153 n CHIL @<XREF:INDI>@ {0:1} 5154 5155 @param line: The current line in GedLine format 5156 @type line: GedLine 5157 @param state: The current state 5158 @type state: CurrentState 5159 """ 5160 sub_state = CurrentState() 5161 sub_state.family = state.family 5162 sub_state.level = state.level + 1 5163 sub_state.mrel = None 5164 sub_state.frel = None 5165 5166 self.__parse_level(sub_state, self.family_rel_tbl, self.__ignore) 5167 state.msg += sub_state.msg 5168 5169 child = self.__find_or_create_person(self.pid_map[line.data]) 5170 5171 reflist = [ref for ref in state.family.get_child_ref_list() 5172 if ref.ref == child.handle] 5173 5174 if reflist: # The child has been referenced already 5175 ref = reflist[0] 5176 if sub_state.frel: 5177 ref.set_father_relation(sub_state.frel) 5178 if 
sub_state.mrel: 5179 ref.set_mother_relation(sub_state.mrel) 5180 # then we will set the order now: 5181 self.set_child_ref_order(state.family, ref) 5182 else: 5183 ref = ChildRef() 5184 ref.ref = child.handle 5185 if sub_state.frel: 5186 ref.set_father_relation(sub_state.frel) 5187 if sub_state.mrel: 5188 ref.set_mother_relation(sub_state.mrel) 5189 state.family.add_child_ref(ref) 5190 5191 def set_child_ref_order(self, family, child_ref): 5192 """ 5193 Sets the child_ref in family.child_ref_list to be in the position 5194 family.child_ref_count. This reorders the children to be in the 5195 order given in the FAM section. 5196 """ 5197 family.child_ref_list.remove(child_ref) 5198 family.child_ref_list.insert(family.child_ref_count, child_ref) 5199 family.child_ref_count += 1 5200 5201 def __family_slgs(self, line, state): 5202 """ 5203 n SLGS {1:1} 5204 +1 STAT <LDS_SPOUSE_SEALING_DATE_STATUS> {0:1} 5205 +1 DATE <DATE_LDS_ORD> {0:1} 5206 +1 TEMP <TEMPLE_CODE> {0:1} 5207 +1 PLAC <PLACE_LIVING_ORDINANCE> {0:1} 5208 +1 <<SOURCE_CITATION>> {0:M} 5209 +1 <<NOTE_STRUCTURE>> {0:M} 5210 5211 @param line: The current line in GedLine format 5212 @type line: GedLine 5213 @param state: The current state 5214 @type state: CurrentState 5215 """ 5216 sub_state = CurrentState() 5217 sub_state.level = state.level + 1 5218 sub_state.lds_ord = LdsOrd() 5219 sub_state.lds_ord.set_type(LdsOrd.SEAL_TO_SPOUSE) 5220 sub_state.place = None 5221 sub_state.family = state.family 5222 sub_state.place_fields = PlaceParser() 5223 state.family.lds_ord_list.append(sub_state.lds_ord) 5224 5225 self.__parse_level(sub_state, self.lds_parse_tbl, self.__ignore) 5226 state.msg += sub_state.msg 5227 5228 if sub_state.place: 5229 place_title = _pd.display(self.dbase, sub_state.place) 5230 sub_state.place_fields.load_place(self.place_import, 5231 sub_state.place, 5232 place_title) 5233 5234 def __family_source(self, line, state): 5235 """ 5236 n SOUR @<XREF:SOUR>@ /* pointer to source record */ {1:1} p.* 
5237 +1 PAGE <WHERE_WITHIN_SOURCE> {0:1} p.* 5238 +1 EVEN <EVENT_TYPE_CITED_FROM> {0:1} p.* 5239 +1 DATA {0:1} 5240 +1 QUAY <CERTAINTY_ASSESSMENT> {0:1} p.* 5241 +1 <<MULTIMEDIA_LINK>> {0:M} p.*, * 5242 +1 <<NOTE_STRUCTURE>> {0:M} p.* 5243 5244 | /* Systems not using source records */ 5245 n SOUR <SOURCE_DESCRIPTION> {1:1} p.* 5246 +1 [ CONC | CONT ] <SOURCE_DESCRIPTION> {0:M} 5247 +1 TEXT <TEXT_FROM_SOURCE> {0:M} p.* 5248 +1 <<NOTE_STRUCTURE>> {0:M} p.* 5249 5250 @param line: The current line in GedLine format 5251 @type line: GedLine 5252 @param state: The current state 5253 @type state: CurrentState 5254 """ 5255 citation_handle = self.handle_source(line, state.level, state) 5256 state.family.add_citation(citation_handle) 5257 5258 def __family_object(self, line, state): 5259 """ 5260 +1 <<MULTIMEDIA_LINK>> {0:M} 5261 5262 @param line: The current line in GedLine format 5263 @type line: GedLine 5264 @param state: The current state 5265 @type state: CurrentState 5266 """ 5267 self.__obje(line, state, state.family) 5268 5269 def __family_comm(self, line, state): 5270 """ 5271 @param line: The current line in GedLine format 5272 @type line: GedLine 5273 @param state: The current state 5274 @type state: CurrentState 5275 """ 5276 note = line.data 5277 state.family.add_note(note) 5278 self.__skip_subordinate_levels(state.level + 1, state) 5279 5280 def __family_note(self, line, state): 5281 """ 5282 +1 <<NOTE_STRUCTURE>> {0:M} 5283 5284 @param line: The current line in GedLine format 5285 @type line: GedLine 5286 @param state: The current state 5287 @type state: CurrentState 5288 """ 5289 self.__parse_note(line, state.family, state) 5290 5291 def __family_chan(self, line, state): 5292 """ 5293 +1 <<CHANGE_DATE>> {0:1} 5294 5295 @param line: The current line in GedLine format 5296 @type line: GedLine 5297 @param state: The current state 5298 @type state: CurrentState 5299 """ 5300 self.__parse_change(line, state.family, state.level + 1, state) 5301 5302 def 
    def __obje(self, line, state, pri_obj):
        """
        Parses an OBJE (multimedia link) line and attaches the result to the
        primary object: a media reference, or for the "url" form a Url or a
        Note.

        Embedded form

          n OBJE @<XREF:OBJE>@ {1:1}
          +1 _PRIM <Y/N>       {0:1}                # Indicates primary photo

        Linked form

          n  OBJE {1:1}
          +1 FORM <MULTIMEDIA_FORMAT> {1:1}         # v5.5 layout
          +1 TITL <DESCRIPTIVE_TITLE> {0:1}
          +1 FILE <MULTIMEDIA_FILE_REFERENCE> {1:1} # v5.5.1 allows multiple
            +2 FORM <MULTIMEDIA_FORMAT> {1:1}       # v5.5.1 layout
              +3 MEDI <SOURCE_MEDIA_TYPE> {0:1}     # v5.5.1 layout
          +1 <<NOTE_STRUCTURE>> {0:M}
          +1 _PRIM <Y/N>       {0:1}                # Indicates primary photo

        @param line: The current line in GedLine format
        @type line: GedLine
        @param state: The current state
        @type state: CurrentState
        @param pri_obj: The Primary object to which this is attached
        @type pri_obj: Person # or Family, or Source etc.
        """
        if line.data and line.data[0] == '@':
            # Reference to a named multimedia object defined elsewhere
            gramps_id = self.oid_map[line.data]
            handle = self.__find_media_handle(gramps_id)
            # check to see if this is a primary photo
            # (from here on 'line' is the _PRIM line, if there is one)
            line = self.__chk_subordinate(state.level + 1, state, TOKEN__PRIM)
            if line and line.data == 'Y':
                state.photo = handle
            oref = MediaRef()
            oref.set_reference_handle(handle)
            pri_obj.add_media_reference(oref)
            return
        #
        # The remainder of this code is similar in concept to __parse_obje
        # except that it combines references to the same media file by
        # comparing path names.  If they are the same, then only the first
        # is kept.  This does mean that if there are different notes etc. on a
        # later OBJE, they will be lost.
        #
        # The sub state collects what the handlers of media_parse_tbl find
        # on the subordinate lines.
        sub_state = CurrentState()
        sub_state.form = ""
        sub_state.attr = None
        sub_state.filename = ""
        sub_state.title = ""
        sub_state.media = Media()
        sub_state.level = state.level + 1
        sub_state.prim = ""

        self.__parse_level(sub_state, self.media_parse_tbl, self.__ignore)
        state.msg += sub_state.msg
        if sub_state.filename == "":
            # only reported; processing continues with the empty filename
            self.__add_msg(_("Filename omitted"), line, state)
        # The following lines are commented out because Gramps is NOT a
        # Gedcom validator!
        # if sub_state.form == "":
        #     self.__add_msg(_("Form omitted"), line, state)

        # The following code that detects URL is an older v5.5 usage; the
        # modern option is to use the EMAIL tag.
        if isinstance(sub_state.form, str) and sub_state.form == "url":
            if isinstance(pri_obj, UrlBase):
                url = Url()
                url.set_path(sub_state.filename)
                url.set_description(sub_state.title)
                url.set_type(UrlType.WEB_HOME)
                pri_obj.add_url(url)
            else:  # some primary objects (Event) don't have a spot for URL
                # keep the URL as the text of a new note instead
                new_note = Note(sub_state.filename)
                new_note.set_gramps_id(self.nid_map[""])
                new_note.set_handle(create_id())
                new_note.set_type(OBJ_NOTETYPE.get(type(pri_obj).__name__,
                                                   NoteType.GENERAL))
                self.dbase.commit_note(new_note, self.trans, new_note.change)
                pri_obj.add_note(new_note.get_handle())

        else:
            # to allow import of references to URLs (especially for import from
            # geni.com), do not try to find the file if it is blatantly a URL
            res = urlparse(sub_state.filename)
            # no scheme, a one letter scheme or 'file' is looked up as a
            # file; anything else is kept as given
            # NOTE(review): the one letter scheme is presumably a Windows
            # drive letter - confirm
            if sub_state.filename != '' and (res.scheme == '' or
                                             len(res.scheme) == 1 or
                                             res.scheme == 'file'):
                (valid, path) = self.__find_file(sub_state.filename,
                                                 self.dir_path)
                if not valid:
                    # only reported; the returned path is still used below
                    self.__add_msg(_("Could not import %s") %
                                   sub_state.filename, line, state)
            else:
                path = sub_state.filename
            # Multiple references to the same media silently drops the later
            # ones, even if title, etc. are different
            photo_handle = self.media_map.get(path)
            if photo_handle is None:
                # first reference to this path: create the media object
                photo = Media()
                photo.set_path(path)
                if sub_state.title:
                    photo.set_description(sub_state.title)
                else:
                    photo.set_description(path.replace('\\', '/'))
                full_path = os.path.abspath(path)
                # deal with mime types
                value = mimetypes.guess_type(full_path)
                if value and value[0]:  # found from filename
                    photo.set_mime_type(value[0])
                else:  # get from OBJE.FILE.FORM
                    if '/' in sub_state.form:  # already has expanded mime type
                        photo.set_mime_type(sub_state.form)
                    else:
                        # treat the FORM value as a file extension
                        value = mimetypes.types_map.get('.' + sub_state.form,
                                                        _('unknown'))
                        photo.set_mime_type(value)
                if sub_state.attr:
                    photo.attribute_list.append(sub_state.attr)
                self.dbase.add_media(photo, self.trans)
                # remember the path so that later references reuse the media
                self.media_map[path] = photo.handle
            else:
                photo = self.dbase.get_media_from_handle(photo_handle)
            # copy notes to our media
            for note in sub_state.media.get_note_list():
                photo.add_note(note)
            self.dbase.commit_media(photo, self.trans)

            if sub_state.prim == "Y":
                state.photo = photo.handle
            oref = MediaRef()
            oref.set_reference_handle(photo.handle)
            pri_obj.add_media_reference(oref)
5487 5488 @param line: The current line in GedLine format 5489 @type line: GedLine 5490 @param state: The current state 5491 @type state: CurrentState 5492 """ 5493 state.attr = Attribute() 5494 mtype = MEDIA_MAP.get(line.data.lower(), 5495 (SourceMediaType.CUSTOM, line.data)) 5496 state.attr.set_type(_('Media-Type')) 5497 state.attr.set_value(str(SourceMediaType(mtype))) 5498 5499 def __media_ref_titl(self, line, state): 5500 """ 5501 +1 TITL <DESCRIPTIVE_TITLE> {0:1} 5502 5503 @param line: The current line in GedLine format 5504 @type line: GedLine 5505 @param state: The current state 5506 @type state: CurrentState 5507 """ 5508 state.title = line.data 5509 5510 def __media_ref_file(self, line, state): 5511 """ 5512 +1 FILE <MULTIMEDIA_FILE_REFERENCE> {1:1} 5513 5514 @param line: The current line in GedLine format 5515 @type line: GedLine 5516 @param state: The current state 5517 @type state: CurrentState 5518 """ 5519 if state.filename != "": 5520 self.__add_msg(_("Multiple FILE in a single OBJE ignored"), 5521 line, state) 5522 self.__skip_subordinate_levels(state.level + 1, state) 5523 return 5524 state.filename = line.data 5525 5526 def __media_ref_prim(self, line, state): 5527 """ 5528 +1 _PRIM <Y/N> {0:1} 5529 5530 Indicates that this OBJE is the primary photo. 5531 5532 @param line: The current line in GedLine format 5533 @type line: GedLine 5534 @param state: The current state 5535 @type state: CurrentState 5536 """ 5537 state.prim = line.data 5538 5539 def __family_adopt(self, line, state): 5540 """ 5541 n ADOP 5542 5543 @param line: The current line in GedLine format 5544 @type line: GedLine 5545 @param state: The current state 5546 @type state: CurrentState 5547 """ 5548 state.frel = TYPE_ADOPT 5549 state.mrel = TYPE_ADOPT 5550 5551 def __family_frel(self, line, state): 5552 """ 5553 The _FREL key is a FTW/FTM specific extension to indicate father/child 5554 relationship. 
5555 5556 n _FREL <type> 5557 5558 @param line: The current line in GedLine format 5559 @type line: GedLine 5560 @param state: The current state 5561 @type state: CurrentState 5562 """ 5563 state.frel = PEDIGREE_TYPES.get(line.data.strip().lower()) 5564 5565 def __family_mrel(self, line, state): 5566 """ 5567 The _MREL key is a FTW/FTM specific extension to indicate father/child 5568 relationship. 5569 5570 n _MREL <type> 5571 5572 @param line: The current line in GedLine format 5573 @type line: GedLine 5574 @param state: The current state 5575 @type state: CurrentState 5576 """ 5577 state.mrel = PEDIGREE_TYPES.get(line.data.strip().lower()) 5578 5579 def __family_stat(self, line, state): 5580 """ 5581 @param line: The current line in GedLine format 5582 @type line: GedLine 5583 @param state: The current state 5584 @type state: CurrentState 5585 """ 5586 state.mrel = TYPE_BIRTH 5587 state.frel = TYPE_BIRTH 5588 5589 def __event_object(self, line, state): 5590 """ 5591 @param line: The current line in GedLine format 5592 @type line: GedLine 5593 @param state: The current state 5594 @type state: CurrentState 5595 """ 5596 self.__obje(line, state, state.event) 5597 5598 def __event_type(self, line, state): 5599 """ 5600 Parses the TYPE line for an event. 
5601 5602 @param line: The current line in GedLine format 5603 @type line: GedLine 5604 @param state: The current state 5605 @type state: CurrentState 5606 """ 5607 if state.event.get_type().is_custom(): 5608 if line.data in GED_TO_GRAMPS_EVENT: 5609 name = EventType(GED_TO_GRAMPS_EVENT[line.data]) 5610 else: 5611 try: 5612 name = EventType((EventType.CUSTOM, line.data)) 5613 except AttributeError: 5614 name = EventType(EventType.UNKNOWN) 5615 state.event.set_type(name) 5616 else: 5617 try: 5618 if line.data not in GED_TO_GRAMPS_EVENT and \ 5619 line.data[0] != 'Y': 5620 state.event.set_description(line.data) 5621 except IndexError: 5622 return 5623 5624 def __event_date(self, line, state): 5625 """ 5626 @param line: The current line in GedLine format 5627 @type line: GedLine 5628 @param state: The current state 5629 @type state: CurrentState 5630 """ 5631 state.event.set_date_object(line.data) 5632 5633 def __event_place(self, line, state): 5634 """ 5635 Parse the place portion of a event. A special case has to be made for 5636 Family Tree Maker, which violates the GEDCOM spec. It uses the PLAC 5637 field to store the description or value associated with the event. 
5638 5639 n PLAC <PLACE_VALUE> {1:1} 5640 +1 FORM <PLACE_HIERARCHY> {0:1} 5641 +1 <<SOURCE_CITATION>> {0:M} 5642 +1 <<NOTE_STRUCTURE>> {0:M} 5643 5644 @param line: The current line in GedLine format 5645 @type line: GedLine 5646 @param state: The current state 5647 @type state: CurrentState 5648 """ 5649 5650 if self.is_ftw and (state.event.type in FTW_BAD_PLACE) \ 5651 and not state.event.get_description(): 5652 state.event.set_description(line.data) 5653 else: 5654 place = state.place 5655 if place: 5656 # We encounter a PLAC, having previously encountered an ADDR 5657 if state.place.place_type.string != _("Address"): 5658 # We have previously found a PLAC 5659 self.__add_msg(_("A second PLAC ignored"), line, state) 5660 # ignore this second PLAC, and use the old one 5661 else: 5662 # This is the first PLAC 5663 place.set_title(line.data) 5664 place.name.set_value(line.data) 5665 else: 5666 # The first thing we encounter is PLAC 5667 state.place = Place() 5668 place = state.place 5669 place.set_title(line.data) 5670 place.name.set_value(line.data) 5671 5672 sub_state = CurrentState() 5673 sub_state.place = place 5674 sub_state.level = state.level + 1 5675 5676 self.__parse_level(sub_state, self.event_place_map, 5677 self.__undefined) 5678 state.msg += sub_state.msg 5679 if sub_state.pf: # if we found local PLAC:FORM 5680 state.pf = sub_state.pf # save to override global value 5681 # merge notes etc into place 5682 state.place.merge(sub_state.place) 5683 5684 def __event_place_note(self, line, state): 5685 """ 5686 @param line: The current line in GedLine format 5687 @type line: GedLine 5688 @param state: The current state 5689 @type state: CurrentState 5690 """ 5691 self.__parse_note(line, state.place, state) 5692 5693 def __event_place_form(self, line, state): 5694 """ 5695 @param line: The current line in GedLine format 5696 @type line: GedLine 5697 @param state: The current state 5698 @type state: CurrentState 5699 """ 5700 state.pf = PlaceParser(line) 5701 
    def __event_addr(self, line, state):
        """
        Parses the ADDR line of an event together with its subordinate
        lines and stores the result in the place of the state. Depending on
        self.addr_is_detail the address becomes either a "Detail" place
        enclosed by the place found so far, or a location of the place.

        @param line: The current line in GedLine format
        @type line: GedLine
        @param state: The current state
        @type state: CurrentState
        """
        free_form = line.data

        sub_state = CurrentState(level=state.level + 1)
        sub_state.location = Location()
        sub_state.event = state.event
        sub_state.place = Place()  # temp stash for notes, citations etc

        self.__parse_level(sub_state, self.parse_loc_tbl, self.__undefined)
        state.msg += sub_state.msg

        # the title is built from the free form text and the parsed location
        title = self.__merge_address(free_form, sub_state.location,
                                     line, state)

        location = sub_state.location

        if self.addr_is_detail and state.place:
            # Commit the enclosing place
            place = self.__find_place(state.place.get_title(), None,
                                      state.place.get_placeref_list())
            if place is None:
                # no match was found, so the place of the state is added
                # and registered under its title
                place = state.place
                self.dbase.add_place(place, self.trans)
                self.place_names[place.get_title()].append(place.get_handle())
            else:
                # a match was found, so the place of the state is merged
                # into it
                place.merge(state.place)
                self.dbase.commit_place(place, self.trans)
            place_title = _pd.display(self.dbase, place)
            state.pf.load_place(self.place_import, place, place_title)

            # Create the Place Details (it is committed with the event)
            place_detail = Place()
            place_detail.set_name(PlaceName(value=title))
            place_detail.set_title(title)
            # For RootsMagic etc. Place Details e.g. address, hospital, ...
            place_detail.set_type((PlaceType.CUSTOM, _("Detail")))
            # the detail is enclosed by the place committed above
            placeref = PlaceRef()
            placeref.ref = place.get_handle()
            place_detail.set_placeref_list([placeref])
            state.place = place_detail
        else:
            place = state.place
            if place:
                # We encounter an ADDR having previously encountered a PLAC
                if len(place.get_alternate_locations()) != 0 and \
                        not self.__get_first_loc(place).is_empty():
                    # We have previously found an ADDR, or have populated
                    # location from PLAC title
                    self.__add_msg(_("Location already populated; ADDR "
                                     "ignored"), line, state)
                    # ignore this second ADDR, and use the old one
                else:
                    # This is the first ADDR
                    place.add_alternate_locations(location)
            else:
                # The first thing we encounter is ADDR
                state.place = Place()
                place = state.place
                place.add_alternate_locations(location)
                place.set_name(PlaceName(value=title))
                place.set_title(title)
                # __event_place tests for this type to tell an ADDR-created
                # place from one created by PLAC
                place.set_type((PlaceType.CUSTOM, _("Address")))

        # merge notes etc into place
        state.place.merge(sub_state.place)
GedLine 5857 @param state: The current state 5858 @type state: CurrentState 5859 """ 5860 state.event.set_privacy(True) 5861 5862 def __event_note(self, line, state): 5863 """ 5864 @param line: The current line in GedLine format 5865 @type line: GedLine 5866 @param state: The current state 5867 @type state: CurrentState 5868 """ 5869 self.__parse_note(line, state.event, state) 5870 5871 def __event_inline_note(self, line, state): 5872 """ 5873 @param line: The current line in GedLine format 5874 @type line: GedLine 5875 @param state: The current state 5876 @type state: CurrentState 5877 """ 5878 if line.data[0:13] == "Description: ": 5879 state.event.set_description(line.data[13:]) 5880 else: 5881 self.__parse_note(line, state.event, state) 5882 5883 def __event_source(self, line, state): 5884 """ 5885 @param line: The current line in GedLine format 5886 @type line: GedLine 5887 @param state: The current state 5888 @type state: CurrentState 5889 """ 5890 state.event.add_citation(self.handle_source(line, state.level, state)) 5891 5892 def __event_rin(self, line, state): 5893 """ 5894 @param line: The current line in GedLine format 5895 @type line: GedLine 5896 @param state: The current state 5897 @type state: CurrentState 5898 """ 5899 attr = Attribute() 5900 attr.set_type(line.token_text) 5901 attr.set_value(line.data) 5902 state.event.add_attribute(attr) 5903 5904 def __event_attr(self, line, state): 5905 """ 5906 @param line: The current line in GedLine format 5907 @type line: GedLine 5908 @param state: The current state 5909 @type state: CurrentState 5910 """ 5911 state.event.add_attribute(line.data) 5912 5913 def __event_phon(self, line, state): 5914 """ 5915 @param line: The current line in GedLine format 5916 @type line: GedLine 5917 @param state: The current state 5918 @type state: CurrentState 5919 """ 5920 attr = Attribute() 5921 attr.set_type(_("Phone")) 5922 attr.set_value(line.data) 5923 state.event.add_attribute(attr) 5924 5925 def __event_fax(self, 
def __event_cause(self, line, state):
    """
    Store the cause of an event as an attribute and parse the lines
    subordinate to it.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    cause = Attribute()
    cause.set_type(AttributeType.CAUSE)
    cause.set_value(line.data)
    state.event.add_attribute(cause)

    # The subordinate lines are parsed one level down, with the new
    # attribute exposed to the handlers as sub-state "attr".
    child = CurrentState()
    child.event = state.event
    child.level = state.level + 1
    child.attr = cause

    self.__parse_level(child, self.event_cause_tbl, self.__undefined)
    # Messages collected while parsing the sub-level belong to the caller.
    state.msg += child.msg
def __event_husb(self, line, state):
    """
    Read the lines below a HUSB tag of an event.

    Each AGE line found is stored on state.event_ref as a FATHER_AGE
    attribute. If a WIFE line is met, it is passed to __event_wife and
    this handler stops.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    sub_level = state.level + 1
    while True:
        line = self.__get_next_line()
        if self.__level_is_finished(line, sub_level):
            return
        if line.token == TOKEN_AGE:
            father_age = Attribute()
            father_age.set_type(AttributeType.FATHER_AGE)
            father_age.set_value(line.data)
            state.event_ref.add_attribute(father_age)
        elif line.token == TOKEN_WIFE:
            # wife event can be on same level, if so call it and finish
            self.__event_wife(line, state)
            return
def __event_witness(self, line, state):
    """
    Parse the witness of an event

    A value starting with "@" is a cross reference to a person: that
    person receives an event reference to the current event, and a
    subordinate TYPE line sets the role of that reference. Any other
    value is stored on the event as a WITNESS attribute.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    if not (line.data and line.data[0] == "@"):
        # n _WITN <TEXTUAL_LIST_OF_NAMES>
        names = Attribute()
        names.set_type(AttributeType.WITNESS)
        names.set_value(line.data)
        state.event.add_attribute(names)
        return

    # n _WITN @<XREF:INDI>@
    # +1 TYPE <TYPE_OF_RELATION>
    assert state.event.handle  # event handle is required to be set
    witness = self.__find_or_create_person(self.pid_map[line.data])
    witness_ref = EventRef()
    witness_ref.set_reference_handle(state.event.handle)
    sub_level = state.level + 1
    while True:
        line = self.__get_next_line()
        if self.__level_is_finished(line, sub_level):
            break
        if line.token == TOKEN_TYPE:
            if line.data == "WITNESS_OF_MARRIAGE":
                role_value = EventRoleType.WITNESS
            else:
                role_value = (EventRoleType.CUSTOM, line.data)
            witness_ref.set_role(EventRoleType(role_value))
    witness.add_event_ref(witness_ref)
    self.dbase.commit_person(witness, self.trans)
def __person_adopt_famc_adopt(self, line, state):
    """
    Record which parent relations of the family are adoptive.

    "HUSB" marks only the father relation, "WIFE" marks only the mother
    relation, and any other value marks both.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    who = line.data.strip()
    # The mother relation is adoptive unless the line names only HUSB ...
    if who != "HUSB":
        state.mrel = TYPE_ADOPT
    # ... and the father relation is adoptive unless it names only WIFE.
    if who != "WIFE":
        state.frel = TYPE_ADOPT
ref.set_mother_relation(mrel) 6186 family.add_child_ref(ref) 6187 self.dbase.commit_family(family, self.trans) 6188 6189 def __address_date(self, line, state): 6190 """ 6191 Parses the DATE line of an ADDR tag 6192 6193 @param line: The current line in GedLine format 6194 @type line: GedLine 6195 @param state: The current state 6196 @type state: CurrentState 6197 """ 6198 state.addr.set_date_object(line.data) 6199 6200 def __address_adr1(self, line, state): 6201 """ 6202 Parses the ADR1 line of an ADDR tag 6203 6204 @param line: The current line in GedLine format 6205 @type line: GedLine 6206 @param state: The current state 6207 @type state: CurrentState 6208 """ 6209 # The ADDR may already have been parsed by the level above 6210 # assert state.addr.get_street() == "" 6211 if state.addr.get_street() != "": 6212 self.__add_msg(_("Warn: ADDR overwritten"), line, state) 6213 state.addr.set_street(line.data) 6214 6215 def __address_adr2(self, line, state): 6216 """ 6217 Parses the ADR2 line of an ADDR tag 6218 6219 @param line: The current line in GedLine format 6220 @type line: GedLine 6221 @param state: The current state 6222 @type state: CurrentState 6223 """ 6224 state.addr.set_locality(line.data) 6225 6226 def __address_city(self, line, state): 6227 """ 6228 Parses the CITY line of an ADDR tag 6229 6230 @param line: The current line in GedLine format 6231 @type line: GedLine 6232 @param state: The current state 6233 @type state: CurrentState 6234 """ 6235 state.addr.set_city(line.data) 6236 6237 def __address_state(self, line, state): 6238 """ 6239 Parses the STAE line of an ADDR tag 6240 6241 @param line: The current line in GedLine format 6242 @type line: GedLine 6243 @param state: The current state 6244 @type state: CurrentState 6245 """ 6246 state.addr.set_state(line.data) 6247 6248 def __address_post(self, line, state): 6249 """ 6250 Parses the POST line of an ADDR tag 6251 6252 @param line: The current line in GedLine format 6253 @type line: GedLine 6254 
@param state: The current state 6255 @type state: CurrentState 6256 """ 6257 state.addr.set_postal_code(line.data) 6258 6259 def __address_country(self, line, state): 6260 """ 6261 Parses the country line of an ADDR tag 6262 6263 @param line: The current line in GedLine format 6264 @type line: GedLine 6265 @param state: The current state 6266 @type state: CurrentState 6267 """ 6268 state.addr.set_country(line.data) 6269 6270 def __address_sour(self, line, state): 6271 """ 6272 Parses the SOUR line of an ADDR tag 6273 6274 @param line: The current line in GedLine format 6275 @type line: GedLine 6276 @param state: The current state 6277 @type state: CurrentState 6278 """ 6279 state.addr.add_citation(self.handle_source(line, state.level, state)) 6280 6281 def __address_note(self, line, state): 6282 """ 6283 Parses the NOTE line of an ADDR tag 6284 6285 @param line: The current line in GedLine format 6286 @type line: GedLine 6287 @param state: The current state 6288 @type state: CurrentState 6289 """ 6290 self.__parse_note(line, state.addr, state) 6291 6292 def __citation_page(self, line, state): 6293 """ 6294 Parses the PAGE line of an SOUR instance tag 6295 6296 @param line: The current line in GedLine format 6297 @type line: GedLine 6298 @param state: The current state 6299 @type state: CurrentState 6300 """ 6301 state.citation.set_page(line.data) 6302 6303 def __citation_date(self, line, state): 6304 """ 6305 Parses the DATE line of an SOUR instance tag 6306 6307 @param line: The current line in GedLine format 6308 @type line: GedLine 6309 @param state: The current state 6310 @type state: CurrentState 6311 """ 6312 state.citation.set_date_object(line.data) 6313 6314 def __citation_data(self, line, state): 6315 """ 6316 Parses the DATA line of an SOUR instance tag 6317 6318 @param line: The current line in GedLine format 6319 @type line: GedLine 6320 @param state: The current state 6321 @type state: CurrentState 6322 """ 6323 sub_state = 
def __citation_link(self, line, state):
    """
    Not legal GEDCOM - added to support FTM, converts the _LINK tag to a
    note with styled text so link can be followed in reports etc.

    The whole text of the note is covered by a single LINK tag whose
    value is the text itself; the note is added to the database and
    attached to the current citation.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    link_note = Note()
    url = line.data
    whole_text = [(0, len(url))]
    link_tag = StyledTextTag(StyledTextTagType.LINK, url, whole_text)
    link_note.set_styledtext(StyledText(url, [link_tag]))
    link_note.set_gramps_id(self.nid_map[""])
    link_note.set_type(NoteType.CITATION)
    self.dbase.add_note(link_note, self.trans)
    state.citation.add_note(link_note.get_handle())
def __citation_even(self, line, state):
    """
    Parses the EVEN line of an SOUR instance tag

    The value is kept on the citation as a source attribute of type
    "EVEN"; subordinate lines are then parsed with citation_even_tbl.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    event_attr = SrcAttribute()
    event_attr.set_type("EVEN")
    event_attr.set_value(line.data)
    state.citation.add_attribute(event_attr)

    child = CurrentState(level=state.level + 1)
    child.citation = state.citation
    self.__parse_level(child, self.citation_even_tbl, self.__undefined)
    # Hand any messages from the sub-level back to the caller's state.
    state.msg += child.msg
sattr.set_type("EVEN:ROLE") 6437 sattr.set_value(line.data) 6438 state.citation.add_attribute(sattr) 6439 6440 def __citation_quay(self, line, state): 6441 """ 6442 Parses the QUAY line of an SOUR instance tag 6443 6444 @param line: The current line in GedLine format 6445 @type line: GedLine 6446 @param state: The current state 6447 @type state: CurrentState 6448 """ 6449 try: 6450 val = int(line.data) 6451 except ValueError: 6452 return 6453 # If value is greater than 3, cap at 3 6454 val = min(val, 3) 6455 if val > 1: 6456 state.citation.set_confidence_level(val + 1) 6457 else: 6458 state.citation.set_confidence_level(val) 6459 6460 def __citation_note(self, line, state): 6461 """ 6462 Parses the NOTE line of an SOUR instance tag 6463 6464 @param line: The current line in GedLine format 6465 @type line: GedLine 6466 @param state: The current state 6467 @type state: CurrentState 6468 """ 6469 self.__parse_note(line, state.citation, state) 6470 6471 #---------------------------------------------------------------------- 6472 # 6473 # SOUR parsing 6474 # 6475 #---------------------------------------------------------------------- 6476 6477 def __parse_source(self, name, level): 6478 """ 6479 n @<XREF:SOUR>@ SOUR {1:1} 6480 +1 DATA {0:1} 6481 +2 EVEN <EVENTS_RECORDED> {0:M} 6482 +3 DATE <DATE_PERIOD> {0:1} 6483 +3 PLAC <SOURCE_JURISDICTION_PLACE> {0:1} 6484 +2 AGNC <RESPONSIBLE_AGENCY> {0:1} 6485 +2 <<NOTE_STRUCTURE>> {0:M} 6486 +1 AUTH <SOURCE_ORIGINATOR> {0:1} 6487 +1 TITL <SOURCE_DESCRIPTIVE_TITLE> {0:1} 6488 +1 ABBR <SOURCE_FILED_BY_ENTRY> {0:1} 6489 +1 PUBL <SOURCE_PUBLICATION_FACTS> {0:1} 6490 +1 TEXT <TEXT_FROM_SOURCE> {0:1} 6491 +1 <<SOURCE_REPOSITORY_CITATION>> {0:1} 6492 +1 <<MULTIMEDIA_LINK>> {0:M} 6493 +1 <<NOTE_STRUCTURE>> {0:M} 6494 +1 REFN <USER_REFERENCE_NUMBER> {0:M} 6495 +2 TYPE <USER_REFERENCE_TYPE> {0:1} 6496 +1 RIN <AUTOMATED_RECORD_ID> {0:1} 6497 +1 <<CHANGE_DATE>> {0:1} 6498 """ 6499 6500 state = CurrentState() 6501 state.source = 
def __source_repo(self, line, state):
    """
    Attach a repository reference to the source being parsed.

    Three forms of the REPO line are handled: a cross reference
    ("@...@"), an empty value, and a plain repository name. In every
    case the subordinate lines are parsed with repo_ref_tbl and the
    resulting reference is added to state.source.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    name = line.data
    if name and name[0] == '@':
        # This deals with the standard GEDCOM
        # SOURCE_REPOSITORY_CITATION: =
        #   n  REPO @<XREF:REPO>@                {1:1}
        #     +1 <<NOTE_STRUCTURE>>              {0:M}
        #     +1 CALN <SOURCE_CALL_NUMBER>       {0:M}
        #        +2 MEDI <SOURCE_MEDIA_TYPE>     {0:1}
        repo = self.__find_or_create_repository(self.rid_map[name])
    elif name == '':
        # This deals with the non-standard GEDCOM format found in Family
        # Tree Maker for Windows, Broderbund Software, Banner Blue
        # Division:
        # SOURCE_REPOSITORY_CITATION: =
        #   n  REPO                              {1:1}
        #     +1 <<NOTE_STRUCTURE>>              {0:M}
        #     +1 CALN <SOURCE_CALL_NUMBER>       {0:M}
        #        +2 MEDI <SOURCE_MEDIA_TYPE>     {0:1}
        #
        # This format has no repository name. See
        # http://west-penwith.org.uk/misc/ftmged.htm which points out
        # this is incorrect
        repo = self.__find_or_create_repository(self.rid_map[""])
        self.dbase.commit_repository(repo, self.trans)
    else:
        # This deals with the non-standard GEDCOM
        # SOURCE_REPOSITORY_CITATION: =
        #   n  REPO <NAME_OF_REPOSITORY>         {1:1}
        #     +1 <<NOTE_STRUCTURE>>              {0:M}
        #     +1 CALN <SOURCE_CALL_NUMBER>       {0:M}
        #        +2 MEDI <SOURCE_MEDIA_TYPE>     {0:1}
        # This seems to be used by Heredis 8 PC. Heredis is notorious for
        # non-standard GEDCOM.
        known_gid = self.repo2id.get(name)
        # Reuse the id already recorded for this name, if there is one.
        gid = self.rid_map[""] if known_gid is None else known_gid
        repo = self.__find_or_create_repository(gid)
        self.repo2id[name] = repo.get_gramps_id()
        repo.set_name(name)
        self.dbase.commit_repository(repo, self.trans)

    reference = RepoRef()
    reference.set_reference_handle(repo.handle)

    child = CurrentState()
    child.repo_ref = reference
    child.level = state.level + 1

    self.__parse_level(child, self.repo_ref_tbl, self.__undefined)
    state.msg += child.msg

    state.source.add_repo_reference(reference)
def __source_agnc(self, line, state):
    """
    Store the AGNC value of a source as an AGENCY attribute.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    # NOTE(review): __source_attr in this file attaches a SrcAttribute to
    # state.source, while this handler attaches a plain Attribute -
    # confirm that the difference is intended.
    agency = Attribute()
    agency.set_type(AttributeType.AGENCY)
    agency.set_value(line.data)
    state.source.add_attribute(agency)
def __parse_obje(self, line):
    """
    Parse a top level OBJE (multimedia) record.

      n  @XREF:OBJE@ OBJE {1:1}                 # v5.5 layout
        +1 FILE <MULTIMEDIA_FILE_REFN> {1:1}    # de-facto extension
        +1 FORM <MULTIMEDIA_FORMAT> {1:1}
        +1 TITL <DESCRIPTIVE_TITLE> {0:1}
        +1 <<NOTE_STRUCTURE>> {0:M}             p.*
        +1 BLOB {1:1}                           # Deprecated, no support
          +2 CONT <ENCODED_MULTIMEDIA_LINE> {1:M}
        +1 OBJE @<XREF:OBJE>@ /* chain */ {0:1} # Deprecated, no support
        +1 REFN <USER_REFERENCE_NUMBER> {0:M}
          +2 TYPE <USER_REFERENCE_TYPE> {0:1}
        +1 RIN <AUTOMATED_RECORD_ID> {0:1}
        +1 <<CHANGE_DATE>> {0:1}

      n  @XREF:OBJE@ OBJE {1:1}                 # v5.5.1 layout
        +1 FILE <MULTIMEDIA_FILE_REFN> {1:M}    # multi files, no support
          +2 FORM <MULTIMEDIA_FORMAT> {1:1}
            +3 TYPE <SOURCE_MEDIA_TYPE> {0:1}
          +2 TITL <DESCRIPTIVE_TITLE> {0:1}
          +2 DATE <mm/dd/yyy hh:mn:ss AM> {0:1} # FTM extension
          +2 TEXT <COMMENT, by user or exif>    # FTM extension
        +1 REFN <USER_REFERENCE_NUMBER> {0:M}
          +2 TYPE <USER_REFERENCE_TYPE> {0:1}
        +1 RIN <AUTOMATED_RECORD_ID> {0:1}
        +1 <<NOTE_STRUCTURE>> {0:M}
        +1 <<SOURCE_CITATION>> {0:M}
        +1 <<CHANGE_DATE>> {0:1}

    @param line: The current line in GedLine format
    @type line: GedLine
    """
    xref = line.token_text.strip()
    media = self.__find_or_create_media(self.oid_map[xref])

    state = CurrentState()
    state.media = media
    state.level = 1

    self.__parse_level(state, self.obje_func, self.__undefined)

    if state.media.get_path() == "":
        self.__add_msg(_("Filename omitted"), line, state)

    # Work out the mime type: prefer what the file name suggests, then
    # fall back on the value collected from OBJE.FILE.FORM.
    guess = mimetypes.guess_type(state.media.get_path())
    if guess and guess[0]:
        # found from filename
        state.media.set_mime_type(guess[0])
    elif '/' in state.form:
        # FORM already holds an expanded mime type
        state.media.set_mime_type(state.form)
    else:
        # treat FORM as a file extension
        from_form = mimetypes.types_map.get('.' + state.form,
                                            _('unknown'))
        state.media.set_mime_type(from_form)

    # Add the default source reference, if one applies
    self.__add_default_source(media)

    # Add a default tag if provided
    self.__add_default_tag(media)

    self.__check_msgs(_("OBJE (multi-media object) Gramps ID %s") %
                      media.get_gramps_id(), state, media)
    # commit the media object to the database
    self.dbase.commit_media(media, self.trans, media.change)
def __obje_text(self, line, state):
    """
    FTM non-standard TEXT in OBJE, treat as note.

    A new MEDIA note holding the line's text is committed to the
    database and attached to the media object being parsed.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    text_note = Note(line.data)
    text_note.set_gramps_id(self.nid_map[""])
    text_note.set_handle(create_id())
    text_note.set_type(NoteType.MEDIA)
    self.dbase.commit_note(text_note, self.trans, text_note.change)

    note_handle = text_note.get_handle()
    state.media.add_note(note_handle)
6840 def __obje_date(self, line, state): 6841 """ 6842 @param line: The current line in GedLine format 6843 @type line: GedLine 6844 @param state: The current state 6845 @type state: CurrentState 6846 """ 6847 state.media.set_date_object(line.data) 6848 6849 def __obje_note(self, line, state): 6850 """ 6851 @param line: The current line in GedLine format 6852 @type line: GedLine 6853 @param state: The current state 6854 @type state: CurrentState 6855 """ 6856 self.__parse_note(line, state.media, state) 6857 6858 def __obje_sour(self, line, state): 6859 """ 6860 @param line: The current line in GedLine format 6861 @type line: GedLine 6862 @param state: The current state 6863 @type state: CurrentState 6864 """ 6865 state.media.add_citation(self.handle_source(line, state.level, state)) 6866 6867 def __obje_refn(self, line, state): 6868 """ 6869 @param line: The current line in GedLine format 6870 @type line: GedLine 6871 @param state: The current state 6872 @type state: CurrentState 6873 """ 6874 self.__do_refn(line, state, state.media) 6875 6876 def __obje_type(self, line, state): 6877 """ 6878 +1 FILE <MULTIMEDIA_FILE_REFN> {1:M} 6879 +2 FORM <MULTIMEDIA_FORMAT> {1:1} 6880 +3 TYPE <SOURCE_MEDIA_TYPE> {0:1} # v5.5.1 6881 6882 Source_Media_type is one of (Photo, Audio, Book, etc.) 
6883 6884 @param line: The current line in GedLine format 6885 @type line: GedLine 6886 @param state: The current state 6887 @type state: CurrentState 6888 """ 6889 attr = Attribute() 6890 mtype = MEDIA_MAP.get(line.data.lower(), 6891 (SourceMediaType.CUSTOM, line.data)) 6892 attr.set_type(_('Media-Type')) 6893 attr.set_value(str(SourceMediaType(mtype))) 6894 state.media.attribute_list.append(attr) 6895 6896 def __obje_rin(self, line, state): 6897 """ 6898 @param line: The current line in GedLine format 6899 @type line: GedLine 6900 @param state: The current state 6901 @type state: CurrentState 6902 """ 6903 attr = Attribute() 6904 attr.set_type(line.token_text) # Attribute: RIN 6905 attr.set_value(line.data) 6906 state.media.attribute_list.append(attr) 6907 6908 def __obje_chan(self, line, state): 6909 """ 6910 @param line: The current line in GedLine format 6911 @type line: GedLine 6912 @param state: The current state 6913 @type state: CurrentState 6914 """ 6915 self.__parse_change(line, state.media, state.level + 1, state) 6916 6917 def __person_attr_type(self, line, state): 6918 """ 6919 @param line: The current line in GedLine format 6920 @type line: GedLine 6921 @param state: The current state 6922 @type state: CurrentState 6923 """ 6924 if state.attr.get_type() == "": 6925 if line.data in GED_TO_GRAMPS_EVENT: 6926 name = GED_TO_GRAMPS_EVENT[line.data] 6927 else: 6928 name = line.data 6929 state.attr.set_type(name) 6930 else: 6931 self.__ignore(line, state) 6932 6933 def __person_attr_source(self, line, state): 6934 """ 6935 @param line: The current line in GedLine format 6936 @type line: GedLine 6937 @param state: The current state 6938 @type state: CurrentState 6939 """ 6940 state.attr.add_citation(self.handle_source(line, state.level, state)) 6941 6942 def __person_attr_place(self, line, state): 6943 """ 6944 @param line: The current line in GedLine format 6945 @type line: GedLine 6946 @param state: The current state 6947 @type state: CurrentState 6948 """ 
6949 val = line.data 6950 if state.attr.get_value() == "": 6951 state.attr.set_value(val) 6952 self.__skip_subordinate_levels(state.level + 1, state) 6953 else: 6954 self.__ignore(line, state) 6955 6956 def __person_attr_note(self, line, state): 6957 """ 6958 @param line: The current line in GedLine format 6959 @type line: GedLine 6960 @param state: The current state 6961 @type state: CurrentState 6962 """ 6963 self.__parse_note(line, state.attr, state) 6964 6965 #---------------------------------------------------------------------- 6966 # 6967 # REPO parsing 6968 # 6969 #---------------------------------------------------------------------- 6970 6971 def __parse_repo(self, line): 6972 """ 6973 n @<XREF:REPO>@ REPO {1:1} 6974 +1 NAME <NAME_OF_REPOSITORY> {0:1} p.* 6975 +1 <<ADDRESS_STRUCTURE>> {0:1} p.* 6976 +1 <<NOTE_STRUCTURE>> {0:M} p.* 6977 +1 REFN <USER_REFERENCE_NUMBER> {0:M} p.* 6978 +1 RIN <AUTOMATED_RECORD_ID> {0:1} p.* 6979 +1 <<CHANGE_DATE>> {0:1} p. 6980 """ 6981 repo = self.__find_or_create_repository(self.rid_map[line.token_text]) 6982 6983 state = CurrentState() 6984 state.repo = repo 6985 state.level = 1 6986 self.__parse_level(state, self.repo_parse_tbl, self.__ignore) 6987 6988 self.__check_msgs(_("REPO (repository) Gramps ID %s") % 6989 repo.get_gramps_id(), state, repo) 6990 self.dbase.commit_repository(repo, self.trans, repo.change) 6991 6992 def __repo_name(self, line, state): 6993 """ 6994 @param line: The current line in GedLine format 6995 @type line: GedLine 6996 @param state: The current state 6997 @type state: CurrentState 6998 """ 6999 state.repo.set_name(line.data) 7000 7001 def __repo_note(self, line, state): 7002 """ 7003 @param line: The current line in GedLine format 7004 @type line: GedLine 7005 @param state: The current state 7006 @type state: CurrentState 7007 """ 7008 self.__parse_note(line, state.repo, state) 7009 7010 def __repo_addr(self, line, state): 7011 """ 7012 Parses the REPOsitory and HEADer COPR <ADDRESS_STRUCTURE> 
7013 7014 n ADDR <ADDRESS_LINE> {0:1} 7015 +1 CONT <ADDRESS_LINE> {0:M} 7016 +1 ADR1 <ADDRESS_LINE1> {0:1} (Street) 7017 +1 ADR2 <ADDRESS_LINE2> {0:1} (Locality) 7018 +1 CITY <ADDRESS_CITY> {0:1} 7019 +1 STAE <ADDRESS_STATE> {0:1} 7020 +1 POST <ADDRESS_POSTAL_CODE> {0:1} 7021 +1 CTRY <ADDRESS_COUNTRY> {0:1} 7022 n PHON <PHONE_NUMBER> {0:3} 7023 7024 Some repositories do not try to break up the address, 7025 instead they put everything on a single line. Try to determine 7026 if this happened, and try to fix it. 7027 """ 7028 free_form = line.data 7029 7030 sub_state = CurrentState(level=state.level + 1) 7031 sub_state.addr = Address() 7032 7033 self.__parse_level(sub_state, self.parse_addr_tbl, self.__ignore) 7034 state.msg += sub_state.msg 7035 7036 self.__merge_address(free_form, sub_state.addr, line, state) 7037 state.repo.add_address(sub_state.addr) 7038 7039 def __repo_phon(self, line, state): 7040 """ 7041 @param line: The current line in GedLine format 7042 @type line: GedLine 7043 @param state: The current state 7044 @type state: CurrentState 7045 """ 7046 address_list = state.repo.get_address_list() 7047 if address_list: 7048 if address_list[0].get_phone(): 7049 self.__add_msg(_("Only one phone number supported"), 7050 line, state) 7051 else: 7052 address_list[0].set_phone(line.data) 7053 7054 def __repo_fax(self, line, state): 7055 """ 7056 @param line: The current line in GedLine format 7057 @type line: GedLine 7058 @param state: The current state 7059 @type state: CurrentState 7060 """ 7061 url = Url() 7062 url.set_path(line.data) 7063 url.set_type(UrlType(_('FAX'))) 7064 state.repo.add_url(url) 7065 7066 def __repo_www(self, line, state): 7067 """ 7068 @param line: The current line in GedLine format 7069 @type line: GedLine 7070 @param state: The current state 7071 @type state: CurrentState 7072 """ 7073 url = Url() 7074 url.set_path(line.data) 7075 url.set_type(UrlType(UrlType.WEB_HOME)) 7076 state.repo.add_url(url) 7077 7078 def __repo_email(self, 
line, state): 7079 """ 7080 @param line: The current line in GedLine format 7081 @type line: GedLine 7082 @param state: The current state 7083 @type state: CurrentState 7084 """ 7085 url = Url() 7086 url.set_path(line.data) 7087 url.set_type(UrlType(UrlType.EMAIL)) 7088 state.repo.add_url(url) 7089 7090 def __location_adr1(self, line, state): 7091 """ 7092 @param line: The current line in GedLine format 7093 @type line: GedLine 7094 @param state: The current state 7095 @type state: CurrentState 7096 """ 7097 if not state.location: 7098 state.location = Location() 7099 if state.location.get_street() != "": 7100 self.__add_msg(_("Warn: ADDR overwritten"), line, state) 7101 state.location.set_street(line.data) 7102 7103 def __location_adr2(self, line, state): 7104 """ 7105 @param line: The current line in GedLine format 7106 @type line: GedLine 7107 @param state: The current state 7108 @type state: CurrentState 7109 """ 7110 if not state.location: 7111 state.location = Location() 7112 state.location.set_locality(line.data) 7113 7114 def __location_city(self, line, state): 7115 """ 7116 @param line: The current line in GedLine format 7117 @type line: GedLine 7118 @param state: The current state 7119 @type state: CurrentState 7120 """ 7121 if not state.location: 7122 state.location = Location() 7123 state.location.set_city(line.data) 7124 7125 def __location_stae(self, line, state): 7126 """ 7127 @param line: The current line in GedLine format 7128 @type line: GedLine 7129 @param state: The current state 7130 @type state: CurrentState 7131 """ 7132 if not state.location: 7133 state.location = Location() 7134 state.location.set_state(line.data) 7135 7136 def __location_post(self, line, state): 7137 """ 7138 @param line: The current line in GedLine format 7139 @type line: GedLine 7140 @param state: The current state 7141 @type state: CurrentState 7142 """ 7143 if not state.location: 7144 state.location = Location() 7145 state.location.set_postal_code(line.data) 7146 7147 
def __location_ctry(self, line, state): 7148 """ 7149 @param line: The current line in GedLine format 7150 @type line: GedLine 7151 @param state: The current state 7152 @type state: CurrentState 7153 """ 7154 if not state.location: 7155 state.location = Location() 7156 state.location.set_country(line.data) 7157 7158 def __location_phone(self, line, state): 7159 """ 7160 @param line: The current line in GedLine format 7161 @type line: GedLine 7162 @param state: The current state 7163 @type state: CurrentState 7164 """ 7165 if not state.location: 7166 state.location = Location() 7167 state.location.set_phone(line.data) 7168 7169 def __location_note(self, line, state): 7170 """ 7171 @param line: The current line in GedLine format 7172 @type line: GedLine 7173 @param state: The current state 7174 @type state: CurrentState 7175 """ 7176 if state.event: 7177 self.__parse_note(line, state.place, state) 7178 else: 7179 # This causes notes below SUBMitter to be ignored 7180 self.__not_recognized(line, state) 7181 7182 def __optional_note(self, line, state): 7183 """ 7184 @param line: The current line in GedLine format 7185 @type line: GedLine 7186 @param state: The current state 7187 @type state: CurrentState 7188 """ 7189 self.__parse_note(line, state.obj, state) 7190 7191 #---------------------------------------------------------------------- 7192 # 7193 # HEAD parsing 7194 # 7195 #---------------------------------------------------------------------- 7196 7197 def __parse_header(self): 7198 """ 7199 Handling of the lines subordinate to the HEAD GEDCOM tag 7200 7201 n HEAD {1:1} 7202 +1 SOUR <APPROVED_SYSTEM_ID> {1:1} 7203 +2 VERS <VERSION_NUMBER> {0:1} 7204 +2 NAME <NAME_OF_PRODUCT> {0:1} 7205 +2 CORP <NAME_OF_BUSINESS> {0:1} 7206 +3 <<ADDRESS_STRUCTURE>> {0:1} 7207 +2 DATA <NAME_OF_SOURCE_DATA> {0:1} 7208 +3 DATE <PUBLICATION_DATE> {0:1} 7209 +3 COPR <COPYRIGHT_SOURCE_DATA> {0:1} 7210 +1 DEST <RECEIVING_SYSTEM_NAME> {0:1*} 7211 +1 DATE <TRANSMISSION_DATE> {0:1} 7212 +2 
TIME <TIME_VALUE> {0:1} 7213 +1 SUBM @<XREF:SUBM>@ {1:1} 7214 +1 SUBN @<XREF:SUBN>@ {0:1} 7215 +1 FILE <FILE_NAME> {0:1} 7216 +1 COPR <COPYRIGHT_GEDCOM_FILE> {0:1} 7217 +1 GEDC {1:1} 7218 +2 VERS <VERSION_NUMBER> {1:1} 7219 +2 FORM <GEDCOM_FORM> {1:1} 7220 +1 CHAR <CHARACTER_SET> {1:1} 7221 +2 VERS <VERSION_NUMBER> {0:1} 7222 +1 LANG <LANGUAGE_OF_TEXT> {0:1} 7223 +1 PLAC {0:1} 7224 +2 FORM <PLACE_HIERARCHY> {1:1} 7225 +1 NOTE <GEDCOM_CONTENT_DESCRIPTION> {0:1} 7226 +2 [CONT|CONC] <GEDCOM_CONTENT_DESCRIPTION> {0:M} 7227 7228 * NOTE: Submissions to the Family History Department for Ancestral 7229 File submission or for clearing temple ordinances must use a 7230 DESTination of ANSTFILE or TempleReady. 7231 7232 """ 7233 state = CurrentState(level=1) 7234 self.__parse_level(state, self.head_parse_tbl, self.__undefined) 7235 self.__check_msgs(_("HEAD (header)"), state, None) 7236 7237 def __header_sour(self, line, state): 7238 """ 7239 @param line: The current line in GedLine format 7240 @type line: GedLine 7241 @param state: The current state 7242 @type state: CurrentState 7243 """ 7244 if line.data.strip() in ["FTW", "FTM"]: 7245 self.is_ftw = True 7246 # Some software (e.g. 
RootsMagic (http://files.rootsmagic.com/PAF- 7247 # Book/RootsMagic-for-PAF-Users-Printable.pdf) use the Addr fields for 7248 # 'Place Details (address, hospital, cemetary)' 7249 if line.data.strip().lower() in ['rootsmagic']: 7250 self.addr_is_detail = True 7251 # We will use the approved system ID as the name of the generating 7252 # software, in case we do not get the name in the proper place 7253 self.genby = line.data 7254 if self.use_def_src: 7255 sattr = SrcAttribute() 7256 sattr.set_type(_("Approved system identification")) 7257 sattr.set_value("%s" % self.genby) 7258 self.def_src.add_attribute(sattr) 7259 sub_state = CurrentState(level=state.level + 1) 7260 self.__parse_level(sub_state, self.header_sour_parse_tbl, 7261 self.__undefined) 7262 state.msg += sub_state.msg 7263 # We can't produce the 'Generated by' statement till the end of the 7264 # SOUR level, because the name and version may come in any order 7265 if self.use_def_src: 7266 # feature request 2356: avoid genitive form 7267 sattr = SrcAttribute() 7268 sattr.set_type(_("Generated By")) 7269 sattr.set_value("%s %s" % (self.genby, self.genvers)) 7270 self.def_src.add_attribute(sattr) 7271 7272 def __header_sour_name(self, line, state): 7273 """ 7274 @param line: The current line in GedLine format 7275 @type line: GedLine 7276 @param state: The current state 7277 @type state: CurrentState 7278 """ 7279 # This is where the name of the product that generated the GEDCOM file 7280 # should appear, and this will overwrite the approved system ID, if any 7281 self.genby = line.data 7282 if self.use_def_src: 7283 sattr = SrcAttribute() 7284 sattr.set_type(_("Name of software product")) 7285 sattr.set_value(self.genby) 7286 self.def_src.add_attribute(sattr) 7287 7288 def __header_sour_vers(self, line, state): 7289 """ 7290 @param line: The current line in GedLine format 7291 @type line: GedLine 7292 @param state: The current state 7293 @type state: CurrentState 7294 """ 7295 self.genvers = line.data 7296 
if self.use_def_src: 7297 sattr = SrcAttribute() 7298 sattr.set_type(_("Version number of software product")) 7299 sattr.set_value(self.genvers) 7300 self.def_src.add_attribute(sattr) 7301 7302 def __header_sour_corp(self, line, state): 7303 """ 7304 @param line: The current line in GedLine format 7305 @type line: GedLine 7306 @param state: The current state 7307 @type state: CurrentState 7308 """ 7309 repo = Repository() 7310 sub_state = CurrentState(level=state.level + 1) 7311 sub_state.repo = repo 7312 self.__parse_level(sub_state, self.header_corp_addr, self.__undefined) 7313 state.msg += sub_state.msg 7314 7315 if self.use_def_src: 7316 repo.set_name(_("Business that produced the product: %s") % 7317 line.data) 7318 rtype = RepositoryType() 7319 rtype.set((RepositoryType.CUSTOM, _('GEDCOM data'))) 7320 repo.set_type(rtype) 7321 self.dbase.add_repository(repo, self.trans) 7322 repo_ref = RepoRef() 7323 repo_ref.set_reference_handle(repo.handle) 7324 mtype = SourceMediaType() 7325 mtype.set((SourceMediaType.UNKNOWN, '')) 7326 repo_ref.set_media_type(mtype) 7327 self.def_src.add_repo_reference(repo_ref) 7328 7329 def __header_sour_data(self, line, state): 7330 """ 7331 @param line: The current line in GedLine format 7332 @type line: GedLine 7333 @param state: The current state 7334 @type state: CurrentState 7335 """ 7336 if self.use_def_src: 7337 sattr = SrcAttribute() 7338 sattr.set_type(_("Name of source data")) 7339 sattr.set_value(line.data) 7340 self.def_src.add_attribute(sattr) 7341 sub_state = CurrentState(level=state.level + 1) 7342 self.__parse_level(sub_state, self.header_sour_data, 7343 self.__undefined) 7344 state.msg += sub_state.msg 7345 7346 def __header_sour_copr(self, line, state): 7347 """ 7348 @param line: The current line in GedLine format 7349 @type line: GedLine 7350 @param state: The current state 7351 @type state: CurrentState 7352 """ 7353 if self.use_def_src: 7354 sattr = SrcAttribute() 7355 sattr.set_type(_("Copyright of source data")) 
7356 sattr.set_value(line.data) 7357 self.def_src.add_attribute(sattr) 7358 7359 def __header_sour_date(self, line, state): 7360 """ 7361 @param line: The current line in GedLine format 7362 @type line: GedLine 7363 @param state: The current state 7364 @type state: CurrentState 7365 """ 7366 if self.use_def_src: 7367 # Because there is a DATE tag, line.data is automatically converted 7368 # to a Date object before getting to this point, so it has to be 7369 # converted back to a string 7370 text_date = str(line.data) 7371 sattr = SrcAttribute() 7372 sattr.set_type(_("Publication date of source data")) 7373 sattr.set_value(text_date) 7374 self.def_src.add_attribute(sattr) 7375 7376 def __header_file(self, line, state): 7377 """ 7378 @param line: The current line in GedLine format 7379 @type line: GedLine 7380 @param state: The current state 7381 @type state: CurrentState 7382 """ 7383 if self.use_def_src: 7384 filename = os.path.basename(line.data).split('\\')[-1] 7385 # feature request 2356: avoid genitive form 7386 self.def_src.set_title(_("Import from %s") % filename) 7387 7388 def __header_copr(self, line, state): 7389 """ 7390 @param line: The current line in GedLine format 7391 @type line: GedLine 7392 @param state: The current state 7393 @type state: CurrentState 7394 """ 7395 if self.use_def_src: 7396 self.def_src.set_publication_info(line.data) 7397 7398 def __header_subm(self, line, state): 7399 """ 7400 @param line: The current line in GedLine format 7401 @type line: GedLine 7402 @param state: The current state 7403 @type state: CurrentState 7404 7405 +1 SUBM @<XREF:SUBM>@ {1:1} 7406 This should be simply be a cross-reference to the correct Submitter 7407 record. Note that there can be multiple Submitter records, so it is 7408 necessary to remember which one should be applied. 
7409 7410 """ 7411 self.subm = line.data[1:-1] 7412 sub_state = CurrentState(level=state.level + 1) 7413 self.__parse_level(sub_state, self.header_subm, self.__ignore) 7414 state.msg += sub_state.msg 7415 7416 def __header_subn(self, line, state): 7417 """ 7418 @param line: The current line in GedLine format 7419 @type line: GedLine 7420 @param state: The current state 7421 @type state: CurrentState 7422 """ 7423 if self.use_def_src: 7424 sattr = SrcAttribute() 7425 sattr.set_type(_('Submission record identifier')) 7426 sattr.set_value(line.token_text) 7427 self.def_src.add_attribute(sattr) 7428 7429 def __header_lang(self, line, state): 7430 """ 7431 @param line: The current line in GedLine format 7432 @type line: GedLine 7433 @param state: The current state 7434 @type state: CurrentState 7435 """ 7436 if self.use_def_src: 7437 sattr = SrcAttribute() 7438 sattr.set_type(_('Language of GEDCOM text')) 7439 sattr.set_value(line.data) 7440 self.def_src.add_attribute(sattr) 7441 7442 def __header_dest(self, line, state): 7443 """ 7444 @param line: The current line in GedLine format 7445 @type line: GedLine 7446 @param state: The current state 7447 @type state: CurrentState 7448 7449 FIXME: This processing does not depend on DEST, so there seems to be 7450 no reason for it to be placed here. Perhaps it is supposed to be after 7451 all the SOUR levels have been processed, but self.genby was only 7452 assigned by the initial SOUR tag, so this could have been done there. 7453 Perhaps, as suggested by the text of the error message, it was 7454 supposed to test whenther the_DEST_ was LEGACY, in which case the 7455 coding is now wrong. 
7456 """ 7457 if self.genby.upper() == "LEGACY": 7458 fname = os.path.basename(self.filename) 7459 self.user.warn( 7460 _("Import of GEDCOM file %(filename)s with DEST=%(by)s, " 7461 "could cause errors in the resulting database!") % 7462 {'filename': fname, 'by': self.genby}, 7463 _("Look for nameless events.")) 7464 7465 def __header_char(self, line, state): 7466 """ 7467 @param line: The current line in GedLine format 7468 @type line: GedLine 7469 @param state: The current state 7470 @type state: CurrentState 7471 """ 7472 # +1 CHAR <CHARACTER_SET> {1:1} 7473 # +2 VERS <VERSION_NUMBER> {0:1} 7474 encoding = line.data 7475 version = "" 7476 while True: 7477 line = self.__get_next_line() 7478 if self.__level_is_finished(line, state.level + 1): 7479 break 7480 elif line.token == TOKEN_VERS: 7481 version = line.data 7482 7483 if self.use_def_src: 7484 if version == "": 7485 sattr = SrcAttribute() 7486 sattr.set_type(_('Character set')) 7487 sattr.set_value(encoding) 7488 self.def_src.add_attribute(sattr) 7489 else: 7490 sattr = SrcAttribute() 7491 sattr.set_type(_('Character set and version')) 7492 sattr.set_value("%s %s" % (encoding, version)) 7493 self.def_src.add_attribute(sattr) 7494 7495 def __header_gedc(self, line, state): 7496 """ 7497 @param line: The current line in GedLine format 7498 @type line: GedLine 7499 @param state: The current state 7500 @type state: CurrentState 7501 """ 7502 while True: 7503 line = self.__get_next_line() 7504 if self.__level_is_finished(line, state.level + 1): 7505 break 7506 elif line.token == TOKEN_VERS: 7507 if (not line.data) or line.data[0] != "5": 7508 self.__add_msg(_("GEDCOM version not supported"), 7509 line, state) 7510 if self.use_def_src: 7511 sattr = SrcAttribute() 7512 sattr.set_type(_('GEDCOM version')) 7513 sattr.set_value(line.data) 7514 self.def_src.add_attribute(sattr) 7515 elif line.token == TOKEN_FORM: 7516 if line.data == "LINEAGE-LINKED": 7517 pass 7518 elif line.data.upper() == "LINEAGE-LINKED": 7519 # 
Allow Lineage-Linked etc. though it should be in 7520 # uppercase (Note: Gramps is not a validator! prc) 7521 self.__add_msg(_("GEDCOM FORM should be in uppercase"), 7522 line, state) 7523 else: 7524 self.__add_msg(_("GEDCOM FORM not supported"), line, state) 7525 if self.use_def_src: 7526 sattr = SrcAttribute() 7527 sattr.set_type(_('GEDCOM form')) 7528 sattr.set_value(line.data) 7529 self.def_src.add_attribute(sattr) 7530 7531 def __header_plac(self, line, state): 7532 """ 7533 @param line: The current line in GedLine format 7534 @type line: GedLine 7535 @param state: The current state 7536 @type state: CurrentState 7537 """ 7538 sub_state = CurrentState(level=state.level + 1) 7539 self.__parse_level(sub_state, self.place_form, self.__undefined) 7540 state.msg += sub_state.msg 7541 7542 def __place_form(self, line, state): 7543 """ 7544 @param line: The current line in GedLine format 7545 @type line: GedLine 7546 @param state: The current state 7547 @type state: CurrentState 7548 """ 7549 self.place_parser.parse_form(line) 7550 7551 def __header_date(self, line, state): 7552 """ 7553 @param line: The current line in GedLine format 7554 @type line: GedLine 7555 @param state: The current state 7556 @type state: CurrentState 7557 7558 This processes the <TRANSMISSION_DATE>, i.e. 
the date when this 7559 [GEDCOM] transmission was created (as opposed to the date when the 7560 source data that was used to create the transmission was published or 7561 created 7562 7563 Because there is a DATE tag, line.data is automatically converted to a 7564 Date object before getting to this point, so it has to be converted 7565 back to a string 7566 """ 7567 tx_date = str(line.data) 7568 tx_time = "" 7569 line = self.__get_next_line() 7570 if self.__level_is_finished(line, state.level): 7571 pass 7572 elif line.token == TOKEN_TIME: 7573 tx_time = str(line.data) 7574 7575 if self.use_def_src: 7576 if tx_time == "": 7577 sattr = SrcAttribute() 7578 sattr.set_type(_('Creation date of GEDCOM')) 7579 sattr.set_value(tx_date) 7580 self.def_src.add_attribute(sattr) 7581 else: 7582 sattr = SrcAttribute() 7583 sattr.set_type(_('Creation date and time of GEDCOM')) 7584 sattr.set_value("%s %s" % (tx_date, tx_time)) 7585 self.def_src.add_attribute(sattr) 7586 7587 def __header_note(self, line, state): 7588 """ 7589 @param line: The current line in GedLine format 7590 @type line: GedLine 7591 @param state: The current state 7592 @type state: CurrentState 7593 """ 7594 if self.use_def_src: 7595 self.__parse_note(line, self.def_src, state) 7596 7597 def __header_subm_name(self, line, state): 7598 """ 7599 @param line: The current line in GedLine format 7600 @type line: GedLine 7601 @param state: The current state 7602 @type state: CurrentState 7603 """ 7604 if self.use_def_src: 7605 self.def_src.set_author(line.data) 7606 7607 def __parse_note(self, line, obj, state): 7608 if line.token == TOKEN_RNOTE: 7609 # reference to a named note defined elsewhere 7610 #NOTE_STRUCTURE: = 7611 # n NOTE @<XREF:NOTE>@ {1:1} 7612 # +1 SOUR @<XREF:SOUR>@ {0:M} # 5.5 only, not in 5.5.1 7613 handle = self.__find_note_handle(self.nid_map[line.data]) 7614 obj.add_note(handle) 7615 self.note_type_map[handle] = OBJ_NOTETYPE.get(type(obj).__name__, 7616 NoteType.GENERAL) 7617 else: 7618 # 
Embedded note 7619 #NOTE_STRUCTURE: = 7620 # n NOTE [<SUBMITTER_TEXT> | <NULL>] {1:1} 7621 # +1 [ CONC | CONT ] <SUBMITTER_TEXT> {0:M} 7622 # +1 SOUR @<XREF:SOUR>@ {0:M} 7623 if not line.data: 7624 self.__add_msg(_("Empty note ignored"), line, state) 7625 self.__skip_subordinate_levels(line.level + 1, state) 7626 else: 7627 new_note = Note(line.data) 7628 new_note.set_gramps_id(self.nid_map[""]) 7629 new_note.set_handle(create_id()) 7630 7631 sub_state = CurrentState(level=state.level + 1) 7632 sub_state.note = new_note 7633 self.__parse_level(sub_state, self.note_parse_tbl, 7634 self.__undefined) 7635 state.msg += sub_state.msg 7636 7637 # Add a default tag if provided 7638 self.__add_default_tag(new_note) 7639 # Set the type of the note 7640 new_note.set_type(OBJ_NOTETYPE.get(type(obj).__name__, 7641 NoteType.GENERAL)) 7642 self.dbase.commit_note(new_note, self.trans, new_note.change) 7643 obj.add_note(new_note.get_handle()) 7644 7645 #---------------------------------------------------------------------- 7646 # 7647 # NOTE parsing 7648 # 7649 #---------------------------------------------------------------------- 7650 7651 def __parse_inline_note(self, line, level): 7652 """ 7653 Handling of lines subordinate to the NOTE GEDCOM tag 7654 7655 n @<XREF:NOTE>@ NOTE <SUBMITTER_TEXT> {1:1} 7656 +1 [ CONC | CONT] <SUBMITTER_TEXT> {0:M} 7657 +1 <<SOURCE_CITATION>> {0:M} 7658 +1 REFN <USER_REFERENCE_NUMBER> {0:M} 7659 +2 TYPE <USER_REFERENCE_TYPE> {0:1} 7660 +1 RIN <AUTOMATED_RECORD_ID> {0:1} 7661 +1 <<CHANGE_DATE>> {0:1} 7662 """ 7663 state = CurrentState(level=1) 7664 if not line.data and \ 7665 self.nid_map.clean(line.token_text) not in self.nid_map.map(): 7666 self.__add_msg(_("Empty note ignored"), line) 7667 self.__skip_subordinate_levels(level, state) 7668 else: 7669 gid = self.nid_map[line.token_text] 7670 handle = self.__find_note_handle(gid) 7671 new_note = Note(line.data) 7672 new_note.set_handle(handle) 7673 new_note.set_gramps_id(gid) 7674 if handle in 
self.note_type_map: 7675 new_note.set_type(self.note_type_map[handle]) 7676 sub_state = CurrentState(level=state.level) 7677 sub_state.note = new_note 7678 self.__parse_level(sub_state, self.note_parse_tbl, 7679 self.__undefined) 7680 state.msg += sub_state.msg 7681 7682 self.dbase.commit_note(new_note, self.trans, new_note.change) 7683 self.__check_msgs(_("NOTE Gramps ID %s") % 7684 new_note.get_gramps_id(), state, None) 7685 7686 def __note_chan(self, line, state): 7687 if state.note: 7688 self.__parse_change(line, state.note, state.level + 1, state) 7689 7690 def __parse_source_reference(self, citation, level, handle, state): 7691 """ 7692 Read the data associated with a SOUR reference. 7693 """ 7694 sub_state = CurrentState(level=level + 1) 7695 sub_state.citation = citation 7696 sub_state.handle = handle 7697 self.__parse_level(sub_state, self.citation_parse_tbl, self.__ignore) 7698 state.msg += sub_state.msg 7699 7700 def __parse_header_head(self): 7701 """ 7702 Validate that this is a valid GEDCOM file. 
7703 """ 7704 line = self.__get_next_line() 7705 if line.token != TOKEN_HEAD: 7706 raise GedcomError("%s is not a GEDCOM file" % self.filename) 7707 7708 def __parse_submission(self, line, state): 7709 """ 7710 @param line: The current line in GedLine format 7711 @type line: GedLine 7712 @param state: The current state 7713 @type state: CurrentState 7714 7715 Handling of lines subordinate to the level 0 SUMN (Submission) GEDCOM 7716 tag 7717 7718 n @<XREF:SUBN>@ SUBN {1:1] 7719 +1 SUBM @<XREF:SUBM>@ {0:1} 7720 +1 FAMF <NAME_OF_FAMILY_FILE> {0:1} 7721 +1 TEMP <TEMPLE_CODE> {0:1} 7722 +1 ANCE <GENERATIONS_OF_ANCESTORS> {0:1} 7723 +1 DESC <GENERATIONS_OF_DESCENDANTS> {0:1} 7724 +1 ORDI <ORDINANCE_PROCESS_FLAG> {0:1} 7725 +1 RIN <AUTOMATED_RECORD_ID> {0:1} 7726 +1 NOTE <NOTE_STRUCTURE> {0:m} 7727 """ 7728 while True: 7729 line = self.__get_next_line() 7730 msg = "" 7731 if self.__level_is_finished(line, state.level): 7732 break 7733 elif line.token == TOKEN_SUBM: 7734 msg = _("Submission: Submitter") 7735 elif line.token == TOKEN_UNKNOWN and line.token_text == "FAMF": 7736 msg = _("Submission: Family file") 7737 elif line.token == TOKEN_TEMP: 7738 msg = _("Submission: Temple code") 7739 elif line.token == TOKEN_UNKNOWN and line.token_text == "ANCE": 7740 msg = _("Submission: Generations of ancestors") 7741 elif line.token == TOKEN_UNKNOWN and line.token_text == "DESC": 7742 msg = _("Submission: Generations of descendants") 7743 elif line.token == TOKEN_UNKNOWN and line.token_text == "ORDI": 7744 msg = _("Submission: Ordinance process flag") 7745 elif line.token == TOKEN_NOTE or line.token == TOKEN_RNOTE: 7746 self.__parse_note(line, self.def_src, state) 7747 else: 7748 self.__not_recognized(line, state) 7749 continue 7750 7751 if self.use_def_src and msg != "": 7752 sattr = SrcAttribute() 7753 sattr.set_type(msg) 7754 sattr.set_value(line.data) 7755 self.def_src.add_attribute(sattr) 7756 self.dbase.commit_source(self.def_src, self.trans) 7757 7758 def 
def __parse_change(self, line, obj, level, state):
    """
    Read the lines subordinate to a CHAN record and store the resulting
    change time on obj.

    CHANGE_DATE:=

    >  n CHAN {1:1}
    >  +1 DATE <CHANGE_DATE> {1:1}
    >  +2 TIME <TIME_VALUE> {0:1}
    >  +1 <<NOTE_STRUCTURE>> {0:M}

    The Note structure is ignored, since we have nothing
    corresponding in Gramps.

    The DATE and TIME values are combined and parsed with time.strptime,
    then converted with time.mktime. If that fails (and it shouldn't
    unless the value is meaningless and doesn't conform to the GEDCOM
    spec), obj.change is left untouched.

    @param line: The current line in GedLine format (replaced by each
                 subordinate line as it is read)
    @type line: GedLine
    @param obj: The object whose 'change' attribute receives the time
    @param level: The level at which the subordinate lines end
    @type level: int
    @param state: The current state
    @type state: CurrentState
    """
    time_text = None
    date_obj = None

    while True:
        line = self.__get_next_line()
        if self.__level_is_finished(line, level):
            break
        token = line.token
        if token == TOKEN_TIME:
            time_text = line.data
        elif token == TOKEN_DATE:
            # The lexer has already converted the value to a Date object
            date_obj = line.data
        elif token in (TOKEN_NOTE, TOKEN_RNOTE):
            self.__ignore(line, state)
        else:
            self.__not_recognized(line, state)

    # Without a DATE there is nothing to convert to a change time
    if not date_obj:
        return

    date_text = "%s %s %s" % (date_obj.get_day(), date_obj.get_month(),
                              date_obj.get_year())
    try:
        if time_text:
            stamp = "%s %s" % (date_text, time_text)
            try:
                parsed = time.strptime(stamp, "%d %m %Y %H:%M:%S")
            except ValueError:
                # seconds is optional in GEDCOM
                parsed = time.strptime(stamp, "%d %m %Y %H:%M")
        else:
            parsed = time.strptime(date_text, "%d %m %Y")
        obj.change = time.mktime(parsed)
    except (ValueError, OverflowError):
        # parse of time structure failed, so ignore. According to the
        # Python manual: "The functions in this [time] module do not
        # handle dates and times before the epoch or far in the future.
        # The cut-off point in the future is determined by the C
        # library; for Unix, it is typically in 2038." If the time is
        # too far in the future, this gives OverflowError.
        pass


def __do_refn(self, line, state, obj):
    """
    Store a REFN line as an attribute of obj. A directly subordinate
    TYPE line, when present, is attached to the attribute as a note of
    type 'REFN-TYPE'.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    @param obj: The object to attach the attribute
    @type obj: Gramps primary object
    """
    refn_attr = Attribute()
    refn_attr.set_type(line.token_text)  # Attribute : REFN
    refn_attr.set_value(line.data)

    # if there is a subsequent TYPE, we add it as a note to the attribute
    type_line = self.__chk_subordinate(state.level + 1, state, TOKEN_TYPE)
    if type_line:
        type_note = Note(type_line.data)
        type_note.set_gramps_id(self.nid_map[""])
        type_note.set_handle(create_id())
        type_note.set_type('REFN-TYPE')
        self.dbase.commit_note(type_note, self.trans, type_note.change)
        refn_attr.add_note(type_note.get_handle())

    obj.attribute_list.append(refn_attr)
def __build_family_event_pair(self, state, event_type, event_map,
                              description):
    """
    Create a family Event of the given type together with its EventRef.

    The event is added to the database, the subordinate lines are parsed
    using event_map, any place collected during parsing is attached, and
    the event is committed.

    @param state: The current state (supplies the family and the level)
    @type state: CurrentState
    @param event_type: The type to give the new event
    @param event_map: Table of parse functions indexed by token
    @type event_map: dict
    @param description: Text of the event line; ignored when empty or 'Y'
    @type description: str
    @return: a reference to the newly created event
    @rtype: EventRef
    """
    event = Event()
    event_ref = EventRef()
    event.set_gramps_id(self.emapper.find_next())
    event.set_type(event_type)
    # A bare 'Y' only asserts that the event happened; it is not a
    # description.
    if description and description != 'Y':
        event.set_description(description)

    self.dbase.add_event(event, self.trans)

    sub_state = CurrentState()
    sub_state.family = state.family
    sub_state.level = state.level + 1
    sub_state.event = event
    sub_state.event_ref = event_ref
    sub_state.pf = self.place_parser

    self.__parse_level(sub_state, event_map, self.__undefined)
    state.msg += sub_state.msg

    self.__add_place(event, sub_state)

    self.dbase.commit_event(event, self.trans)
    event_ref.set_reference_handle(event.handle)
    return event_ref


def __do_photo(self, state):
    """
    Choose the primary photo from the list of media present for this
    person. Supports FTM _PHOTO. and others _PRIM feature.
      0 INDI
      +1 _PHOTO @<XREF:OBJE>@ {1:1}

      0 INDI
        +1 OBJE @<XREF:OBJE>@
          +2 _PRIM <Y/N>

      0 INDI
        +1 OBJE
          +2 FILE primary_photo.jpg
          +2 _PRIM <Y/N>

    For the _PHOTO variant, state.photo contains the XREF ('@M1@').
    For the _PRIM variants, state.photo contains the handle.
    Since Gramps currently uses the first media in the list as the
    primary, find the primary photo if already in the list, if present,
    move to beginning. If not present, add at the beginning.
    This is run after all of the person processing is complete but before
    committing the person.

    @param state: The current state (supplies photo and person)
    @type state: CurrentState
    """
    if state.photo.startswith('@'):
        gramps_id = self.oid_map[state.photo]
        handle = self.__find_media_handle(gramps_id)
    elif state.photo:
        handle = state.photo
    else:
        return
    for mref in state.person.media_list:
        if handle == mref.ref:
            # Returning straight after the move keeps the mutation of the
            # list being iterated safe.
            state.person.media_list.remove(mref)
            state.person.media_list.insert(0, mref)
            return
    mref = MediaRef()
    mref.set_reference_handle(handle)
    state.person.media_list.insert(0, mref)


def __extract_temple(self, line):
    """
    Determine the LDS Temple from the input line.

    The whole value is tried first, as a temple code and then as a temple
    name; after that only its first word is tried. If nothing matches, a
    message is recorded and the raw value is returned so that it is
    stored anyway.

    @param line: The current line in GedLine format
    @type line: GedLine
    @return: the temple code, or line.data unchanged if it is not known
    """
    def get_code(code):
        """ get the Temple code, or None if code is not recognised """
        if TEMPLES.is_valid_code(code):
            return code
        elif TEMPLES.is_valid_name(code):
            return TEMPLES.code(code)
        return None

    code = get_code(line.data)
    if code:
        return code

    # Not sure why we do this. Kind of ugly.
    # An empty or whitespace-only value has no first word; skip the
    # lookup instead of raising IndexError.
    words = line.data.split()
    if words:
        code = get_code(words[0])
        if code:
            return code

    # Okay we have no clue which temple this is.
    # We should tell the user and store it anyway.
    self.__add_msg(_("Invalid temple code"), line, None)
    return line.data


def __add_default_source(self, obj):
    """
    Add the default source to the object.

    A citation of the default source is created only when use_def_src is
    set and the object has no citation yet.

    @param obj: The object to receive the citation
    """
    if self.use_def_src and not obj.get_citation_list():
        citation = Citation()
        citation.set_reference_handle(self.def_src.handle)
        self.dbase.add_citation(citation, self.trans)
        obj.add_citation(citation.handle)


def __add_default_tag(self, obj):
    """
    Add the default tag to the object.

    Nothing is done when no default tag is set.

    @param obj: The object to receive the tag
    """
    if self.default_tag:
        obj.add_tag(self.default_tag.handle)


def __subm_name(self, line, state):
    """
    Store the value of the line as the name of the researcher held in
    state.res.

    @param line: The current line in GedLine format
    @type line: GedLine
    @param state: The current state
    @type state: CurrentState
    """
    state.res.set_name(line.data)
8049 state.res.set_address(state.res.get_street()) 8050 8051 def __subm_phon(self, line, state): 8052 """ 8053 n PHON <PHONE_NUMBER> {0:3} 8054 8055 @param line: The current line in GedLine format 8056 @type line: GedLine 8057 @param state: The current state 8058 @type state: CurrentState 8059 """ 8060 if state.res.get_phone(): 8061 self.__add_msg(_("Only one phone number supported"), line, state) 8062 else: 8063 state.res.set_phone(line.data) 8064 8065 def __subm_email(self, line, state): 8066 """ 8067 n EMAIL <ADDRESS_EMAIL> {0:3} 8068 8069 @param line: The current line in GedLine format 8070 @type line: GedLine 8071 @param state: The current state 8072 @type state: CurrentState 8073 """ 8074 # only record the first multiple emails for researcher 8075 if not state.res.get_email(): 8076 state.res.set_email(line.data) 8077 self.__repo_email(line, state) 8078 8079 8080#------------------------------------------------------------------------- 8081# 8082# GedcomStageOne 8083# 8084#------------------------------------------------------------------------- 8085class GedcomStageOne: 8086 """ 8087 The GedcomStageOne parser scans the file quickly, looking for a few things. 8088 This includes: 8089 8090 1. Character set encoding 8091 2. Number of people and families in the list 8092 3. Child to family references, since Ancestry.com creates GEDCOM files 8093 without the FAMC references. 8094 """ 8095 __BAD_UTF16 = _("Your GEDCOM file is corrupted. " 8096 "The file appears to be encoded using the UTF16 " 8097 "character set, but is missing the BOM marker.") 8098 __EMPTY_GED = _("Your GEDCOM file is empty.") 8099 8100 @staticmethod 8101 def __is_xref_value(value): 8102 """ 8103 Return True if value is in the form of a XREF value. We assume that 8104 if we have a leading '@' character, then we are okay. 
8105 """ 8106 return value and value[0] == '@' 8107 8108 def __init__(self, ifile): 8109 self.ifile = ifile 8110 self.famc = defaultdict(list) 8111 self.fams = defaultdict(list) 8112 self.enc = "" 8113 self.pcnt = 0 8114 self.lcnt = 0 8115 8116 def __detect_file_decoder(self, input_file): 8117 """ 8118 Detects the file encoding of the file by looking for a BOM 8119 (byte order marker) in the GEDCOM file. If we detect a UTF-16 or 8120 UTF-8-BOM encoded file, we choose appropriate decoders. If no BOM 8121 is detected, we return in UTF-8 mode it is the more modern option; 8122 and anyway it doesn't really matter as we are only looking for GEDCOM 8123 keywords which are only 7-bit ASCII anyway. 8124 In any case, we Always return the file in text mode with transparent 8125 newline (CR, LF, or CRLF). 8126 """ 8127 line = input_file.read(2) 8128 if line == b"\xef\xbb": 8129 input_file.read(1) 8130 self.enc = "utf_8_sig" 8131 return TextIOWrapper(input_file, encoding='utf_8_sig', 8132 errors='replace', newline=None) 8133 elif line == b"\xff\xfe" or line == b"\xfe\xff": 8134 self.enc = "UTF16" 8135 input_file.seek(0) 8136 return TextIOWrapper(input_file, encoding='utf_16', 8137 errors='replace', newline=None) 8138 elif not line: 8139 raise GedcomError(self.__EMPTY_GED) 8140 elif line == b"\x30\x00" or line == b"\x00\x30": 8141 raise GedcomError(self.__BAD_UTF16) 8142 else: 8143 input_file.seek(0) 8144 return TextIOWrapper(input_file, encoding='utf-8', 8145 errors='replace', newline=None) 8146 8147 def parse(self): 8148 """ 8149 Parse the input file. 8150 """ 8151 current_family_id = "" 8152 8153 reader = self.__detect_file_decoder(self.ifile) 8154 8155 for line in reader: 8156 # Scan for a few items, keep counts. 
Also look for actual CHAR 8157 # Keyword to figure out actual encodeing for non-unicode file types 8158 line = line.strip() 8159 if not line: 8160 continue 8161 self.lcnt += 1 8162 8163 try: 8164 data = line.split(None, 3) + [''] 8165 (level, key, value) = data[:3] 8166 level = int(level) 8167 key = key.strip() 8168 value = value.strip() 8169 except: 8170 continue 8171 8172 if level == 0 and key[0] == '@': 8173 if value in ("FAM", "FAMILY"): 8174 current_family_id = key.strip()[1:-1] 8175 elif value in ("INDI", "INDIVIDUAL"): 8176 self.pcnt += 1 8177 elif key in ("HUSB", "HUSBAND", "WIFE") and \ 8178 self.__is_xref_value(value): 8179 self.fams[value[1:-1]].append(current_family_id) 8180 elif key in ("CHIL", "CHILD") and self.__is_xref_value(value): 8181 self.famc[value[1:-1]].append(current_family_id) 8182 elif key == 'CHAR' and not self.enc: 8183 assert isinstance(value, str) 8184 self.enc = value 8185 LOG.debug("parse pcnt %d", self.pcnt) 8186 LOG.debug("parse famc %s", dict(self.famc)) 8187 LOG.debug("parse fams %s", dict(self.fams)) 8188 self.ifile = reader # need this to keep python from autoclosing file 8189 8190 def get_famc_map(self): 8191 """ 8192 Return the Person to Child Family map 8193 """ 8194 return self.famc 8195 8196 def get_fams_map(self): 8197 """ 8198 Return the Person to Family map (where the person is a spouse) 8199 """ 8200 return self.fams 8201 8202 def get_encoding(self): 8203 """ 8204 Return the detected encoding 8205 """ 8206 return self.enc.upper() 8207 8208 def set_encoding(self, enc): 8209 """ 8210 Forces the encoding 8211 """ 8212 assert isinstance(enc, str) 8213 self.enc = enc 8214 8215 def get_person_count(self): 8216 """ 8217 Return the number of INDI records found 8218 """ 8219 return self.pcnt 8220 8221 def get_line_count(self): 8222 """ 8223 Return the number of lines in the file 8224 """ 8225 return self.lcnt 8226 8227 8228#------------------------------------------------------------------------- 8229# 8230# make_gedcom_date 
#
#-------------------------------------------------------------------------
def make_gedcom_date(subdate, calendar, mode, quality):
    """
    Convert a Gramps date structure into a GEDCOM compatible date.

    @param subdate: Sequence whose first three items are day, month, year
    @param calendar: Calendar identifier; looked up in CALENDAR_MAP for
                     the month names and the calendar escape prefix
    @param mode: Date modifier; ignored if not a key of DATE_MODIFIER
    @param quality: Date quality; ignored if not a key of DATE_QUALITY
    @return: the GEDCOM date text
    @rtype: str
    """
    day, mon, year = subdate[0:3]
    month_names, prefix = CALENDAR_MAP.get(calendar, (MONTH, ""))

    # A negative year is written as a positive year with a B.C. marker.
    bce = " B.C." if year < 0 else ""
    year = abs(year)

    try:
        text = __build_date_string(day, mon, year, bce, month_names)
    except IndexError:
        # The month has no entry in the month table: keep the year only.
        print("Month index error - %d" % mon)
        text = "%d%s" % (year, bce)

    if calendar == Date.CAL_SWEDISH:
        # If Swedish calendar use ISO format for the date and append
        # (swedish) to indicate the calendar
        if year and not mon and not day:
            text = "%i" % (year)
        else:
            text = "%i-%02i-%02i" % (year, mon, day)
        text = text + " (swedish)"
        # Skip prefix @#DUNKNOWN@ as it seems
        # not used in all other genealogy applications.
        # Gramps can handle it on import, but not with (swedish) appended
        # to explain what calendar, the unknown refer to
        prefix = ""

    # Lead-ins are prepended innermost first: calendar prefix, then
    # modifier, then quality.
    if prefix:
        text = "%s %s" % (prefix, text)
    if mode in DATE_MODIFIER:
        text = "%s %s" % (DATE_MODIFIER[mode], text)
    if quality in DATE_QUALITY:
        text = "%s %s" % (DATE_QUALITY[quality], text)
    return text


def __build_date_string(day, mon, year, bce, mmap):
    """
    Build a date string from the supplied information.

    A zero day, month or year means that part is not known. Without a
    month only the year is written; without a year the known parts are
    written in parentheses.

    @param day: Day of the month, 0 if unknown
    @param mon: Month number used as index into mmap, 0 if unknown
    @param year: Year, 0 if unknown
    @param bce: Text appended after the year (the B.C. marker or "")
    @param mmap: Month names indexed by month number
    @raise IndexError: if mon is not a valid index into mmap
    """
    if mon == 0:
        # Without a month the day cannot be expressed either.
        return '%d%s' % (year, bce)

    month = mmap[mon]
    if year == 0:
        if day == 0:
            return '(%s)' % month
        return "(%d %s)" % (day, month)
    if day == 0:
        return "%s %d%s" % (month, year, bce)
    return "%d %s %d%s" % (day, month, year, bce)