1#
2# Gramps - a GTK+/GNOME based genealogy program
3#
4# Copyright (C) 2000-2007  Donald N. Allingham
5# Copyright (C) 2009-2010  Gary Burton
6# Copyright (C) 2010       Nick Hall
7# Copyright (C) 2011       Tim G L Lyons
8# Copyright (C) 2016       Paul R. Culley
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23#
24
25"""
26Import from GEDCOM
27
The GEDCOM file format is defined by the GEDCOM 5.5 Specification, found
29at http://www.familysearch.org/GEDCOM/GEDCOM55.EXE
30
31The basic structure is a line with three attributes:
32
33<LEVEL> <TOKEN> <DATA>
34
35Because of this structure, it does not lend itself to more traditional
36parsing techniques, such as LALR. The LEVEL token implies too much to be
37useful in this context. While this makes parsing more difficult, it
38does provide one very beneficial feature for GEDCOM: Error recoverability.
39GEDCOM is a poorly implemented standard, primarily because it is a poor
40standard to begin with.
41
42Most commercial applications that implement GEDCOM output add custom
43extensions, and feel free to violate the existing structure. If one were
44cynical, one might believe that the commercial programs were trying to
45make it difficult to transfer your data to another application.
46
47This parser takes a different approach to parsing a GEDCOM file. The first
48state, Lexer, reads lines from the file, and does some basic lexical
49analysis on each line (actually several lines, since it automatically
50combines CONT and CONC tagged lines). Each logical line returned to this
51parser contains:
52
53  Level, Token, Token text, Data, and line number.
54
The Data field is typically text, but in some cases, it may be an integer
56value representing an enumerated type or a Gramps object (in the case of
57dates).
58
The parser works on the current level. Each context and level has an
associated table (dictionary) of functions indexed by the corresponding
TOKEN. When a token is found, we index into the table to find the function
associated with the token. If no function is found, a function that skips
the line and all subordinate lines (lines with a higher level number) is
called. If a function is found, then we call that function, which in turn
processes the line, and all tokens at the lower level.
66
67For example:
68
691 BIRT
70  2 DATE 1 JAN 2000
71  2 UKNOWN TAG
72    3 NOTE DATA
73
The function parsing the individual at level 1, would encounter the BIRT tag.
It would look up the BIRT token in the table to see if a function is defined
for this TOKEN, and pass control to this function. This function would then
start parsing level 2. It would encounter the DATE tag, look up the
corresponding function in the level 2 table, and pass control to its
associated function. This function would terminate, and return control back to
the level 2 parser, which would then encounter the "UKNOWN" tag. Since this is
not a valid token, it would not be in the table, and a function would be called
that skips all lines until the next level 2 token is found (in this case,
skipping the "3 NOTE DATA" line).
84"""
85
86#-------------------------------------------------------------------------
87#
88# standard python modules
89#
90#-------------------------------------------------------------------------
91import os
92import re
93import time
94# from xml.parsers.expat import ParserCreate
95from collections import defaultdict, OrderedDict
96import string
97import mimetypes
98from io import StringIO, TextIOWrapper
99from urllib.parse import urlparse
100
101#------------------------------------------------------------------------
102#
103# Set up logging
104#
105#------------------------------------------------------------------------
106import logging
107LOG = logging.getLogger(".libgedcom")
108
109#-------------------------------------------------------------------------
110#
111# Gramps modules
112#
113#-------------------------------------------------------------------------
114from gramps.gen.const import GRAMPS_LOCALE as glocale
115_ = glocale.translation.gettext
116from gramps.gen.errors import GedcomError
117from gramps.gen.lib import (
118    Address, Attribute, AttributeType, ChildRef,
119    ChildRefType, Citation, Date, Event, EventRef, EventRoleType,
120    EventType, Family, FamilyRelType, LdsOrd, Location, Media,
121    MediaRef, Name, NameType, Note, NoteType, Person, PersonRef, Place,
122    RepoRef, Repository, RepositoryType, Researcher,
123    Source, SourceMediaType, SrcAttribute,
124    Surname, Tag, Url, UrlType, PlaceType, PlaceRef, PlaceName)
125from gramps.gen.db import DbTxn
126from gramps.gen.updatecallback import UpdateCallback
127from gramps.gen.utils.file import media_path
128from gramps.gen.utils.id import create_id
129from gramps.gen.utils.lds import TEMPLES
130from gramps.gen.utils.unknown import make_unknown, create_explanation_note
131from gramps.gen.datehandler._dateparser import DateParser
132from gramps.gen.db.dbconst import EVENT_KEY
133from gramps.gen.lib.const import IDENTICAL
134from gramps.gen.lib import (StyledText, StyledTextTag, StyledTextTagType)
135from gramps.gen.lib.urlbase import UrlBase
136from gramps.plugins.lib.libplaceimport import PlaceImport
137from gramps.gen.display.place import displayer as _pd
138from gramps.gen.utils.grampslocale import GrampsLocale
139
140#-------------------------------------------------------------------------
141#
142# constants
143#
144#-------------------------------------------------------------------------
# Integer identifiers for GEDCOM tags.  The TOKENS table further down maps the
# tag text read from a file to one of these values; the lexer uses
# TOKEN_UNKNOWN for any tag that has no entry in that table.
# A doubled underscore in a name (e.g. TOKEN__AKA) corresponds to a tag that
# itself starts with '_' (e.g. "_AKA").
# The numbering is not contiguous: 10, 31, 37, 73 and 113 are unassigned.
# NOTE(review): some identifiers (e.g. TOKEN_ID, TOKEN_GEVENT, TOKEN_GATTR,
# TOKEN_ATTR, TOKEN_RNOTE) have no entry in TOKENS; presumably they are
# assigned by code outside this section -- confirm.
TOKEN_UNKNOWN = 0
TOKEN_ABBR = 1
TOKEN_ADDR = 2
TOKEN_ADOP = 3
TOKEN_ADR1 = 4
TOKEN_ADR2 = 5
TOKEN_AFN = 6
TOKEN_IGNORE = 7
TOKEN_REFN = 8
TOKEN__AKA = 9
TOKEN_ALIA = 11
TOKEN_ANCI = 12
TOKEN_ASSO = 13
TOKEN_AUTH = 14
TOKEN_BAPL = 15
TOKEN_BIRT = 16
TOKEN__CAT = 17
TOKEN_CAUS = 18
TOKEN_CHAN = 19
TOKEN_CHAR = 20
TOKEN_CHIL = 21
TOKEN_CITY = 22
TOKEN__COMM = 23
TOKEN_CONC = 24
TOKEN_CONT = 25
TOKEN_COPR = 26
TOKEN_CORP = 27
TOKEN_CTRY = 28
TOKEN_DATA = 29
TOKEN_DATE = 30
TOKEN_DEAT = 32
TOKEN_DESI = 33
TOKEN_DEST = 34
TOKEN_ENDL = 35
TOKEN_EVEN = 36
TOKEN_FAM = 38
TOKEN_FAMC = 39
TOKEN_FAMS = 40
TOKEN_FILE = 41
TOKEN_FORM = 42
TOKEN__FREL = 43
TOKEN_GEDC = 44
TOKEN_GIVN = 45
TOKEN__GODP = 46
TOKEN_HUSB = 47
TOKEN_INDI = 48
TOKEN_LABL = 49
TOKEN_LANG = 50
TOKEN__LOC = 51
TOKEN__MARNM = 52
TOKEN__MREL = 53
TOKEN__NAME = 54
TOKEN_NAME = 55
TOKEN_NCHI = 56
TOKEN_NICK = 57
TOKEN_NOTE = 58
TOKEN_NPFX = 59
TOKEN_NSFX = 60
TOKEN_OBJE = 61
TOKEN_OFFI = 62
TOKEN_PAGE = 63
TOKEN_PEDI = 64
TOKEN_PERI = 65
TOKEN_PHON = 66
TOKEN_PLAC = 67
TOKEN_POST = 68
TOKEN__PRIMARY = 69
TOKEN__PRIV = 70
TOKEN_PUBL = 71
TOKEN_QUAY = 72
TOKEN_RELI = 74
TOKEN_REPO = 75
TOKEN_RESI = 76
TOKEN_RFN = 77
TOKEN_RIN = 78
TOKEN__SCHEMA = 79
TOKEN_SEX = 80
TOKEN_SLGC = 81
TOKEN_SLGS = 82
TOKEN_SOUR = 83
TOKEN_SPFX = 84
TOKEN_STAE = 85
TOKEN__STAT = 86
TOKEN_STAT = 87
TOKEN_SUBM = 88
TOKEN_SUBN = 89
TOKEN_SURN = 90
TOKEN_TAXT = 91
TOKEN_TEMP = 92
TOKEN_TEXT = 93
TOKEN_TIME = 94
TOKEN_TITL = 95
TOKEN__TODO = 96
TOKEN_TRLR = 97
TOKEN_TYPE = 98
TOKEN__UID = 99
TOKEN_VERS = 100
TOKEN_WIFE = 101
TOKEN__WITN = 102
TOKEN__WTN = 103
TOKEN_AGNC = 104
TOKEN_HEAD = 105
TOKEN_CALN = 106
TOKEN_MEDI = 107
TOKEN_RELA = 108
TOKEN__LKD = 109
TOKEN_BLOB = 110
TOKEN_CONL = 111
TOKEN_AGE = 112
TOKEN_RESN = 114
TOKEN_ID = 115
TOKEN_GEVENT = 116
TOKEN_RNOTE = 117
TOKEN_GATTR = 118
TOKEN_ATTR = 119
TOKEN_MAP = 120
TOKEN_LATI = 121
TOKEN_LONG = 122
TOKEN_FACT = 123
TOKEN_EMAIL = 124
TOKEN_WWW = 125
TOKEN_FAX = 126
TOKEN_ROLE = 127
TOKEN__MAR = 128
TOKEN__MARN = 129
TOKEN__ADPN = 130
TOKEN__FSFTID = 131
TOKEN__PHOTO = 132
TOKEN__LINK = 133
TOKEN__PRIM = 134
TOKEN__JUST = 135
TOKEN__TEXT = 136
TOKEN__DATE = 137
278
# Tag text as it appears in a GEDCOM file -> TOKEN_* identifier.
# Besides the short standard tags, spelled-out forms (e.g. "BIRT"/"BIRTH")
# and a number of extension tags starting with '_' are listed; several tags
# may share one identifier.
# Lexer.__readahead looks a tag up with TOKENS.get(tag, TOKEN_UNKNOWN), i.e.
# the key must match the tag text exactly (including case).
TOKENS = {
    "_ADPN"           : TOKEN__ADPN,
    "_AKA"            : TOKEN__AKA,
    "_AKAN"           : TOKEN__AKA,
    "_ALIA"           : TOKEN_ALIA,
    "_ANCES_ORDRE"    : TOKEN_IGNORE,
    # NOTE(review): TOKEN__CAT is defined but this table does not use it
    "_CAT"            : TOKEN_IGNORE,
    "_CHUR"           : TOKEN_IGNORE,
    "_COMM"           : TOKEN__COMM,
    "_DATE"           : TOKEN__DATE,
    "_DATE2"          : TOKEN_IGNORE,
    "_DETAIL"         : TOKEN_IGNORE,
    "_EMAIL"          : TOKEN_EMAIL,
    "_E-MAIL"         : TOKEN_EMAIL,
    "_FREL"           : TOKEN__FREL,
    "_FSFTID"         : TOKEN__FSFTID,
    "_GODP"           : TOKEN__GODP,
    "_ITALIC"         : TOKEN_IGNORE,
    "_JUST"           : TOKEN__JUST,    # FTM Citation Quality Justification
    "_LEVEL"          : TOKEN_IGNORE,
    "_LINK"           : TOKEN__LINK,
    "_LKD"            : TOKEN__LKD,
    "_LOC"            : TOKEN__LOC,
    "_MAR"            : TOKEN__MAR,
    "_MARN"           : TOKEN__MARN,
    "_MARNM"          : TOKEN__MARNM,
    "_MASTER"         : TOKEN_IGNORE,
    "_MEDI"           : TOKEN_MEDI,
    "_MREL"           : TOKEN__MREL,
    "_NAME"           : TOKEN__NAME,
    "_PAREN"          : TOKEN_IGNORE,
    "_PHOTO"          : TOKEN__PHOTO,
    "_PLACE"          : TOKEN_IGNORE,
    "_PREF"           : TOKEN__PRIMARY,
    "_PRIM"           : TOKEN__PRIM,
    "_PRIMARY"        : TOKEN__PRIMARY,
    "_PRIV"           : TOKEN__PRIV,
    "_PUBLISHER"      : TOKEN_IGNORE,
    "_SCBK"           : TOKEN_IGNORE,
    "_SCHEMA"         : TOKEN__SCHEMA,
    "_SSHOW"          : TOKEN_IGNORE,
    "_STAT"           : TOKEN__STAT,
    "_TEXT"           : TOKEN__TEXT,
    "_TODO"           : TOKEN__TODO,
    "_TYPE"           : TOKEN_TYPE,
    "_UID"            : TOKEN__UID,
    "_URL"            : TOKEN_WWW,
    "_WITN"           : TOKEN__WITN,
    "_WTN"            : TOKEN__WTN,
    "ABBR"            : TOKEN_ABBR,
    "ABBREVIATION"    : TOKEN_ABBR,
    "ADDR"            : TOKEN_ADDR,
    "ADDRESS"         : TOKEN_ADDR,
    "ADDRESS1"        : TOKEN_ADR1,
    "ADDRESS2"        : TOKEN_ADR2,
    "ADOP"            : TOKEN_ADOP,
    "ADOPT"           : TOKEN_ADOP,
    "ADR1"            : TOKEN_ADR1,
    "ADR2"            : TOKEN_ADR2,
    "AFN"             : TOKEN_AFN,
    "AGE"             : TOKEN_AGE,
    # NOTE(review): unlike the other spelled-out forms, this one does not
    # share the identifier of its short tag ("AGNC") -- confirm intended
    "AGENCY"          : TOKEN_IGNORE,
    "AGNC"            : TOKEN_AGNC,
    "AKA"             : TOKEN__AKA,
    "ALIA"            : TOKEN_ALIA,
    "ALIAS"           : TOKEN_ALIA,
    "ANCI"            : TOKEN_ANCI,
    "ASSO"            : TOKEN_ASSO,
    "ASSOCIATES"      : TOKEN_ASSO,
    "AUTH"            : TOKEN_AUTH,
    "AUTHOR"          : TOKEN_AUTH,
    "BAPL"            : TOKEN_BAPL,
    "BAPTISM-LDS"     : TOKEN_BAPL,
    "BIRT"            : TOKEN_BIRT,
    "BIRTH"           : TOKEN_BIRT,
    "BLOB"            : TOKEN_BLOB,
    "CALL_NUMBER"     : TOKEN_CALN,
    "CALN"            : TOKEN_CALN,
    "CAUS"            : TOKEN_CAUS,
    "CAUSE"           : TOKEN_CAUS,
    "CHAN"            : TOKEN_CHAN,
    "CHANGE"          : TOKEN_CHAN,
    "CHAR"            : TOKEN_CHAR,
    "CHARACTER"       : TOKEN_CHAR,
    "CHIL"            : TOKEN_CHIL,
    "CHILD"           : TOKEN_CHIL,
    "CHILDREN_COUNT"  : TOKEN_NCHI,
    "CITY"            : TOKEN_CITY,
    "CONC"            : TOKEN_CONC,
    "CONCATENATION"   : TOKEN_CONC,
    # NOTE(review): key spelled as found; possibly meant "CONCATENATE"
    "CONCATENTATE"    : TOKEN_CONC,
    "CONL"            : TOKEN_CONL,
    "CONT"            : TOKEN_CONT,
    "CONTINUATION"    : TOKEN_CONT,
    "CONTINUED"       : TOKEN_CONT,
    "COPR"            : TOKEN_COPR,
    "COPYRIGHT"       : TOKEN_COPR,
    "CORP"            : TOKEN_CORP,
    "CORPORATION"     : TOKEN_CORP,
    "COUNTRY"         : TOKEN_CTRY,
    "CTRY"            : TOKEN_CTRY,
    "DATA"            : TOKEN_DATA,
    "DATE"            : TOKEN_DATE,
    "DEAT"            : TOKEN_DEAT,
    "DEATH"           : TOKEN_DEAT,
    "DESI"            : TOKEN_DESI,
    "DEST"            : TOKEN_DEST,
    "DESTINATION"     : TOKEN_DEST,
    "EMAI"            : TOKEN_EMAIL,
    "EMAIL"           : TOKEN_EMAIL,
    "ENDL"            : TOKEN_ENDL,
    "ENDOWMENT"       : TOKEN_ENDL,
    "EVEN"            : TOKEN_EVEN,
    "EVENT"           : TOKEN_EVEN,
    "FACT"            : TOKEN_FACT,
    "FAM"             : TOKEN_FAM,
    "FAMC"            : TOKEN_FAMC,
    "FAMILY"          : TOKEN_FAM,
    "FAMILY_CHILD"    : TOKEN_FAMC,
    "FAMILY_SPOUSE"   : TOKEN_FAMS,
    "FAMS"            : TOKEN_FAMS,
    "FAX"             : TOKEN_FAX,
    "FILE"            : TOKEN_FILE,
    "FORM"            : TOKEN_FORM,
    "GEDC"            : TOKEN_GEDC,
    "GEDCOM"          : TOKEN_GEDC,
    "GIVEN_NAME"      : TOKEN_GIVN,
    "GIVN"            : TOKEN_GIVN,
    "HEAD"            : TOKEN_HEAD,
    "HEADER"          : TOKEN_HEAD,
    "HUSB"            : TOKEN_HUSB,
    "HUSBAND"         : TOKEN_HUSB,
    "INDI"            : TOKEN_INDI,
    "INDIVIDUAL"      : TOKEN_INDI,
    "LABEL"           : TOKEN_LABL,
    "LABL"            : TOKEN_LABL,
    "LANG"            : TOKEN_LANG,
    "LATI"            : TOKEN_LATI,
    "LONG"            : TOKEN_LONG,
    "MAP"             : TOKEN_MAP,
    "MEDI"            : TOKEN_MEDI,
    "MEDIA"           : TOKEN_MEDI,
    "NAME"            : TOKEN_NAME,
    "NAME_PREFIX"     : TOKEN_NPFX,
    "NAME_SUFFIX"     : TOKEN_NSFX,
    "NCHI"            : TOKEN_NCHI,
    "NICK"            : TOKEN_NICK,
    "NICKNAME"        : TOKEN_NICK,
    "NOTE"            : TOKEN_NOTE,
    "NPFX"            : TOKEN_NPFX,
    "NSFX"            : TOKEN_NSFX,
    "OBJE"            : TOKEN_OBJE,
    "OBJECT"          : TOKEN_OBJE,
    "OFFI"            : TOKEN_OFFI,
    "PAGE"            : TOKEN_PAGE,
    "PEDI"            : TOKEN_PEDI,
    "PEDIGREE"        : TOKEN_PEDI,
    "PERI"            : TOKEN_PERI,
    "PHON"            : TOKEN_PHON,
    "PHONE"           : TOKEN_PHON,
    "PHONE_NUMBER"    : TOKEN_PHON,
    "PLAC"            : TOKEN_PLAC,
    "PLACE"           : TOKEN_PLAC,
    "POST"            : TOKEN_POST,
    "POSTAL_CODE"     : TOKEN_POST,
    "PUBL"            : TOKEN_PUBL,
    "PUBLICATION"     : TOKEN_PUBL,
    "QUALITY_OF_DATA" : TOKEN_QUAY,
    "QUAY"            : TOKEN_QUAY,
    "REFERENCE"       : TOKEN_REFN,
    "REFN"            : TOKEN_REFN,
    "RELA"            : TOKEN_RELA,
    "RELI"            : TOKEN_RELI,
    "RELIGION"        : TOKEN_RELI,
    "REPO"            : TOKEN_REPO,
    "REPOSITORY"      : TOKEN_REPO,
    "RESN"            : TOKEN_RESN,
    "RFN"             : TOKEN_RFN,
    "RIN"             : TOKEN_RIN,
    "ROLE"            : TOKEN_ROLE,
    "SCHEMA"          : TOKEN__SCHEMA,
    "SEX"             : TOKEN_SEX,
    "SLGC"            : TOKEN_SLGC,
    "SLGS"            : TOKEN_SLGS,
    "SOUR"            : TOKEN_SOUR,
    "SOURCE"          : TOKEN_SOUR,
    "SPFX"            : TOKEN_SPFX,
    "STAE"            : TOKEN_STAE,
    "STAT"            : TOKEN_STAT,
    "STATE"           : TOKEN_STAE,
    "STATUS"          : TOKEN_STAT,
    "SUBM"            : TOKEN_SUBM,
    "SUBMISSION"      : TOKEN_SUBN,
    "SUBMITTER"       : TOKEN_SUBM,
    "SUBN"            : TOKEN_SUBN,
    "SURN"            : TOKEN_SURN,
    "SURN_PREFIX"     : TOKEN_SPFX,
    "SURNAME"         : TOKEN_SURN,
    "TAXT"            : TOKEN_TAXT,
    "TEMP"            : TOKEN_TEMP,
    "TEMPLE"          : TOKEN_TEMP,
    "TEXT"            : TOKEN_TEXT,
    "TIME"            : TOKEN_TIME,
    "TITL"            : TOKEN_TITL,
    "TITLE"           : TOKEN_TITL,
    "TRAILER"         : TOKEN_TRLR,
    "TRLR"            : TOKEN_TRLR,
    "TYPE"            : TOKEN_TYPE,
    "URL"             : TOKEN_WWW,
    "VERS"            : TOKEN_VERS,
    "VERSION"         : TOKEN_VERS,
    "WIFE"            : TOKEN_WIFE,
    "WWW"             : TOKEN_WWW,
}
493
# Enumerated option values, grouped by prefix.  Their consumers are not in
# this part of the file.
# NOTE(review): presumably these describe how different GEDCOM-producing
# programs encode adoption, CONC handling, alternate names, calendars, media
# objects, name prefixes, residences and source references -- confirm against
# the code that reads them.
ADOPT_NONE = 0
ADOPT_EVENT = 1
ADOPT_FTW = 2
ADOPT_LEGACY = 3
ADOPT_PEDI = 4
ADOPT_STD = 5
CONC_OK = 0
CONC_BROKEN = 1
ALT_NAME_NONE = 0
ALT_NAME_STD = 1
ALT_NAME_ALIAS = 2
ALT_NAME_AKA = 3
ALT_NAME_EVENT_AKA = 4
ALT_NAME_UALIAS = 5
CALENDAR_NO = 0
CALENDAR_YES = 1
OBJE_NO = 0
OBJE_YES = 1
PREFIX_NO = 0
PREFIX_YES = 1
RESIDENCE_ADDR = 0
RESIDENCE_PLAC = 1
SOURCE_REFS_NO = 0
SOURCE_REFS_YES = 1

# Shared ChildRefType instances.  TYPE_BIRTH is a default-constructed
# ChildRefType (NOTE(review): presumably the default value is BIRTH).
TYPE_BIRTH = ChildRefType()
TYPE_ADOPT = ChildRefType(ChildRefType.ADOPTED)
TYPE_FOSTER = ChildRefType(ChildRefType.FOSTER)

# Tuple of the ChildRefType constants BIRTH, UNKNOWN and NONE.
RELATION_TYPES = (
    ChildRefType.BIRTH,
    ChildRefType.UNKNOWN,
    ChildRefType.NONE)

# Lower-case pedigree keyword -> ChildRefType instance.  'adopted' and
# 'foster' reuse the module-level TYPE_ADOPT / TYPE_FOSTER objects; the other
# entries create their own instances.
PEDIGREE_TYPES = {
    'birth'  : ChildRefType(),
    'natural': ChildRefType(),
    'step'   : ChildRefType(ChildRefType.STEPCHILD),
    'adopted': TYPE_ADOPT,
    'foster' : TYPE_FOSTER, }

# Event types singled out for Family Tree Maker (FTW) files.
# NOTE(review): judging by the name, these are events whose place field is
# not a real place in such files -- confirm against the code that uses it.
FTW_BAD_PLACE = [
    EventType.OCCUPATION,
    EventType.RELIGION,
    EventType.DEGREE, ]

# Lower-case medium name -> SourceMediaType constant.
# NOTE(review): 'microfilm' maps to FICHE rather than FILM -- confirm intended.
MEDIA_MAP = {
    'audio'      : SourceMediaType.AUDIO,
    'book'       : SourceMediaType.BOOK,
    'card'       : SourceMediaType.CARD,
    'electronic' : SourceMediaType.ELECTRONIC,
    'fiche'      : SourceMediaType.FICHE,
    'microfiche' : SourceMediaType.FICHE,
    'microfilm'  : SourceMediaType.FICHE,
    'film'       : SourceMediaType.FILM,
    'magazine'   : SourceMediaType.MAGAZINE,
    'manuscript' : SourceMediaType.MANUSCRIPT,
    'map'        : SourceMediaType.MAP,
    'newspaper'  : SourceMediaType.NEWSPAPER,
    'photo'      : SourceMediaType.PHOTO,
    'tombstone'  : SourceMediaType.TOMBSTONE,
    'grave'      : SourceMediaType.TOMBSTONE,
    'video'      : SourceMediaType.VIDEO,
}

# Object class name -> NoteType constant.  "Name" is the one entry that maps
# to NoteType.GENERAL instead of a type named after the object.
OBJ_NOTETYPE = {
    "Attribute"  : NoteType.ATTRIBUTE,
    "Address"    : NoteType.ADDRESS,
    "Citation"   : NoteType.CITATION,
    "Event"      : NoteType.EVENT,
    "Family"     : NoteType.FAMILY,
    "LdsOrd"     : NoteType.LDS,
    "Media"      : NoteType.MEDIA,
    "Name"       : NoteType.GENERAL,
    "Place"      : NoteType.PLACE,
    "Person"     : NoteType.PERSON,
    "Repository" : NoteType.REPO,
    "RepoRef"    : NoteType.REPOREF,
    "Source"     : NoteType.SOURCE,
    "PersonRef"  : NoteType.ASSOCIATION,
}
575
576#-------------------------------------------------------------------------
577#
578# Integer to GEDCOM tag mappings for constants
579#
580#-------------------------------------------------------------------------
# GEDCOM calendar name -> Date.CAL_* constant.
CALENDAR_MAP_GEDCOM2XML = {
    "FRENCH R" : Date.CAL_FRENCH,
    "JULIAN"   : Date.CAL_JULIAN,
    "HEBREW"   : Date.CAL_HEBREW,
}

# GEDCOM date quality keyword -> Date.QUAL_* constant.  Both 'CAL' and 'INT'
# map to QUAL_CALCULATED.
QUALITY_MAP = {
    'CAL' : Date.QUAL_CALCULATED,
    'INT' : Date.QUAL_CALCULATED,
    'EST' : Date.QUAL_ESTIMATED,
}

# GEDCOM sex letter -> Person gender constant; only 'F' and 'M' are listed.
SEX_MAP = {
    'F' : Person.FEMALE,
    'M' : Person.MALE,
}

# Family EventType constant -> GEDCOM tag.
FAMILYCONSTANTEVENTS = {
    EventType.ANNULMENT  : "ANUL",
    EventType.DIV_FILING : "DIVF",
    EventType.DIVORCE    : "DIV",
    EventType.CENSUS     : "CENS",
    EventType.ENGAGEMENT : "ENGA",
    EventType.MARR_BANNS : "MARB",
    EventType.MARR_CONTR : "MARC",
    EventType.MARR_LIC   : "MARL",
    EventType.MARR_SETTL : "MARS",
    EventType.MARRIAGE   : "MARR"
}

# Personal EventType constant -> GEDCOM tag.  CENSUS and DIV_FILING also
# appear in FAMILYCONSTANTEVENTS with the same tags.  Tags starting with '_'
# ("_DEG", "_ELEC", "_MDCL", "_MILT") are extension tags.
PERSONALCONSTANTEVENTS = {
    EventType.ADOPT            : "ADOP",
    EventType.ADULT_CHRISTEN   : "CHRA",
    EventType.BIRTH            : "BIRT",
    EventType.DEATH            : "DEAT",
    EventType.BAPTISM          : "BAPM",
    EventType.BAR_MITZVAH      : "BARM",
    EventType.BAS_MITZVAH      : "BASM",
    EventType.BLESS            : "BLES",
    EventType.BURIAL           : "BURI",
    # EventType.CAUSE_DEATH      : "CAUS",  Not legal Gedcom since v5.0
    EventType.ORDINATION       : "ORDN",
    EventType.CENSUS           : "CENS",
    EventType.CHRISTEN         : "CHR",
    EventType.CONFIRMATION     : "CONF",
    EventType.CREMATION        : "CREM",
    EventType.DEGREE           : "_DEG",
    EventType.DIV_FILING       : "DIVF",
    EventType.EDUCATION        : "EDUC",
    EventType.ELECTED          : "_ELEC",  # FTM custom tag
    EventType.EMIGRATION       : "EMIG",
    EventType.FIRST_COMMUN     : "FCOM",
    EventType.GRADUATION       : "GRAD",
    EventType.MED_INFO         : "_MDCL",
    EventType.MILITARY_SERV    : "_MILT",
    EventType.NATURALIZATION   : "NATU",
    EventType.NOB_TITLE        : "TITL",
    EventType.NUM_MARRIAGES    : "NMR",
    EventType.IMMIGRATION      : "IMMI",
    EventType.OCCUPATION       : "OCCU",
    EventType.PROBATE          : "PROB",
    EventType.PROPERTY         : "PROP",
    EventType.RELIGION         : "RELI",
    EventType.RESIDENCE        : "RESI",
    EventType.RETIREMENT       : "RETI",
    EventType.WILL             : "WILL",
}

# Family AttributeType constant -> GEDCOM tag.
FAMILYCONSTANTATTRIBUTES = {
    AttributeType.NUM_CHILD   : "NCHI",
}

# Personal AttributeType constant -> GEDCOM tag.
PERSONALCONSTANTATTRIBUTES = {
    AttributeType.CASTE       : "CAST",
    AttributeType.DESCRIPTION : "DSCR",
    AttributeType.ID          : "IDNO",
    AttributeType.NATIONAL    : "NATI",
    AttributeType.NUM_CHILD   : "NCHI",
    AttributeType.SSN         : "SSN",
}
661
662#-------------------------------------------------------------------------
663#
664# Gedcom to int constants
665#
666#-------------------------------------------------------------------------
# GEDCOM LDS ordinance status text (upper case) -> LdsOrd.STATUS_* constant.
LDS_STATUS = {
    "BIC"      : LdsOrd.STATUS_BIC,
    "CANCELED" : LdsOrd.STATUS_CANCELED,
    "CHILD"    : LdsOrd.STATUS_CHILD,
    "CLEARED"  : LdsOrd.STATUS_CLEARED,
    "COMPLETED": LdsOrd.STATUS_COMPLETED,
    "DNS"      : LdsOrd.STATUS_DNS,
    "INFANT"   : LdsOrd.STATUS_INFANT,
    "PRE-1970" : LdsOrd.STATUS_PRE_1970,
    "QUALIFIED": LdsOrd.STATUS_QUALIFIED,
    "DNS/CAN"  : LdsOrd.STATUS_DNS_CAN,
    "STILLBORN": LdsOrd.STATUS_STILLBORN,
    "SUBMITTED": LdsOrd.STATUS_SUBMITTED,
    "UNCLEARED": LdsOrd.STATUS_UNCLEARED,
}
682# -------------------------------------------------------------------------
683#
684# Custom event friendly names.  These are non-standard GEDCOM "NEW_TAG"
685# tags that start with an '_' i.e. "_DNA".  FTM has several of these, other
686# programs may have more.  If a tag with this format is encountered it is
687# checked in this table for a "friendly" name translation and thereafter is
688# displayed and exported as such.  If the tag is NOT in this table and not
689# otherwise handled by the code, the tag itself is used for display and
690# export.  For example "_XYZ" is not in the table and will be displayed as
691# "_XYZ" and exported as an EVEN.TYPE=_XYZ
692# As Custom entries, they do not appear in Gramps Events add choice unless
693# already imported via GEDCOM.
694#
695# -------------------------------------------------------------------------
# Custom '_'-prefixed tag -> friendly name.  Each name is passed through _()
# (the gettext function bound near the imports) when this module is loaded,
# so the table holds the names already translated.
CUSTOMEVENTTAGS = {
    "_CIRC"     : _("Circumcision"),
    "_COML"     : _("Common Law Marriage"),
    "_DEST"     : _("Destination"),
    "_DNA"      : _("DNA"),
    "_DCAUSE"   : _("Cause of Death"),
    "_EMPLOY"   : _("Employment"),
    "_EXCM"     : _("Excommunication"),
    "_EYC"      : _("Eye Color"),
    "_FUN"      : _("Funeral"),
    "_HEIG"     : _("Height"),
    "_INIT"     : _("Initiatory (LDS)"),
    "_MILTID"   : _("Military ID"),
    "_MISN"     : _("Mission (LDS)"),
    "_NAMS"     : _("Namesake"),
    "_ORDI"     : _("Ordinance"),
    "_ORIG"     : _("Origin"),
    "_SEPR"     : _("Separation"),         # Applies to Families
    "_WEIG"     : _("Weight"),
}
# table for skipping illegal control chars in GEDCOM import
# Only 09, 0A, 0D are allowed.
# Keys are the code points 0x00-0x08, 0x0B-0x0C and 0x0E-0x1F; every value is
# None (the dict.fromkeys default).
# NOTE(review): presumably passed to str.translate(), where a None value
# deletes the character -- the use is not in this part of the file.
STRIP_DICT = dict.fromkeys(list(range(9)) + list(range(11, 13)) +
                           list(range(14, 32)))
# The C1 Control characters are not treated in Latin-1 (ISO-8859-1) as
# undefined, but if they have been used, the file is probably supposed to be
# cp1252
# Keys are the code points 0x7F (DEL) through 0x9E; every value is None.
# NOTE(review): range() excludes its end value, so 0x9F -- the last C1
# control code -- is not in the table; confirm whether that is intended.
DEL_AND_C1 = dict.fromkeys(list(range(0x7F, 0x9F)))
724
725#-------------------------------------------------------------------------
726#
727# GEDCOM events to Gramps events conversion
728#
729#-------------------------------------------------------------------------
# Reverse lookup: GEDCOM event tag -> Gramps event type.  The personal table
# is entered first and the family table afterwards, so for a tag present in
# both the family table's value is the one that remains.
GED_TO_GRAMPS_EVENT = {
    tag: event
    for event, tag in PERSONALCONSTANTEVENTS.items()
    if tag != ""}
GED_TO_GRAMPS_EVENT.update(
    (tag, event)
    for event, tag in FAMILYCONSTANTEVENTS.items()
    if tag != "")

# Reverse lookup: GEDCOM attribute tag -> Gramps attribute type (personal
# attributes only).  The loop variables stay module-level names, as before.
GED_TO_GRAMPS_ATTR = {}
for __val, __key in PERSONALCONSTANTATTRIBUTES.items():
    if __key == "":
        continue
    GED_TO_GRAMPS_ATTR[__key] = __val
743
744#-------------------------------------------------------------------------
745#
746# GEDCOM Date Constants
747#
748#-------------------------------------------------------------------------
# Month abbreviations indexed by month number; index 0 is an empty string so
# that month 1 sits at index 1.  HMONTH (Hebrew) and FMONTH (French
# republican) list 13 names each, MONTH lists 12.
HMONTH = [
    "", "TSH", "CSH", "KSL", "TVT", "SHV", "ADR",
    "ADS", "NSN", "IYR", "SVN", "TMZ", "AAV", "ELL"]

FMONTH = [
    "", "VEND", "BRUM", "FRIM", "NIVO", "PLUV", "VENT",
    "GERM", "FLOR", "PRAI", "MESS", "THER", "FRUC", "COMP"]

MONTH = [
    "", "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
    "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"]

# Date.CAL_* constant -> (month name list, GEDCOM calendar escape).
# The Swedish calendar uses the MONTH names with the '@#DUNKNOWN@' escape.
CALENDAR_MAP = {
    Date.CAL_HEBREW : (HMONTH, '@#DHEBREW@'),
    Date.CAL_FRENCH : (FMONTH, '@#DFRENCH R@'),
    Date.CAL_JULIAN : (MONTH, '@#DJULIAN@'),
    Date.CAL_SWEDISH : (MONTH, '@#DUNKNOWN@'),
}

# Date.CAL_* constant -> short calendar suffix ' (x)'.
# NOTE(review): judging by the name, the suffix is appended to a date string
# before it is handed to the date parser -- confirm at the point of use.
CALENDAR_MAP_PARSESTRING = {
    Date.CAL_HEBREW : ' (h)',
    Date.CAL_FRENCH : ' (f)',
    Date.CAL_JULIAN : ' (j)',
    Date.CAL_SWEDISH : ' (s)',
}

#how wrong calendar use is shown
CALENDAR_MAP_WRONGSTRING = {
    Date.CAL_HEBREW : ' <hebrew>',
    Date.CAL_FRENCH : ' <french rep>',
    Date.CAL_JULIAN : ' <julian>',
    Date.CAL_SWEDISH : ' <swedish>',
}

# Date.MOD_* constant -> GEDCOM date modifier keyword.
DATE_MODIFIER = {
    Date.MOD_ABOUT   : "ABT",
    Date.MOD_BEFORE  : "BEF",
    Date.MOD_AFTER   : "AFT",
    #Date.MOD_INTERPRETED : "INT",
}

# Date.QUAL_* constant -> GEDCOM date quality keyword.
DATE_QUALITY = {
    Date.QUAL_CALCULATED : "CAL",
    Date.QUAL_ESTIMATED  : "EST",
}
794
795#-------------------------------------------------------------------------
796#
797# regular expressions
798#
799#-------------------------------------------------------------------------
# "<level> @<xref>@ NOTE<rest>"; groups: (xref, rest)
NOTE_RE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+NOTE(.*)$")
# "<level> CONT<text>" / "<level> CONC<text>"; at most one whitespace
# character after the tag is dropped; group: (text)
CONT_RE = re.compile(r"\s*\d+\s+CONT\s?(.*)$")
CONC_RE = re.compile(r"\s*\d+\s+CONC\s?(.*)$")
# "<level> @<xref>@ INDI<rest>"; groups: (xref, rest)
PERSON_RE = re.compile(r"\s*\d+\s+\@(\S+)\@\s+INDI(.*)$")
# Date starting with a quality keyword; groups: (INT|EST|CAL, rest)
MOD = re.compile(r"\s*(INT|EST|CAL)\s+(.*)$")
# Date with a calendar escape "@#D<calendar>@" (the 'D' is optional),
# optionally preceded by a modifier; groups: (ABT|BEF|AFT or None, calendar,
# rest)
CAL = re.compile(r"\s*(ABT|BEF|AFT)?\s*@#D?([^@]+)@\s*(.*)$")
# "BET ... AND ..." ranges.  RANGE: both dates carry a calendar escape,
# groups (calendar1, date1, calendar2, date2).  RANGE1: only the second date
# does, groups (date1, calendar2, date2).  RANGE2: only the first date does,
# groups (calendar1, date1, date2).
RANGE = re.compile(
    r"\s*BET\s+@#D?([^@]+)@\s*(.*)\s+AND\s+@#D?([^@]+)@\s*(.*)$")
RANGE1 = re.compile(r"\s*BET\s+\s*(.*)\s+AND\s+@#D?([^@]+)@\s*(.*)$")
RANGE2 = re.compile(r"\s*BET\s+@#D?([^@]+)@\s*(.*)\s+AND\s+\s*(.*)$")
# "FROM ... TO ..." spans, with the same three variants and group layouts as
# RANGE / RANGE1 / RANGE2.
SPAN = re.compile(r"\s*FROM\s+@#D?([^@]+)@\s*(.*)\s+TO\s+@#D?([^@]+)@\s*(.*)$")
SPAN1 = re.compile(r"\s*FROM\s+\s*(.*)\s+TO\s+@#D?([^@]+)@\s*(.*)$")
SPAN2 = re.compile(r"\s*FROM\s+@#D?([^@]+)@\s*(.*)\s+TO\s+\s*(.*)$")
# Name value with up to two '/' separators after an optional leading '/';
# group 1 is the text before the first separator, group 3 the text between
# the separators, group 5 the text after the second one.
NAME_RE = re.compile(r"/?([^/]*)(/([^/]*)(/([^/]*))?)?")
# "/<surname>/<rest>"; groups: (surname, rest)
SURNAME_RE = re.compile(r"/([^/]*)/([^/]*)")
815
816
817#-----------------------------------------------------------------------
818#
819# GedcomDateParser
820#
821#-----------------------------------------------------------------------
class GedcomDateParser(DateParser):
    """
    DateParser specialisation used for GEDCOM dates: English three-letter
    month names, an en_US locale and a month/day/year numeric format.
    """
    # lower-case English month abbreviation -> month number (1..12)
    month_to_int = {
        abbrev: number
        for number, abbrev in enumerate(
            ('jan', 'feb', 'mar', 'apr', 'may', 'jun',
             'jul', 'aug', 'sep', 'oct', 'nov', 'dec'), start=1)}

    # no register_datehandler here
    _locale = GrampsLocale(lang='en_US')

    def dhformat_changed(self):
        """ Set the numeric date format; a subclass may override this """
        self.dhformat = "%m/%d/%y"
834
835
836#-------------------------------------------------------------------------
837#
838# Lexer - serves as the lexical analysis engine
839#
840#-------------------------------------------------------------------------
841class Lexer:
842    """ low level line reading and early parsing """
843    def __init__(self, ifile, __add_msg):
844        self.ifile = ifile
845        self.current_list = []
846        self.eof = False
847        self.cnv = None
848        self.cnt = 0
849        self.index = 0
850        self.func_map = {TOKEN_CONT : self.__fix_token_cont,
851                         TOKEN_CONC : self.__fix_token_conc}
852        self.__add_msg = __add_msg
853
854    def readline(self):
855        """ read a line from file with possibility of putting it back """
856        if len(self.current_list) <= 1 and not self.eof:
857            self.__readahead()
858        try:
859            return GedLine(self.current_list.pop())
860        except:
861            LOG.debug('Error in reading Gedcom line', exc_info=True)
862            return None
863
864    def __fix_token_cont(self, data):
865        line = self.current_list[0]
866        new_value = line[2] + '\n' + data[2]
867        self.current_list[0] = (line[0], line[1], new_value, line[3], line[4])
868
869    def __fix_token_conc(self, data):
870        line = self.current_list[0]
871        if len(line[2]) == 4:
872            # This deals with lines of the form
873            # 0 @<XREF:NOTE>@ NOTE
874            #   1 CONC <SUBMITTER TEXT>
875            # The previous line contains only a tag and no data so concat a
876            # space to separate the new line from the tag. This prevents the
877            # first letter of the new line being lost later
878            # in _GedcomParse.__parse_record
879            new_value = line[2] + ' ' + data[2]
880        else:
881            new_value = line[2] + data[2]
882        self.current_list[0] = (line[0], line[1], new_value, line[3], line[4])
883
884    def __readahead(self):
885        while len(self.current_list) < 5:
886            line = self.ifile.readline()
887            self.index += 1
888            if not line:
889                self.eof = True
890                return
891
892            original_line = line
893            try:
894                # According to the GEDCOM 5.5 standard,
895                # Chapter 1 subsection Grammar "leading whitespace preceeding
896                # a GEDCOM line should be ignored"
897                # We will also strip the terminator which is any combination
898                # of carriage_return and line_feed
899                line = line.lstrip(' ').rstrip('\n\r')
900                # split into level+delim+rest
901                line = line.partition(' ')
902                level = int(line[0])
903                # there should only be one space after the level,
904                # but we can ignore more,
905                line = line[2].lstrip(' ')
906                # then split into tag+delim+line_value
907                # or xfef_id+delim+rest
908                # the xref_id can have spaces in it
909                if line.startswith('@'):
910                    line = line.split('@', 2)
911                    # line is now [None, alphanum+pointer_string, rest]
912                    tag = '@' + line[1] + '@'
913                    line_value = line[2].lstrip()
914                    # Ignore meaningless @IDENT@ on CONT or CONC line
915                    # as noted at http://www.tamurajones.net/IdentCONT.xhtml
916                    if (line_value.lstrip().startswith("CONT ") or
917                            line_value.lstrip().startswith("CONC ")):
918                        line = line_value.lstrip().partition(' ')
919                        tag = line[0]
920                        line_value = line[2]
921                else:
922                    line = line.partition(' ')
923                    tag = line[0]
924                    line_value = line[2]
925            except:
926                problem = _("Line ignored ")
927                text = original_line.rstrip('\n\r')
928                prob_width = 66
929                problem = problem.ljust(prob_width)[0:(prob_width - 1)]
930                text = text.replace("\n", "\n".ljust(prob_width + 22))
931                message = "%s              %s" % (problem, text)
932                self.__add_msg(message)
933                continue
934
935            # Need to un-double '@' See Gedcom 5.5 spec 'any_char'
936            line_value = line_value.replace('@@', '@')
937            token = TOKENS.get(tag, TOKEN_UNKNOWN)
938            data = (level, token, line_value, tag, self.index)
939
940            func = self.func_map.get(data[1])
941            if func:
942                func(data)
943            else:
944                # There will normally only be one space between tag and
945                # line_value, but in case there is more then one, remove extra
946                # spaces after CONC/CONT processing
947                # Also, Gedcom spec says there should be no spaces at end of
948                # line, however some programs put them there (FTM), so let's
949                # leave them in place.
950                data = data[:2] + (data[2].lstrip(),) + data[3:]
951                self.current_list.insert(0, data)
952
953    def clean_up(self):
954        """
955        Break circular references to parsing methods stored in dictionaries
956        to aid garbage collection
957        """
958        for key in list(self.func_map.keys()):
959            del self.func_map[key]
960        del self.func_map
961
962
963#-----------------------------------------------------------------------
964#
965# GedLine - represents a tokenized version of a GEDCOM line
966#
967#-----------------------------------------------------------------------
class GedLine:
    """
    GedLine is a class that represents a GEDCOM line. The form of a GEDCOM
    line is:

    <LEVEL> <TOKEN> <TEXT>

    This gets parsed into

    Line Number, Level, Token Value, Token Text, and Data

    Data is dependent on the context of the Token Value. For most tokens,
    this is just a text string. However, for certain tokens where we know
    the context, we can provide some value. The current parsed tokens are:

    TOKEN_DATE    - Date
    TOKEN_SEX     - Person gender item
    TOKEN_UNKNOWN - Check to see if this is a known event
    """
    __DATE_CNV = GedcomDateParser()

    @staticmethod
    def __split_compound(text, both_re, second_only_re, first_only_re):
        """
        Split a two-date expression into (cal1, data1, cal2, data2).

        The three patterns are tried in order: one carrying a calendar on
        both dates, one carrying it only on the second date and one carrying
        it only on the first.  A missing calendar is filled in with
        Date.CAL_GREGORIAN.  Returns None when no pattern matches.
        """
        match = both_re.match(text)
        if match:
            return match.groups()
        match = second_only_re.match(text)
        if match:
            (data1, cal2, data2) = match.groups()
            return (Date.CAL_GREGORIAN, data1, cal2, data2)
        match = first_only_re.match(text)
        if match:
            (cal1, data1, data2) = match.groups()
            return (cal1, data1, Date.CAL_GREGORIAN, data2)
        return None

    @staticmethod
    def __build_compound(parts, mod, qual, modifier, text_format):
        """
        Build the Date for a two-date expression split by __split_compound.

        modifier is the Date modifier to set (range or span); text_format is
        the template used to build the fallback text when the two dates use
        different calendars.
        """
        (cal1, data1, cal2, data2) = parts
        cal1 = CALENDAR_MAP_GEDCOM2XML.get(cal1, Date.CAL_GREGORIAN)
        cal2 = CALENDAR_MAP_GEDCOM2XML.get(cal2, Date.CAL_GREGORIAN)
        if cal1 != cal2:
            #not supported by GRAMPS, import as text, we construct a string
            # that the parser will not parse as a correct date
            return GedLine.__DATE_CNV.parse(
                text_format %
                (mod, data1, CALENDAR_MAP_WRONGSTRING.get(cal1, ''),
                 CALENDAR_MAP_WRONGSTRING.get(cal2, ''), data2))

        #add hebrew, ... calendar so that months are recognized
        data1 += CALENDAR_MAP_PARSESTRING.get(cal1, '')
        data2 += CALENDAR_MAP_PARSESTRING.get(cal2, '')
        start = GedLine.__DATE_CNV.parse(data1)
        stop = GedLine.__DATE_CNV.parse(data2)
        dateobj = Date()
        dateobj.set(Date.QUAL_NONE, modifier, cal1,
                    start.get_start_date() + stop.get_start_date())
        dateobj.set_quality(qual)
        return dateobj

    @staticmethod
    def __extract_date(text):
        """
        Converts the specified text to a Date object.
        """
        # Horrible hack for importing illegal GEDCOM from Apple Macintosh
        # Classic 'Gene' program
        text = text.replace('BET ABT', 'EST BET')

        # extract out the MOD line
        match = MOD.match(text)
        mod = ''
        if match:
            (mod, text) = match.groups()
            qual = QUALITY_MAP.get(mod, Date.QUAL_NONE)
            mod += ' '
        else:
            qual = Date.QUAL_NONE

        # parse the range if we match, if so, return
        parts = GedLine.__split_compound(text, RANGE, RANGE1, RANGE2)
        if parts is not None:
            return GedLine.__build_compound(
                parts, mod, qual, Date.MOD_RANGE,
                '%sbetween %s%s and %s%s')

        # parse a span if we match
        parts = GedLine.__split_compound(text, SPAN, SPAN1, SPAN2)
        if parts is not None:
            return GedLine.__build_compound(
                parts, mod, qual, Date.MOD_SPAN,
                '%sfrom %s%s to %s%s')

        # single date with an explicit calendar
        match = CAL.match(text)
        if match:
            (abt, call, data) = match.groups()
            call = CALENDAR_MAP_GEDCOM2XML.get(call, Date.CAL_GREGORIAN)
            data += CALENDAR_MAP_PARSESTRING.get(call, '')
            if abt:
                dateobj = GedLine.__DATE_CNV.parse("%s %s" % (abt, data))
            else:
                dateobj = GedLine.__DATE_CNV.parse(data)
            dateobj.set_quality(qual)
            return dateobj

        # anything else is handed to the date parser unchanged
        dateobj = GedLine.__DATE_CNV.parse(text)
        dateobj.set_quality(qual)
        return dateobj

    def __init__(self, data):
        """
        Build the line from a (level, token, value, tag, line number) tuple.

        If the level is 0, then this is a top level instance. In this case,
        we may find items in the form of:

        <LEVEL> @ID@ <ITEM>

        If this is not the top level, we check the _MAP_DATA table to see if
        there is a conversion function for the data.
        """
        self.line = data[4]
        self.level = data[0]
        self.token = data[1]
        self.token_text = data[3].strip()
        self.data = str(data[2])

        if self.level == 0:
            if (self.token_text and self.token_text[0] == '@' and
                    self.token_text[-1] == '@'):
                # the tag is an xref id: keep the id without its '@' marks
                self.token = TOKEN_ID
                self.token_text = self.token_text[1:-1]
                self.data = self.data.strip()
        else:
            func = _MAP_DATA.get(self.token)
            if func:
                func(self)

    def calc_sex(self):
        """
        Converts the data field to a gen.lib token indicating the gender
        """
        try:
            self.data = SEX_MAP.get(self.data.strip()[0],
                                    Person.UNKNOWN)
        except (AttributeError, IndexError):
            # IndexError: the value is empty or blank.
            # AttributeError: the data is not a string (e.g. it has already
            # been converted).
            self.data = Person.UNKNOWN

    def calc_date(self):
        """
        Converts the data field to a Date object
        """
        self.data = self.__extract_date(self.data)
        self.token = TOKEN_DATE

    def calc_unknown(self):
        """
        Checks to see if the token maps a known GEDCOM event. If so, we
        change the type from UNKNOWN to TOKEN_GEVENT (gedcom event), and
        the data is assigned to the associated Gramps EventType.  Otherwise
        the token is checked against the known attributes in the same way,
        giving TOKEN_ATTR.  If neither matches, the line is left unchanged.
        """
        token = GED_TO_GRAMPS_EVENT.get(self.token_text)
        if token:
            event = Event()
            event.set_description(self.data)
            event.set_type(token)
            self.token = TOKEN_GEVENT
            self.data = event
        else:
            token = GED_TO_GRAMPS_ATTR.get(self.token_text)
            if token:
                attr = Attribute()
                attr.set_value(self.data)
                attr.set_type(token)
                self.token = TOKEN_ATTR
                self.data = attr

    def calc_note(self):
        """ look for a note xref @N0001@ """
        gid = self.data.strip()
        if len(gid) > 2 and gid[0] == '@' and gid[-1] == '@':
            self.token = TOKEN_RNOTE
            self.data = gid[1:-1]

    def calc_nchi(self):
        """ set attribute for number of children """
        attr = Attribute()
        attr.set_value(self.data)
        attr.set_type(AttributeType.NUM_CHILD)
        self.data = attr
        self.token = TOKEN_ATTR

    def calc_attr(self):
        """ set attribute for general attributes """
        attr = Attribute()
        attr.set_value(self.data)
        attr.set_type((AttributeType.CUSTOM, self.token_text))
        self.data = attr
        self.token = TOKEN_ATTR

    def __repr__(self):
        return "%d: %d (%d:%s) %s" % (self.line, self.level, self.token,
                                      self.token_text, self.data)
1181
# Conversion functions applied by GedLine.__init__ to lines below level 0:
# the line's token is looked up here and, when present, the function is
# called with the GedLine instance so it can convert the data in place.
_MAP_DATA = {
    TOKEN_UNKNOWN : GedLine.calc_unknown,
    TOKEN_DATE    : GedLine.calc_date,
    TOKEN__DATE   : GedLine.calc_date,
    TOKEN_SEX     : GedLine.calc_sex,
    TOKEN_NOTE    : GedLine.calc_note,
    TOKEN_NCHI    : GedLine.calc_nchi,
    TOKEN__STAT   : GedLine.calc_attr,
    TOKEN__UID    : GedLine.calc_attr,
    TOKEN_AFN     : GedLine.calc_attr,
    TOKEN__FSFTID : GedLine.calc_attr, }
1193
1194
1195#-------------------------------------------------------------------------
1196#
1197# File Readers
1198#
1199#-------------------------------------------------------------------------
class BaseReader:
    """
    Common base of the character-level readers.

    Holds the input file, the encoding name and the callback used to report
    problems; subclasses supply readline().
    """
    def __init__(self, ifile, encoding, __add_msg):
        self.ifile = ifile
        self.enc = encoding
        self.__add_msg = __add_msg

    def reset(self):
        """ Rewind the input file to its start """
        self.ifile.seek(0)

    def readline(self):
        """ Read one line; must be provided by the subclass """
        raise NotImplementedError()

    def report_error(self, problem, line):
        """
        Format a problem description together with the offending line and
        hand the result to the message callback.
        """
        prob_width = 66
        # pad or cut the description so the line text starts in a fixed
        # column
        label = problem.ljust(prob_width)[:prob_width - 1]
        # embedded newlines continue underneath the line text
        continuation = "\n".ljust(prob_width + 22)
        body = line.rstrip('\n\r').replace("\n", continuation)
        self.__add_msg("%s               %s" % (label, body))
1223
1224
class UTF8Reader(BaseReader):
    """ Reader for UTF-8 input; decoding is left to Python """
    def __init__(self, ifile, __add_msg, enc):
        super().__init__(ifile, enc, __add_msg)
        # rewind the raw file before it is wrapped
        self.reset()
        # 'UTF_8_SIG' selects the codec that skips a leading byte order mark
        codec = 'utf_8_sig' if enc == 'UTF_8_SIG' else 'utf_8'
        self.ifile = TextIOWrapper(ifile, encoding=codec,
                                   errors='replace', newline=None)

    def readline(self):
        """ Read one decoded line and apply the STRIP_DICT translation """
        return self.ifile.readline().translate(STRIP_DICT)
1240
1241
class UTF16Reader(BaseReader):
    """ Reader for UTF-16 input; decoding is left to Python """
    def __init__(self, ifile, __add_msg):
        super().__init__(ifile, 'UTF16', __add_msg)
        wrapped = TextIOWrapper(ifile, encoding='utf_16',
                                errors='replace', newline=None)
        self.ifile = wrapped
        # here the rewind goes through the text wrapper
        self.reset()

    def readline(self):
        """ Read one decoded line and apply the STRIP_DICT translation """
        return self.ifile.readline().translate(STRIP_DICT)
1253
1254
class AnsiReader(BaseReader):
    """ Reader for ANSI (latin1) input; decoding is left to Python """
    def __init__(self, ifile, __add_msg):
        super().__init__(ifile, 'latin1', __add_msg)
        wrapped = TextIOWrapper(ifile, encoding='latin1',
                                errors='replace', newline=None)
        self.ifile = wrapped

    def readline(self):
        """
        Read one decoded line and apply the STRIP_DICT translation.

        A line that the DEL_AND_C1 translation would alter is reported
        first, since it suggests the file is really cp1252.
        """
        raw = self.ifile.readline()
        if raw != raw.translate(DEL_AND_C1):
            self.report_error("DEL or C1 control chars in line "
                              "did you mean CHAR cp1252??", raw)
        return raw.translate(STRIP_DICT)
1268
1269
class CP1252Reader(BaseReader):
    """ Reader for CP1252 input; decoding is left to Python """
    def __init__(self, ifile, __add_msg):
        super().__init__(ifile, 'cp1252', __add_msg)
        wrapped = TextIOWrapper(ifile, encoding='cp1252',
                                errors='replace', newline=None)
        self.ifile = wrapped

    def readline(self):
        """ Read one decoded line and apply the STRIP_DICT translation """
        return self.ifile.readline().translate(STRIP_DICT)
1280
1281
class AnselReader(BaseReader):
    """
    ANSEL to Unicode Conversion

    ANSEL references:
    http://lcweb2.loc.gov/diglib/codetables/45.html
    http://www.gymel.com/charsets/ANSEL.html

    list of ANSEL codes that replicate ASCII
    note that DEL (127=0x7F) is a control char
    Note: spec allows control-chars that Gramps probably doesn't use
    but 10=0x0A _is_ needed (!)
    ---
    Also: there are two additional control chars 0x98,0x9c (unicode same)
    which we also ignore for now (start/end of string (or sort sequence))
    ---
    ?: should we allow TAB, as a Gramps extension?
    """
    __printable_ascii = list(map(chr, list(range(32, 127))))  # up thru 126
    #                            LF  CR  Esc GS  RS  US
    __use_ASCII = list(map(chr, [10, 13, 27, 29, 30, 31])) + __printable_ascii

    # mappings of single byte ANSEL codes to unicode
    __onebyte = {
        b'\xA1' : '\u0141', b'\xA2' : '\u00d8', b'\xA3' : '\u0110',
        b'\xA4' : '\u00de', b'\xA5' : '\u00c6', b'\xA6' : '\u0152',
        b'\xA7' : '\u02b9', b'\xA8' : '\u00b7', b'\xA9' : '\u266d',
        b'\xAA' : '\u00ae', b'\xAB' : '\u00b1', b'\xAC' : '\u01a0',
        b'\xAD' : '\u01af', b'\xAE' : '\u02bc', b'\xB0' : '\u02bb',
        b'\xB1' : '\u0142', b'\xB2' : '\u00f8', b'\xB3' : '\u0111',
        b'\xB4' : '\u00fe', b'\xB5' : '\u00e6', b'\xB6' : '\u0153',
        b'\xB7' : '\u02ba', b'\xB8' : '\u0131', b'\xB9' : '\u00a3',
        b'\xBA' : '\u00f0', b'\xBC' : '\u01a1', b'\xBD' : '\u01b0',
        b'\xBE' : '\u25a1', b'\xBF' : '\u25a0',
        b'\xC0' : '\u00b0', b'\xC1' : '\u2113', b'\xC2' : '\u2117',
        b'\xC3' : '\u00a9', b'\xC4' : '\u266f', b'\xC5' : '\u00bf',
        b'\xC6' : '\u00a1', b'\xC7' : '\u00df', b'\xC8' : '\u20ac',
        b'\xCD' : '\u0065', b'\xCE' : '\u006f', b'\xCF' : '\u00df', }

    # combining forms (in ANSEL, they precede the modified ASCII character
    # whereas the unicode combining term follows the character modified
    # Note: unicode allows multiple modifiers, but ANSEL may not (TDB?),
    # so we ignore multiple combining forms in this module
    #  8d & 8e are zero-width joiner (ZWJ), and zero-width non-joiner ZWNJ
    #  (strange things) probably not commonly found in our needs, unless one
    #   starts writing persian (or???) poetry in ANSEL
    __acombiners = {
        b'\x8D' : '\u200d', b'\x8E' : '\u200c', b'\xE0' : '\u0309',
        b'\xE1' : '\u0300', b'\xE2' : '\u0301', b'\xE3' : '\u0302',
        b'\xE4' : '\u0303', b'\xE5' : '\u0304', b'\xE6' : '\u0306',
        b'\xE7' : '\u0307', b'\xE8' : '\u0308', b'\xE9' : '\u030c',
        b'\xEA' : '\u030a', b'\xEB' : '\ufe20', b'\xEC' : '\ufe21',
        b'\xED' : '\u0315', b'\xEE' : '\u030b', b'\xEF' : '\u0310',
        b'\xF0' : '\u0327', b'\xF1' : '\u0328', b'\xF2' : '\u0323',
        b'\xF3' : '\u0324', b'\xF4' : '\u0325', b'\xF5' : '\u0333',
        b'\xF6' : '\u0332', b'\xF7' : '\u0326', b'\xF8' : '\u031c',
        b'\xF9' : '\u032e', b'\xFA' : '\ufe22', b'\xFB' : '\ufe23',
        b'\xFC' : '\u0338',
        b'\xFE' : '\u0313', }

    # mappings of two byte (precomposed forms) ANSEL codes to unicode
    __twobyte = {
        b'\xE0\x41' : '\u1ea2', b'\xE0\x45' : '\u1eba',
        b'\xE0\x49' : '\u1ec8', b'\xE0\x4F' : '\u1ece',
        b'\xE0\x55' : '\u1ee6', b'\xE0\x59' : '\u1ef6',
        b'\xE0\x61' : '\u1ea3', b'\xE0\x65' : '\u1ebb',
        b'\xE0\x69' : '\u1ec9', b'\xE0\x6F' : '\u1ecf',
        b'\xE0\x75' : '\u1ee7', b'\xE0\x79' : '\u1ef7',
        b'\xE1\x41' : '\u00c0', b'\xE1\x45' : '\u00c8',
        b'\xE1\x49' : '\u00cc', b'\xE1\x4F' : '\u00d2',
        b'\xE1\x55' : '\u00d9', b'\xE1\x57' : '\u1e80',
        b'\xE1\x59' : '\u1ef2', b'\xE1\x61' : '\u00e0',
        b'\xE1\x65' : '\u00e8', b'\xE1\x69' : '\u00ec',
        b'\xE1\x6F' : '\u00f2', b'\xE1\x75' : '\u00f9',
        b'\xE1\x77' : '\u1e81', b'\xE1\x79' : '\u1ef3',
        b'\xE2\x41' : '\u00c1', b'\xE2\x43' : '\u0106',
        b'\xE2\x45' : '\u00c9', b'\xE2\x47' : '\u01f4',
        b'\xE2\x49' : '\u00cd', b'\xE2\x4B' : '\u1e30',
        b'\xE2\x4C' : '\u0139', b'\xE2\x4D' : '\u1e3e',
        b'\xE2\x4E' : '\u0143', b'\xE2\x4F' : '\u00d3',
        b'\xE2\x50' : '\u1e54', b'\xE2\x52' : '\u0154',
        b'\xE2\x53' : '\u015a', b'\xE2\x55' : '\u00da',
        b'\xE2\x57' : '\u1e82', b'\xE2\x59' : '\u00dd',
        b'\xE2\x5A' : '\u0179', b'\xE2\x61' : '\u00e1',
        b'\xE2\x63' : '\u0107', b'\xE2\x65' : '\u00e9',
        b'\xE2\x67' : '\u01f5', b'\xE2\x69' : '\u00ed',
        b'\xE2\x6B' : '\u1e31', b'\xE2\x6C' : '\u013a',
        b'\xE2\x6D' : '\u1e3f', b'\xE2\x6E' : '\u0144',
        b'\xE2\x6F' : '\u00f3', b'\xE2\x70' : '\u1e55',
        b'\xE2\x72' : '\u0155', b'\xE2\x73' : '\u015b',
        b'\xE2\x75' : '\u00fa', b'\xE2\x77' : '\u1e83',
        b'\xE2\x79' : '\u00fd', b'\xE2\x7A' : '\u017a',
        b'\xE2\xA5' : '\u01fc', b'\xE2\xB5' : '\u01fd',
        b'\xE3\x41' : '\u00c2', b'\xE3\x43' : '\u0108',
        b'\xE3\x45' : '\u00ca', b'\xE3\x47' : '\u011c',
        b'\xE3\x48' : '\u0124', b'\xE3\x49' : '\u00ce',
        b'\xE3\x4A' : '\u0134', b'\xE3\x4F' : '\u00d4',
        b'\xE3\x53' : '\u015c', b'\xE3\x55' : '\u00db',
        b'\xE3\x57' : '\u0174', b'\xE3\x59' : '\u0176',
        b'\xE3\x5A' : '\u1e90', b'\xE3\x61' : '\u00e2',
        b'\xE3\x63' : '\u0109', b'\xE3\x65' : '\u00ea',
        b'\xE3\x67' : '\u011d', b'\xE3\x68' : '\u0125',
        b'\xE3\x69' : '\u00ee', b'\xE3\x6A' : '\u0135',
        b'\xE3\x6F' : '\u00f4', b'\xE3\x73' : '\u015d',
        b'\xE3\x75' : '\u00fb', b'\xE3\x77' : '\u0175',
        b'\xE3\x79' : '\u0177', b'\xE3\x7A' : '\u1e91',
        b'\xE4\x41' : '\u00c3', b'\xE4\x45' : '\u1ebc',
        b'\xE4\x49' : '\u0128', b'\xE4\x4E' : '\u00d1',
        b'\xE4\x4F' : '\u00d5', b'\xE4\x55' : '\u0168',
        b'\xE4\x56' : '\u1e7c', b'\xE4\x59' : '\u1ef8',
        b'\xE4\x61' : '\u00e3', b'\xE4\x65' : '\u1ebd',
        b'\xE4\x69' : '\u0129', b'\xE4\x6E' : '\u00f1',
        b'\xE4\x6F' : '\u00f5', b'\xE4\x75' : '\u0169',
        b'\xE4\x76' : '\u1e7d', b'\xE4\x79' : '\u1ef9',
        b'\xE5\x41' : '\u0100', b'\xE5\x45' : '\u0112',
        b'\xE5\x47' : '\u1e20', b'\xE5\x49' : '\u012a',
        b'\xE5\x4F' : '\u014c', b'\xE5\x55' : '\u016a',
        b'\xE5\x61' : '\u0101', b'\xE5\x65' : '\u0113',
        b'\xE5\x67' : '\u1e21', b'\xE5\x69' : '\u012b',
        b'\xE5\x6F' : '\u014d', b'\xE5\x75' : '\u016b',
        b'\xE5\xA5' : '\u01e2', b'\xE5\xB5' : '\u01e3',
        b'\xE6\x41' : '\u0102', b'\xE6\x45' : '\u0114',
        b'\xE6\x47' : '\u011e', b'\xE6\x49' : '\u012c',
        b'\xE6\x4F' : '\u014e', b'\xE6\x55' : '\u016c',
        b'\xE6\x61' : '\u0103', b'\xE6\x65' : '\u0115',
        b'\xE6\x67' : '\u011f', b'\xE6\x69' : '\u012d',
        b'\xE6\x6F' : '\u014f', b'\xE6\x75' : '\u016d',
        b'\xE7\x42' : '\u1e02', b'\xE7\x43' : '\u010a',
        b'\xE7\x44' : '\u1e0a', b'\xE7\x45' : '\u0116',
        b'\xE7\x46' : '\u1e1e', b'\xE7\x47' : '\u0120',
        b'\xE7\x48' : '\u1e22', b'\xE7\x49' : '\u0130',
        b'\xE7\x4D' : '\u1e40', b'\xE7\x4E' : '\u1e44',
        b'\xE7\x50' : '\u1e56', b'\xE7\x52' : '\u1e58',
        b'\xE7\x53' : '\u1e60', b'\xE7\x54' : '\u1e6a',
        b'\xE7\x57' : '\u1e86', b'\xE7\x58' : '\u1e8a',
        b'\xE7\x59' : '\u1e8e', b'\xE7\x5A' : '\u017b',
        b'\xE7\x62' : '\u1e03', b'\xE7\x63' : '\u010b',
        b'\xE7\x64' : '\u1e0b', b'\xE7\x65' : '\u0117',
        b'\xE7\x66' : '\u1e1f', b'\xE7\x67' : '\u0121',
        b'\xE7\x68' : '\u1e23', b'\xE7\x6D' : '\u1e41',
        b'\xE7\x6E' : '\u1e45', b'\xE7\x70' : '\u1e57',
        b'\xE7\x72' : '\u1e59', b'\xE7\x73' : '\u1e61',
        b'\xE7\x74' : '\u1e6b', b'\xE7\x77' : '\u1e87',
        b'\xE7\x78' : '\u1e8b', b'\xE7\x79' : '\u1e8f',
        b'\xE7\x7A' : '\u017c', b'\xE8\x41' : '\u00c4',
        b'\xE8\x45' : '\u00cb', b'\xE8\x48' : '\u1e26',
        b'\xE8\x49' : '\u00cf', b'\xE8\x4F' : '\u00d6',
        b'\xE8\x55' : '\u00dc', b'\xE8\x57' : '\u1e84',
        b'\xE8\x58' : '\u1e8c', b'\xE8\x59' : '\u0178',
        b'\xE8\x61' : '\u00e4', b'\xE8\x65' : '\u00eb',
        b'\xE8\x68' : '\u1e27', b'\xE8\x69' : '\u00ef',
        b'\xE8\x6F' : '\u00f6', b'\xE8\x74' : '\u1e97',
        b'\xE8\x75' : '\u00fc', b'\xE8\x77' : '\u1e85',
        b'\xE8\x78' : '\u1e8d', b'\xE8\x79' : '\u00ff',
        b'\xE9\x41' : '\u01cd', b'\xE9\x43' : '\u010c',
        b'\xE9\x44' : '\u010e', b'\xE9\x45' : '\u011a',
        b'\xE9\x47' : '\u01e6', b'\xE9\x49' : '\u01cf',
        b'\xE9\x4B' : '\u01e8', b'\xE9\x4C' : '\u013d',
        b'\xE9\x4E' : '\u0147', b'\xE9\x4F' : '\u01d1',
        b'\xE9\x52' : '\u0158', b'\xE9\x53' : '\u0160',
        b'\xE9\x54' : '\u0164', b'\xE9\x55' : '\u01d3',
        b'\xE9\x5A' : '\u017d', b'\xE9\x61' : '\u01ce',
        b'\xE9\x63' : '\u010d', b'\xE9\x64' : '\u010f',
        b'\xE9\x65' : '\u011b', b'\xE9\x67' : '\u01e7',
        b'\xE9\x69' : '\u01d0', b'\xE9\x6A' : '\u01f0',
        b'\xE9\x6B' : '\u01e9', b'\xE9\x6C' : '\u013e',
        b'\xE9\x6E' : '\u0148', b'\xE9\x6F' : '\u01d2',
        b'\xE9\x72' : '\u0159', b'\xE9\x73' : '\u0161',
        b'\xE9\x74' : '\u0165', b'\xE9\x75' : '\u01d4',
        b'\xE9\x7A' : '\u017e', b'\xEA\x41' : '\u00c5',
        b'\xEA\x61' : '\u00e5', b'\xEA\x75' : '\u016f',
        b'\xEA\x77' : '\u1e98', b'\xEA\x79' : '\u1e99',
        b'\xEA\xAD' : '\u016e', b'\xEE\x4F' : '\u0150',
        b'\xEE\x55' : '\u0170', b'\xEE\x6F' : '\u0151',
        b'\xEE\x75' : '\u0171', b'\xF0\x20' : '\u00b8',
        b'\xF0\x43' : '\u00c7', b'\xF0\x44' : '\u1e10',
        b'\xF0\x47' : '\u0122', b'\xF0\x48' : '\u1e28',
        b'\xF0\x4B' : '\u0136', b'\xF0\x4C' : '\u013b',
        b'\xF0\x4E' : '\u0145', b'\xF0\x52' : '\u0156',
        b'\xF0\x53' : '\u015e', b'\xF0\x54' : '\u0162',
        b'\xF0\x63' : '\u00e7', b'\xF0\x64' : '\u1e11',
        b'\xF0\x67' : '\u0123', b'\xF0\x68' : '\u1e29',
        b'\xF0\x6B' : '\u0137', b'\xF0\x6C' : '\u013c',
        b'\xF0\x6E' : '\u0146', b'\xF0\x72' : '\u0157',
        b'\xF0\x73' : '\u015f', b'\xF0\x74' : '\u0163',
        b'\xF1\x41' : '\u0104', b'\xF1\x45' : '\u0118',
        b'\xF1\x49' : '\u012e', b'\xF1\x4F' : '\u01ea',
        b'\xF1\x55' : '\u0172', b'\xF1\x61' : '\u0105',
        b'\xF1\x65' : '\u0119', b'\xF1\x69' : '\u012f',
        b'\xF1\x6F' : '\u01eb', b'\xF1\x75' : '\u0173',
        b'\xF2\x41' : '\u1ea0', b'\xF2\x42' : '\u1e04',
        b'\xF2\x44' : '\u1e0c', b'\xF2\x45' : '\u1eb8',
        b'\xF2\x48' : '\u1e24', b'\xF2\x49' : '\u1eca',
        b'\xF2\x4B' : '\u1e32', b'\xF2\x4C' : '\u1e36',
        b'\xF2\x4D' : '\u1e42', b'\xF2\x4E' : '\u1e46',
        b'\xF2\x4F' : '\u1ecc', b'\xF2\x52' : '\u1e5a',
        b'\xF2\x53' : '\u1e62', b'\xF2\x54' : '\u1e6c',
        b'\xF2\x55' : '\u1ee4', b'\xF2\x56' : '\u1e7e',
        b'\xF2\x57' : '\u1e88', b'\xF2\x59' : '\u1ef4',
        b'\xF2\x5A' : '\u1e92', b'\xF2\x61' : '\u1ea1',
        b'\xF2\x62' : '\u1e05', b'\xF2\x64' : '\u1e0d',
        b'\xF2\x65' : '\u1eb9', b'\xF2\x68' : '\u1e25',
        b'\xF2\x69' : '\u1ecb', b'\xF2\x6B' : '\u1e33',
        b'\xF2\x6C' : '\u1e37', b'\xF2\x6D' : '\u1e43',
        b'\xF2\x6E' : '\u1e47', b'\xF2\x6F' : '\u1ecd',
        b'\xF2\x72' : '\u1e5b', b'\xF2\x73' : '\u1e63',
        b'\xF2\x74' : '\u1e6d', b'\xF2\x75' : '\u1ee5',
        b'\xF2\x76' : '\u1e7f', b'\xF2\x77' : '\u1e89',
        b'\xF2\x79' : '\u1ef5', b'\xF2\x7A' : '\u1e93',
        b'\xF3\x55' : '\u1e72', b'\xF3\x75' : '\u1e73',
        b'\xF4\x41' : '\u1e00', b'\xF4\x61' : '\u1e01',
        b'\xF9\x48' : '\u1e2a', b'\xF9\x68' : '\u1e2b', }

    def __ansel_to_unicode(self, text):
        """
        Convert an ANSEL encoded text to unicode

        text is a bytes object.  Disallowed ASCII control characters are
        replaced by a space, unknown high bytes by the unicode replacement
        character, and a combining byte that is not followed by a printable
        ASCII character is dropped.  Each such byte is listed in a single
        message passed to report_error().  Returns the converted string.
        """

        buff = StringIO()
        error = ""
        while text:
            if text[0] < 128:
                if chr(text[0]) in AnselReader.__use_ASCII:
                    head = chr(text[0])
                else:
                    # substitute space for disallowed (control) chars
                    error += " (%#X)" % text[0]
                    head = ' '
                text = text[1:]
            else:
                lead = text[0:1]
                if text[0:2] in AnselReader.__twobyte:
                    head = AnselReader.__twobyte[text[0:2]]
                    text = text[2:]
                elif lead in AnselReader.__onebyte:
                    head = AnselReader.__onebyte[lead]
                    text = text[1:]
                elif lead in AnselReader.__acombiners:
                    combiner = text[0]
                    cmb = AnselReader.__acombiners[lead]
                    # always consume the combiner
                    text = text[1:]
                    # the combiner may be the very last byte, so make sure
                    # there is a following byte before looking at it
                    if (text and text[0] < 128 and
                            chr(text[0]) in AnselReader.__printable_ascii):
                        # unicode: combiner follows base-char
                        head = chr(text[0]) + cmb
                        # consume next as well
                        text = text[1:]
                    else:
                        # just drop the unexpected combiner, reporting the
                        # combiner itself; the following byte (if any) is
                        # handled by the next pass of the loop
                        error += " (%#X)" % combiner
                        continue
                else:
                    error += " (%#X)" % text[0]
                    head = '\ufffd'  # "Replacement Char"
                    text = text[1:]
            buff.write(head)
        ans = buff.getvalue()

        if error:
            # e.g. Illegal character (0XAB) (0XCB)... 1 NOTE xyz?pqr?lmn
            self.report_error(_("Illegal character%s") % error, ans)
        buff.close()
        return ans

    def __init__(self, ifile, __add_msg):
        BaseReader.__init__(self, ifile, "ANSEL", __add_msg)
        # In theory, we should have been able to skip the encode/decode from
        # ascii.  But this way allows us to use pythons universal newline
        self.ifile = TextIOWrapper(ifile, encoding='ascii',
                                   errors='surrogateescape', newline=None)

    def readline(self):
        """ Read one line and convert it from ANSEL to unicode """
        line = self.ifile.readline()
        # surrogateescape round-trips the non-ASCII bytes unchanged, giving
        # back the raw ANSEL bytes with normalized line endings
        linebytes = line.encode(encoding='ascii',
                                errors='surrogateescape')
        return self.__ansel_to_unicode(linebytes)
1556
1557
1558#-------------------------------------------------------------------------
1559#
1560# CurrentState
1561#
1562#-------------------------------------------------------------------------
class CurrentState:
    """
    Container for the variables that describe the current parsing state.

    Attributes are kept directly in the instance dictionary; reading an
    attribute that was never set yields None instead of raising.
    """
    def __init__(self, person=None, level=0, event=None, event_ref=None):
        """
        Set every state variable to its starting value.
        """
        vars(self).update(
            name_cnt=0,
            person=person,
            family=None,
            level=level,
            event=event,
            event_ref=event_ref,
            source_ref=None,
            citation=None,
            note=None,
            lds_ord=None,
            msg="",
            primary=False,      # _PRIMARY tag on an INDI.FAMC tag
            filename="",
            title="",
            addr=None,
            res=None,
            source=None,
            ftype=None,
            pf=None,            # method for parsing places
            location=None,
            place_fields=None,  # method for parsing places
            ref=None,           # PersonRef
            handle=None,
            form="",            # Multimedia format
            frel=None,          # Child relation to father
            mrel=None,
            repo=None,
            attr=None,
            obj=None,
            name="",
            ignore=False,
            repo_ref=None,
            place=None,
            media=None,
            photo="",           # Person primary photo
            prim=None,          # Photo is primary
        )

    def __getattr__(self, name):
        """
        Look the attribute up in the instance dictionary; None if absent.
        """
        return vars(self).get(name)

    def __setattr__(self, name, value):
        """
        Store the value in the instance dictionary under the given name.
        """
        vars(self)[name] = value
1619
1620
1621#-------------------------------------------------------------------------
1622#
1623# PlaceParser
1624#
1625#-------------------------------------------------------------------------
class PlaceParser:
    """
    Interpret GEDCOM place text with the help of a PLAC.FORM statement.

    The FORM statement names the comma separated parts of a place string.
    Each name is translated into the Location setter that should receive
    the part found at that position.
    """

    # Lower-cased FORM keyword -> Location setter receiving that part.
    __field_map = {
        # street
        'addr': Location.set_street,
        'subdivision': Location.set_street,
        'addr1': Location.set_street,
        'adr1': Location.set_street,
        'street': Location.set_street,
        # locality
        'addr2': Location.set_locality,
        'adr2': Location.set_locality,
        'locality': Location.set_locality,
        'neighborhood': Location.set_locality,
        # city
        'city': Location.set_city,
        'town': Location.set_city,
        'village': Location.set_city,
        # county / country
        'county': Location.set_county,
        'country': Location.set_country,
        # state
        'state': Location.set_state,
        'state/province': Location.set_state,
        'region': Location.set_state,
        'province': Location.set_state,
        # postal code
        'area code': Location.set_postal_code,
        'post code': Location.set_postal_code,
        'zip code': Location.set_postal_code,
    }

    def __init__(self, line=None):
        # One callable per FORM position, filled in by parse_form()
        self.parse_function = []
        if line:
            self.parse_form(line)

    def parse_form(self, line):
        """
        Translate the GEDCOM PLAC.FORM held in line.data into setters.

        The data is split on commas; every piece is lower-cased, stripped
        and looked up in the field map.  The matching Location setter is
        appended to self.parse_function; pieces that are not recognised get
        a do-nothing placeholder so that positions stay aligned.
        """
        def _discard(_location, _value):
            # placeholder for FORM keywords we do not know
            return None

        for keyword in line.data.split(','):
            setter = self.__field_map.get(keyword.lower().strip(), _discard)
            self.parse_function.append(setter)

    def load_place(self, place_import, place, text):
        """
        Apply the parsed FORM to one place string.

        The text is split on commas and each part is handed to the setter
        found at the same position.  Nothing is done (and None is returned)
        when the number of parts differs from the number of FORM fields.

        The place name becomes the first non-empty value in the order
        street, locality, parish, city, county, state, country and the place
        type is derived from that position; without any value the place
        title and PlaceType.UNKNOWN are used.  The postal code is copied to
        the place as well.

        @return: None when the location was stored through place_import
                 (the place has a handle) or when nothing was done,
                 otherwise the location tuple for storage later
        """
        parts = [piece.strip() for piece in text.split(',')]
        if len(parts) != len(self.parse_function):
            return None

        loc = Location()
        for setter, part in zip(self.parse_function, parts):
            setter(loc, part)

        location = (loc.get_street(),
                    loc.get_locality(),
                    loc.get_parish(),
                    loc.get_city(),
                    loc.get_county(),
                    loc.get_state(),
                    loc.get_country())

        # Position and value of the first component that is filled in
        hit = next(((depth, value)
                    for depth, value in enumerate(location) if value), None)
        if hit is None:
            name = place.title
            type_num = PlaceType.UNKNOWN
        else:
            depth, name = hit
            # presumably PlaceType numbers run from 7 (street) down to
            # 1 (country), mirroring the tuple order - TODO confirm
            type_num = 7 - depth

        place.name.set_value(name)
        place.set_type(PlaceType(type_num))
        code = loc.get_postal_code()
        place.set_code(code)

        if not place.handle:
            # no handle yet: return for storage later
            return location
        # handle is available, store immediately
        place_import.store_location(location, place.handle)
        return None
1715
1716
1717#-------------------------------------------------------------------------
1718#
1719# IdFinder
1720#
1721#-------------------------------------------------------------------------
class IdFinder:
    """
    Hand out IDs that are not in use yet.
    """
    def __init__(self, keys, prefix):
        """
        Remember the IDs already taken and the format used for new ones.

        @param keys: iterable of IDs that are already in use
        @param prefix: %-style format string applied to a running integer
        """
        self.prefix = prefix
        self.index = 0
        self.ids = set(keys)

    def find_next(self):
        """
        Return the next free ID and mark it as used.

        Candidates are produced by formatting the running index with the
        prefix; candidates that are already known are skipped.

        @return: Returns the next available index
        @rtype: str
        """
        while True:
            candidate = self.prefix % self.index
            self.index += 1
            if candidate not in self.ids:
                break
        self.ids.add(candidate)
        return candidate
1749
1750
1751#-------------------------------------------------------------------------
1752#
1753# IdMapper
1754#
1755#-------------------------------------------------------------------------
class IdMapper:
    """ Keep track of the correspondence between Gedcom xrefs (@P1023@) and
    Gramps IDs.

    has_gid(gid) tells whether a Gramps ID is already in use, find_next()
    supplies a new Gramps ID and id2user_format(gid) brings an ID into the
    standard format. """
    def __init__(self, has_gid, find_next, id2user_format):
        self.swap = {}
        self.has_gid = has_gid
        self.find_next = find_next
        self.id2user_format = id2user_format

    def _next_unused(self):
        """ ask find_next for IDs until one turns up that is not already the
        target of a swap """
        while True:
            candidate = self.find_next()
            if candidate not in self.swap.values():
                return candidate

    def __getitem__(self, gid):
        """ return the Gramps ID for the xref; an empty xref always gets a
        new ID, which is not recorded in the map """
        if gid == "":
            return self._next_unused()

        # remove any @ signs
        xref = self.clean(gid)
        if xref in self.swap:
            return self.swap[xref]

        # Standardise the format.  I1 and I0001 both format as I0001.  Had
        # we seen I1 before, it would have been found in self.swap above.
        # If I0001 was seen before and we are now asked for I1, the
        # formatted ID turns out to be in use and a new ID must be created.
        formatted = self.id2user_format(xref)
        in_use = self.has_gid(formatted) or formatted in self.swap.values()
        new_val = self._next_unused() if in_use else formatted

        # I1 and I0001 must stay distinguishable, so the map is keyed on the
        # original (cleaned) xref rather than on the formatted one
        self.swap[xref] = new_val
        return new_val

    def clean(self, gid):
        """ remove '@' from start and end of xref """
        stripped = gid.strip()
        wrapped = (len(stripped) > 1
                   and stripped[0] == '@' and stripped[-1] == '@')
        return stripped[1:-1] if wrapped else stripped

    def map(self):
        """ return the xref to GID translation map """
        return self.swap
1807
1808
1809#-------------------------------------------------------------------------
1810#
1811# GedcomParser
1812#
1813#-------------------------------------------------------------------------
1814class GedcomParser(UpdateCallback):
1815    """
1816    Performs the second pass of the GEDCOM parser, which does all the heavy
1817    lifting.
1818    """
1819
    # Message text for a corrupted/truncated GEDCOM file, wrapped in _()
    # (presumably the translation function - defined outside this section).
    __TRUNC_MSG = _("Your GEDCOM file is corrupted. "
                    "It appears to have been truncated.")
    # Serialized form of a freshly constructed Location, computed once when
    # the class body is executed.
    _EMPTY_LOC = Location().serialize()

    # Plain string constants.  Note that SyntaxError here is a class
    # attribute holding a string; it is only reachable through the class or
    # an instance and is unrelated to the builtin exception of that name.
    SyntaxError = "Syntax Error"
    BadFile = "Not a GEDCOM file"
1826
1827    @staticmethod
1828    def __find_from_handle(gramps_id, table):
1829        """
1830        Find a handle corresponding to the specified Gramps ID.
1831
1832        The passed table contains the mapping. If the value is found, we return
1833        it, otherwise we create a new handle, store it, and return it.
1834
1835        """
1836        intid = table.get(gramps_id)
1837        if not intid:
1838            intid = create_id()
1839            table[gramps_id] = intid
1840        return intid
1841
1842    @staticmethod
1843    def __parse_name_personal(text):
1844        """
1845        Parses a GEDCOM NAME value into an Name structure
1846        """
1847        name = Name()
1848
1849        match = SURNAME_RE.match(text)
1850        if match:
1851            #/surname/ extra, we assume extra is given name
1852            names = match.groups()
1853            name.set_first_name(names[1].strip())
1854            surn = Surname()
1855            surn.set_surname(names[0].strip())
1856            surn.set_primary()
1857            name.set_surname_list([surn])
1858        else:
1859            try:
1860                names = NAME_RE.match(text).groups()
1861                # given /surname/ extra, we assume extra is suffix
1862                name.set_first_name(names[0].strip())
1863                surn = Surname()
1864                surn.set_surname(names[2].strip())
1865                surn.set_primary()
1866                name.set_surname_list([surn])
1867                name.set_suffix(names[4].strip())
1868            except:
1869                # something strange, set as first name
1870                name.set_first_name(text.strip())
1871        return name
1872
1873    def __init__(self, dbase, ifile, filename, user, stage_one,
1874                 default_source, default_tag_format=None):
1875        UpdateCallback.__init__(self, user.callback)
1876        self.user = user
1877        self.set_total(stage_one.get_line_count())
1878        self.repo2id = {}
1879        self.trans = None
1880        self.errors = []
1881        self.number_of_errors = 0
1882        self.maxpeople = stage_one.get_person_count()
1883        self.dbase = dbase
1884        self.import_researcher = self.dbase.get_total() == 0
1885        event_ids = []
1886        for event in dbase.iter_events():
1887            event_ids.append(event.gramps_id)
1888        self.emapper = IdFinder(event_ids, dbase.event_prefix)
1889        self.famc_map = stage_one.get_famc_map()
1890        self.fams_map = stage_one.get_fams_map()
1891
1892        self.place_parser = PlaceParser()
1893        self.inline_srcs = OrderedDict()
1894        self.media_map = {}
1895        self.note_type_map = {}
1896        self.genby = ""
1897        self.genvers = ""
1898        self.subm = ""
1899        self.use_def_src = default_source
1900        self.func_list = []
1901        if self.use_def_src:
1902            self.def_src = Source()
1903            fname = os.path.basename(filename).split('\\')[-1]
1904            self.def_src.set_title(_("Import from GEDCOM (%s)") % fname)
1905        if default_tag_format:
1906            name = time.strftime(default_tag_format)
1907            tag = self.dbase.get_tag_from_name(name)
1908            if tag:
1909                self.default_tag = tag
1910            else:
1911                self.default_tag = Tag()
1912                self.default_tag.set_name(name)
1913        else:
1914            self.default_tag = None
1915        self.dir_path = os.path.dirname(filename)
1916        self.is_ftw = False
1917        self.addr_is_detail = False
1918        self.groups = None
1919        self.want_parse_warnings = True
1920
1921        self.pid_map = IdMapper(
1922            self.dbase.has_person_gramps_id,
1923            self.dbase.find_next_person_gramps_id,
1924            self.dbase.id2user_format)
1925        self.fid_map = IdMapper(
1926            self.dbase.has_family_gramps_id,
1927            self.dbase.find_next_family_gramps_id,
1928            self.dbase.fid2user_format)
1929        self.sid_map = IdMapper(
1930            self.dbase.has_source_gramps_id,
1931            self.dbase.find_next_source_gramps_id,
1932            self.dbase.sid2user_format)
1933        self.oid_map = IdMapper(
1934            self.dbase.has_media_gramps_id,
1935            self.dbase.find_next_media_gramps_id,
1936            self.dbase.oid2user_format)
1937        self.rid_map = IdMapper(
1938            self.dbase.has_repository_gramps_id,
1939            self.dbase.find_next_repository_gramps_id,
1940            self.dbase.rid2user_format)
1941        self.nid_map = IdMapper(
1942            self.dbase.has_note_gramps_id,
1943            self.dbase.find_next_note_gramps_id,
1944            self.dbase.nid2user_format)
1945
1946        self.gid2id = {}
1947        self.oid2id = {}
1948        self.sid2id = {}
1949        self.lid2id = {}
1950        self.fid2id = {}
1951        self.rid2id = {}
1952        self.nid2id = {}
1953
1954        self.place_import = PlaceImport(self.dbase)
1955
1956        #
1957        # Parse table for <<SUBMITTER_RECORD>> below the level 0 SUBM tag
1958        #
1959        # n @<XREF:SUBM>@   SUBM                          {1:1}
1960        #   +1 NAME <SUBMITTER_NAME>                      {1:1}
1961        #   +1 <<ADDRESS_STRUCTURE>>                      {0:1}
1962        #   +1 <<MULTIMEDIA_LINK>>                        {0:M}
1963        #   +1 LANG <LANGUAGE_PREFERENCE>                 {0:3}
1964        #   +1 <<NOTE_STRUCTURE>>                         {0:M}
1965        #   +1 RFN <SUBMITTER_REGISTERED_RFN>             {0:1}
1966        #   +1 RIN <AUTOMATED_RECORD_ID>                  {0:1}
1967        #   +1 <<CHANGE_DATE>>                            {0:1}
1968
1969        # (N.B. GEDCOM allows multiple SUBMitter records)
1970        self.subm_parse_tbl = {
1971            # +1 NAME <SUBMITTER_NAME>
1972            TOKEN_NAME  : self.__subm_name,
1973            # +1 <<ADDRESS_STRUCTURE>>
1974            TOKEN_ADDR  : self.__subm_addr,
1975            TOKEN_PHON  : self.__subm_phon,
1976            TOKEN_EMAIL : self.__subm_email,
1977            TOKEN_WWW   : self.__repo_www,
1978            TOKEN_FAX   : self.__repo_fax,
1979            # +1 <<MULTIMEDIA_LINK>>
1980            # +1 LANG <LANGUAGE_PREFERENCE>
1981            # +1 <<NOTE_STRUCTURE>>
1982            TOKEN_NOTE  : self.__repo_note,
1983            TOKEN_RNOTE : self.__repo_note,
1984            # +1 RFN <SUBMITTER_REGISTERED_RFN>
1985            # +1 RIN <AUTOMATED_RECORD_ID>
1986            # +1 <<CHANGE_DATE>>
1987            TOKEN_CHAN  : self.__repo_chan, }
1988        self.func_list.append(self.subm_parse_tbl)
1989
1990        #
1991        # Parse table for <<INDIVIDUAL_RECORD>> below the level 0  INDI tag
1992        #
1993        # n @<XREF:INDI>@  INDI                           {1:1}
1994        #   +1 RESN <RESTRICTION_NOTICE>                  {0:1}
1995        #   +1 <<PERSONAL_NAME_STRUCTURE>>                {0:M}
1996        #   +1 SEX <SEX_VALUE>                            {0:1}
1997        #   +1 <<INDIVIDUAL_EVENT_STRUCTURE>>             {0:M}
1998        #   +1 <<INDIVIDUAL_ATTRIBUTE_STRUCTURE>>         {0:M}
1999        #   +1 <<LDS_INDIVIDUAL_ORDINANCE>>               {0:M}
2000        #   +1 <<CHILD_TO_FAMILY_LINK>>                   {0:M}
2001        #   +1 <<SPOUSE_TO_FAMILY_LINK>>                  {0:M}
2002        #   +1 SUBM @<XREF:SUBM>@                         {0:M}
2003        #   +1 <<ASSOCIATION_STRUCTURE>>                  {0:M}
2004        #   +1 ALIA @<XREF:INDI>@                         {0:M}
2005        #   +1 ANCI @<XREF:SUBM>@                         {0:M}
2006        #   +1 DESI @<XREF:SUBM>@                         {0:M}
2007        #   +1 <<SOURCE_CITATION>>                        {0:M}
2008        #   +1 <<MULTIMEDIA_LINK>>                        {0:M}
2009        #   +1 <<NOTE_STRUCTURE>>                         {0:M}
2010        #   +1 RFN <PERMANENT_RECORD_FILE_NUMBER>         {0:1}
2011        #   +1 AFN <ANCESTRAL_FILE_NUMBER>                {0:1}
2012        #   +1 REFN <USER_REFERENCE_NUMBER>               {0:M}
2013        #     +2 TYPE <USER_REFERENCE_TYPE>               {0:1}
2014        #   +1 RIN <AUTOMATED_RECORD_ID>                  {0:1}
2015        #   +1 <<CHANGE_DATE>>                            {0:1}
2016
2017        self.indi_parse_tbl = {
2018            # +1 RESN <RESTRICTION_NOTICE> {0:1}
2019            TOKEN_RESN  : self.__person_resn,
2020            # +1 <<PERSONAL_NAME_STRUCTURE>> {0:M}
2021            TOKEN_NAME  : self.__person_name,
2022            # +1 SEX <SEX_VALUE> {0:1}
2023            TOKEN_SEX   : self.__person_sex,
2024            # +1 <<INDIVIDUAL_EVENT_STRUCTURE>> {0:M}
2025            TOKEN_EVEN  : self.__person_even,
2026            TOKEN_GEVENT: self.__person_std_event,
2027            TOKEN_BIRT  : self.__person_birt,
2028            TOKEN_RELI  : self.__person_reli,
2029            TOKEN_ADOP  : self.__person_adop,
2030            TOKEN_DEAT  : self.__person_deat,
2031            # +1 <<INDIVIDUAL_ATTRIBUTE_STRUCTURE>> {0:M}
2032            # +1 AFN <ANCESTRAL_FILE_NUMBER> {0:1}
2033            TOKEN_ATTR  : self.__person_std_attr,
2034            TOKEN_FACT  : self.__person_fact,
2035            #+1 <<LDS_INDIVIDUAL_ORDINANCE>> {0:M}
2036            TOKEN_BAPL  : self.__person_bapl,
2037            TOKEN_CONL  : self.__person_conl,
2038            TOKEN_ENDL  : self.__person_endl,
2039            TOKEN_SLGC  : self.__person_slgc,
2040            #+1 <<CHILD_TO_FAMILY_LINK>> {0:M}
2041            TOKEN_FAMC  : self.__person_famc,
2042            # +1 <<SPOUSE_TO_FAMILY_LINK>> {0:M}
2043            TOKEN_FAMS  : self.__person_fams,
2044            # +1 SUBM @<XREF:SUBM>@ {0:M}
2045            TOKEN_SUBM  : self.__skip_record,
2046            # +1 <<ASSOCIATION_STRUCTURE>> {0:M}
2047            TOKEN_ASSO  : self.__person_asso,
2048            # +1 ALIA @<XREF:INDI>@ {0:M}
2049            TOKEN_ALIA  : self.__person_alt_name,
2050            # +1 ANCI @<XREF:SUBM>@ {0:M}
2051            TOKEN_ANCI  : self.__skip_record,
2052            # +1 DESI @<XREF:SUBM>@ {0:M}
2053            TOKEN_DESI  : self.__skip_record,
2054            # +1 <<SOURCE_CITATION>> {0:M}
2055            TOKEN_SOUR  : self.__person_sour,
2056            # +1 <<MULTIMEDIA_LINK>> {0:M}
2057            TOKEN_OBJE  : self.__person_object,
2058            # +1 <<NOTE_STRUCTURE>> {0:M}
2059            TOKEN_NOTE  : self.__person_note,
2060            TOKEN_RNOTE : self.__person_note,
2061            TOKEN__COMM : self.__person_note,
2062            # +1 RFN <PERMANENT_RECORD_FILE_NUMBER> {0:1}
2063            TOKEN_RFN   : self.__person_attr,
2064            # +1 REFN <USER_REFERENCE_NUMBER> {0:M}
2065            # +2 TYPE <USER_REFERENCE_TYPE> {0:1}
2066            TOKEN_REFN  : self.__person_refn,
2067            # TYPE should be below REFN, but will work here anyway
2068            TOKEN_TYPE  : self.__person_attr,
2069            # +1 RIN <AUTOMATED_RECORD_ID> {0:1}
2070            TOKEN_RIN   : self.__person_attr,
2071            # +1 <<CHANGE_DATE>> {0:1}
2072            TOKEN_CHAN  : self.__person_chan,
2073            # The following tags are not part of Gedcom spec but are commonly
2074            # found here anyway
2075            TOKEN_ADDR  : self.__person_addr,
2076            TOKEN_PHON  : self.__person_phon,
2077            TOKEN_FAX   : self.__person_fax,
2078            TOKEN_EMAIL : self.__person_email,
2079            TOKEN_WWW   : self.__person_www,
2080            TOKEN__TODO : self.__skip_record,
2081            TOKEN_TITL  : self.__person_titl,
2082            TOKEN__PHOTO: self.__person_photo, }
2083        self.func_list.append(self.indi_parse_tbl)
2084
2085        self.name_parse_tbl = {
2086            # +1 NPFX <NAME_PIECE_PREFIX> {0:1}
2087            TOKEN_NPFX   : self.__name_npfx,
2088            # +1 GIVN <NAME_PIECE_GIVEN> {0:1}
2089            TOKEN_GIVN   : self.__name_givn,
2090            # NICK <NAME_PIECE_NICKNAME> {0:1}
2091            TOKEN_NICK   : self.__name_nick,
2092            # +1 SPFX <NAME_PIECE_SURNAME_PREFIX {0:1}
2093            TOKEN_SPFX   : self.__name_spfx,
2094            # +1 SURN <NAME_PIECE_SURNAME> {0:1}
2095            TOKEN_SURN   : self.__name_surn,
2096            # +1 NSFX <NAME_PIECE_SUFFIX> {0:1}
2097            TOKEN_NSFX   : self.__name_nsfx,
2098            # +1 <<SOURCE_CITATION>> {0:M}
2099            TOKEN_SOUR   : self.__name_sour,
2100            # +1 <<NOTE_STRUCTURE>> {0:M}
2101            TOKEN_NOTE   : self.__name_note,
2102            TOKEN_RNOTE  : self.__name_note,
2103            # Extensions
2104            TOKEN_ALIA   : self.__name_alia,
2105            TOKEN__MARNM : self.__name_marnm,
2106            TOKEN__MAR   : self.__name_marnm,   # Generated by geni.com
2107            TOKEN__MARN  : self.__name_marnm,   # Gen'd by BROSKEEP 6.1.31 WIN
2108            TOKEN__AKA   : self.__name_aka,     # PAF and AncestQuest
2109            TOKEN_TYPE   : self.__name_type,    # This is legal GEDCOM 5.5.1
2110            TOKEN_BIRT   : self.__ignore,
2111            TOKEN_DATE   : self.__name_date,
2112            # This handles date as a subsidiary of "1 ALIA" which might be used
2113            # by Family Tree Maker and Reunion, and by cheating (handling a
2114            # lower level from the current parse table) handles date as
2115            # subsidiary to "2 _MARN", "2 _AKAN" and "2 _ADPN" which has been
2116            # found in Brother's keeper.
2117            TOKEN__ADPN   : self.__name_adpn, }
2118        self.func_list.append(self.name_parse_tbl)
2119
2120        #
2121        # Parse table for <<REPOSITORY_RECORD>> below the level 0 REPO tag
2122        #
2123        # n @<XREF:REPO>@ REPO                            {1:1}
2124        #   +1 NAME <NAME_OF_REPOSITORY>                  {0:1}
2125        #   +1 <<ADDRESS_STRUCTURE>>                      {0:1}
2126        #   +1 <<NOTE_STRUCTURE>>                         {0:M}
2127        #   +1 REFN <USER_REFERENCE_NUMBER>               {0:M}
2128        #     +2 TYPE <USER_REFERENCE_TYPE>               {0:1}
2129        #   +1 RIN <AUTOMATED_RECORD_ID>                  {0:1}
2130        #   +1 <<CHANGE_DATE>>                            {0:1}
2131
2132        self.repo_parse_tbl = {
2133            TOKEN_NAME   : self.__repo_name,
2134            TOKEN_ADDR   : self.__repo_addr,
2135            TOKEN_RIN    : self.__ignore,
2136            TOKEN_NOTE   : self.__repo_note,
2137            TOKEN_RNOTE  : self.__repo_note,
2138            TOKEN_CHAN   : self.__repo_chan,
2139            TOKEN_PHON   : self.__repo_phon,
2140            TOKEN_EMAIL  : self.__repo_email,
2141            TOKEN_WWW    : self.__repo_www,
2142            TOKEN_FAX    : self.__repo_fax, }
2143        self.func_list.append(self.repo_parse_tbl)
2144
2145        self.event_parse_tbl = {
2146            # n TYPE <EVENT_DESCRIPTOR> {0:1}
2147            TOKEN_TYPE   : self.__event_type,
2148            # n DATE <DATE_VALUE> {0:1} p.*/*
2149            TOKEN_DATE   : self.__event_date,
2150            # n <<PLACE_STRUCTURE>> {0:1} p.*
2151            TOKEN_PLAC   : self.__event_place,
2152            # n <<ADDRESS_STRUCTURE>> {0:1} p.*
2153            TOKEN_ADDR   : self.__event_addr,
2154            # n AGE <AGE_AT_EVENT> {0:1} p.*
2155            TOKEN_AGE    : self.__event_age,
2156            # n AGNC <RESPONSIBLE_AGENCY> {0:1} p.*
2157            TOKEN_AGNC   : self.__event_agnc,
2158            # n CAUS <CAUSE_OF_EVENT> {0:1} p.*
2159            TOKEN_CAUS   : self.__event_cause,
2160            # n <<SOURCE_CITATION>> {0:M} p.*
2161            TOKEN_SOUR   : self.__event_source,
2162            # n <<MULTIMEDIA_LINK>> {0:M} p.*, *
2163            TOKEN_OBJE   : self.__event_object,
2164            # n <<NOTE_STRUCTURE>> {0:M} p.
2165            TOKEN_NOTE   : self.__event_inline_note,
2166            TOKEN_RNOTE  : self.__event_note,
2167            # Other
2168            TOKEN__PRIV  : self.__event_privacy,
2169            TOKEN_OFFI   : self.__event_note,
2170            TOKEN_PHON   : self.__event_phon,
2171            TOKEN__GODP  : self.__event_witness,
2172            TOKEN__WITN  : self.__event_witness,
2173            TOKEN__WTN   : self.__event_witness,
2174            TOKEN_RELI   : self.__ignore,
2175            # Not legal, but inserted by PhpGedView
2176            TOKEN_TIME   : self.__event_time,
2177            TOKEN_ASSO   : self.__ignore,
2178            TOKEN_IGNORE : self.__ignore,
2179            TOKEN_STAT   : self.__ignore,
2180            TOKEN_TEMP   : self.__ignore,
2181            TOKEN_HUSB   : self.__event_husb,
2182            TOKEN_WIFE   : self.__event_wife,
2183            TOKEN_FAMC   : self.__person_birth_famc,
2184            # Not legal, but inserted by Ultimate Family Tree
2185            TOKEN_CHAN   : self.__ignore,
2186            TOKEN_QUAY  : self.__ignore,
2187            # Not legal, but inserted by FamilyTreeBuilder
2188            TOKEN_RIN    : self.__event_rin,
2189            TOKEN_ATTR   : self.__event_attr,   # FTB for _UID
2190            TOKEN_EMAIL  : self.__event_email,  # FTB for RESI events
2191            TOKEN_WWW    : self.__event_www,    # FTB for RESI events
2192            TOKEN_FAX    : self.__event_fax,    # legal...
2193        }
2194        self.func_list.append(self.event_parse_tbl)
2195
2196        self.adopt_parse_tbl = {
2197            TOKEN_TYPE   : self.__event_type,
2198            TOKEN__PRIV  : self.__event_privacy,
2199            TOKEN_DATE   : self.__event_date,
2200            TOKEN_SOUR   : self.__event_source,
2201            TOKEN_PLAC   : self.__event_place,
2202            TOKEN_ADDR   : self.__event_addr,
2203            TOKEN_PHON   : self.__event_phon,
2204            TOKEN_CAUS   : self.__event_cause,
2205            TOKEN_AGNC   : self.__event_agnc,
2206            TOKEN_AGE    : self.__event_age,
2207            TOKEN_NOTE   : self.__event_note,
2208            TOKEN_RNOTE  : self.__event_note,
2209            TOKEN_OFFI   : self.__event_note,
2210            TOKEN__GODP  : self.__event_witness,
2211            TOKEN__WITN  : self.__event_witness,
2212            TOKEN__WTN   : self.__event_witness,
2213            TOKEN_RELI   : self.__ignore,
2214            TOKEN_TIME   : self.__ignore,
2215            TOKEN_ASSO   : self.__ignore,
2216            TOKEN_IGNORE : self.__ignore,
2217            TOKEN_STAT   : self.__ignore,
2218            TOKEN_TEMP   : self.__ignore,
2219            TOKEN_OBJE   : self.__event_object,
2220            TOKEN_FAMC   : self.__person_adopt_famc,
2221            # Not legal, but inserted by Ultimate Family Tree
2222            TOKEN_CHAN   : self.__ignore,
2223            TOKEN_QUAY   : self.__ignore,
2224        }
2225        self.func_list.append(self.adopt_parse_tbl)
2226
2227        self.famc_parse_tbl = {
2228            # n FAMC @<XREF:FAM>@ {1:1}
2229            # +1 PEDI <PEDIGREE_LINKAGE_TYPE> {0:1} p.*
2230            TOKEN_PEDI   : self.__person_famc_pedi,
2231            # +1 _FREL <Father PEDIGREE_LINKAGE_TYPE> {0:1}  non-standard
2232            TOKEN__FREL  : self.__person_famc_frel,
2233            # +1 _MREL <Mother PEDIGREE_LINKAGE_TYPE> {0:1}  non-standard
2234            TOKEN__MREL  : self.__person_famc_mrel,
2235            # +1 <<NOTE_STRUCTURE>> {0:M} p.*
2236            TOKEN_NOTE   : self.__person_famc_note,
2237            TOKEN_RNOTE  : self.__person_famc_note,
2238            # Extras
2239            TOKEN__PRIMARY: self.__person_famc_primary,
2240            TOKEN_SOUR   : self.__person_famc_sour,
2241            # GEDit
2242            TOKEN_STAT   : self.__ignore,
2243        }
2244        self.func_list.append(self.famc_parse_tbl)
2245
2246        self.person_fact_parse_tbl = {
2247            TOKEN_TYPE   : self.__person_fact_type,
2248            TOKEN_SOUR   : self.__person_attr_source,
2249            TOKEN_NOTE   : self.__person_attr_note,
2250            TOKEN_RNOTE  : self.__person_attr_note,
2251        }
2252        self.func_list.append(self.person_fact_parse_tbl)
2253
2254        self.person_attr_parse_tbl = {
2255            TOKEN_TYPE   : self.__person_attr_type,
2256            TOKEN_CAUS   : self.__ignore,
2257            TOKEN_DATE   : self.__ignore,
2258            TOKEN_TIME   : self.__ignore,
2259            TOKEN_ADDR   : self.__ignore,
2260            TOKEN_IGNORE : self.__ignore,
2261            TOKEN_STAT   : self.__ignore,
2262            TOKEN_TEMP   : self.__ignore,
2263            TOKEN_OBJE   : self.__ignore,
2264            TOKEN_SOUR   : self.__person_attr_source,
2265            TOKEN_PLAC   : self.__person_attr_place,
2266            TOKEN_NOTE   : self.__person_attr_note,
2267            TOKEN_RNOTE  : self.__person_attr_note,
2268        }
2269        self.func_list.append(self.person_attr_parse_tbl)
2270
2271        self.lds_parse_tbl = {
2272            TOKEN_TEMP   : self.__lds_temple,
2273            TOKEN_DATE   : self.__lds_date,
2274            TOKEN_FAMC   : self.__lds_famc,
2275            TOKEN_FORM   : self.__lds_form,
2276            TOKEN_PLAC   : self.__lds_plac,
2277            TOKEN_SOUR   : self.__lds_sour,
2278            TOKEN_NOTE   : self.__lds_note,
2279            TOKEN_RNOTE  : self.__lds_note,
2280            TOKEN_STAT   : self.__lds_stat,
2281        }
2282        self.func_list.append(self.lds_parse_tbl)
2283
2284        self.asso_parse_tbl = {
2285            TOKEN_RELA   : self.__person_asso_rela,
2286            TOKEN_SOUR   : self.__person_asso_sour,
2287            TOKEN_NOTE   : self.__person_asso_note,
2288            TOKEN_RNOTE  : self.__person_asso_note,
2289        }
2290        self.func_list.append(self.asso_parse_tbl)
2291
2292        self.citation_parse_tbl = {
2293            TOKEN_PAGE   : self.__citation_page,
2294            TOKEN_DATE   : self.__citation_date,
2295            TOKEN_DATA   : self.__citation_data,
2296            TOKEN_OBJE   : self.__citation_obje,
2297            TOKEN_REFN   : self.__citation_refn,
2298            TOKEN_EVEN   : self.__citation_even,
2299            TOKEN_IGNORE : self.__ignore,
2300            TOKEN__LKD   : self.__ignore,
2301            TOKEN_QUAY   : self.__citation_quay,
2302            TOKEN_NOTE   : self.__citation_note,
2303            TOKEN_RNOTE  : self.__citation_note,
2304            TOKEN_TEXT   : self.__citation_data_text,
2305            TOKEN__LINK  : self.__citation_link,
2306            TOKEN__JUST  : self.__citation__just,
2307        }
2308        self.func_list.append(self.citation_parse_tbl)
2309
2310        self.media_parse_tbl = {
2311            TOKEN_FORM   : self.__media_ref_form,
2312            TOKEN_MEDI   : self.__media_ref_medi,        # v5.5.1
2313            TOKEN_TITL   : self.__media_ref_titl,
2314            TOKEN_FILE   : self.__media_ref_file,
2315            TOKEN_NOTE   : self.__obje_note,  # illegal, but often there
2316            TOKEN_RNOTE  : self.__obje_note,  # illegal, but often there
2317            TOKEN__PRIM  : self.__media_ref_prim,        # LFT etc.
2318            TOKEN_IGNORE : self.__ignore,
2319        }
2320        self.func_list.append(self.media_parse_tbl)
2321
2322        self.parse_loc_tbl = {
2323            TOKEN_ADR1   : self.__location_adr1,
2324            TOKEN_ADR2   : self.__location_adr2,
2325            TOKEN_CITY   : self.__location_city,
2326            TOKEN_STAE   : self.__location_stae,
2327            TOKEN_POST   : self.__location_post,
2328            TOKEN_CTRY   : self.__location_ctry,
2329            # Not legal GEDCOM - not clear why these are included at this level
2330            TOKEN_ADDR   : self.__ignore,
2331            TOKEN_DATE   : self.__ignore,  # there is nowhere to put a date
2332            TOKEN_NOTE   : self.__location_note,
2333            TOKEN_RNOTE  : self.__location_note,
2334            TOKEN__LOC   : self.__ignore,
2335            TOKEN__NAME  : self.__ignore,
2336            TOKEN_PHON   : self.__location_phone,
2337            TOKEN_IGNORE : self.__ignore,
2338        }
2339        self.func_list.append(self.parse_loc_tbl)
2340
2341        #
2342        # Parse table for <<FAM_RECORD>> below the level 0 FAM tag
2343        #
2344        # n @<XREF:FAM>@   FAM                            {1:1}
2345        #   +1 <<FAMILY_EVENT_STRUCTURE>>                 {0:M}
2346        #   +1 HUSB @<XREF:INDI>@                         {0:1}
2347        #   +1 WIFE @<XREF:INDI>@                         {0:1}
2348        #   +1 CHIL @<XREF:INDI>@                         {0:M}
2349        #   +1 NCHI <COUNT_OF_CHILDREN>                   {0:1}
2350        #   +1 SUBM @<XREF:SUBM>@                         {0:M}
2351        #   +1 <<LDS_SPOUSE_SEALING>>                     {0:M}
2352        #   +1 <<SOURCE_CITATION>>                        {0:M}
2353        #   +1 <<MULTIMEDIA_LINK>>                        {0:M}
2354        #   +1 <<NOTE_STRUCTURE>>                         {0:M}
2355        #   +1 REFN <USER_REFERENCE_NUMBER>               {0:M}
2356        #   +1 RIN <AUTOMATED_RECORD_ID>                  {0:1}
2357        #   +1 <<CHANGE_DATE>>                            {0:1}
2358
2359        self.family_func = {
2360            # +1 <<FAMILY_EVENT_STRUCTURE>>  {0:M}
2361            TOKEN_GEVENT : self.__family_std_event,
2362            TOKEN_EVEN   : self.__fam_even,
2363            # +1 HUSB @<XREF:INDI>@  {0:1}
2364            TOKEN_HUSB   : self.__family_husb,
2365            # +1 WIFE @<XREF:INDI>@  {0:1}
2366            TOKEN_WIFE   : self.__family_wife,
2367            # +1 CHIL @<XREF:INDI>@  {0:M}
2368            TOKEN_CHIL   : self.__family_chil,
2369            # +1 NCHI <COUNT_OF_CHILDREN>  {0:1}
2370            # +1 SUBM @<XREF:SUBM>@  {0:M}
2371            # +1 <<LDS_SPOUSE_SEALING>>  {0:M}
2372            TOKEN_SLGS   : self.__family_slgs,
2373            # +1 <<SOURCE_CITATION>>  {0:M}
2374            TOKEN_SOUR   : self.__family_source,
2375            # +1 <<MULTIMEDIA_LINK>>  {0:M}
2376            TOKEN_OBJE   : self.__family_object,
2377            # +1 <<NOTE_STRUCTURE>>  {0:M}
2378            TOKEN__COMM  : self.__family_comm,
2379            TOKEN_NOTE   : self.__family_note,
2380            TOKEN_RNOTE  : self.__family_note,
2381            # +1 REFN <USER_REFERENCE_NUMBER>  {0:M}
2382            TOKEN_REFN   : self.__family_refn,
2383            # TYPE should be below REFN, but will work here anyway
2384            TOKEN_TYPE   : self.__family_cust_attr,
2385            # +1 RIN <AUTOMATED_RECORD_ID>  {0:1}
2386            # +1 <<CHANGE_DATE>>  {0:1}
2387            TOKEN_CHAN   : self.__family_chan,
2388            TOKEN_ENDL   : self.__ignore,
2389            TOKEN_ADDR   : self.__ignore,
2390            TOKEN_RIN    : self.__family_cust_attr,
2391            TOKEN_SUBM   : self.__ignore,
2392            TOKEN_ATTR   : self.__family_attr,
2393        }
2394        self.func_list.append(self.family_func)
2395
2396        self.family_rel_tbl = {
2397            TOKEN__FREL  : self.__family_frel,
2398            TOKEN__MREL  : self.__family_mrel,
2399            TOKEN_ADOP   : self.__family_adopt,
2400            TOKEN__STAT  : self.__family_stat,
2401        }
2402        self.func_list.append(self.family_rel_tbl)
2403
2404        #
2405        # Parse table for <<SOURCE_RECORD>> below the level 0 SOUR tag
2406        #
2407        # n @<XREF:SOUR>@ SOUR                            {1:1}
2408        #   +1 DATA                                       {0:1}
2409        #     +2 EVEN <EVENTS_RECORDED>                   {0:M}
2410        #       +3 DATE <DATE_PERIOD>                     {0:1}
2411        #       +3 PLAC <SOURCE_JURISDICTION_PLACE>       {0:1}
2412        #     +2 AGNC <RESPONSIBLE_AGENCY>                {0:1}
2413        #     +2 <<NOTE_STRUCTURE>>                       {0:M}
2414        #   +1 AUTH <SOURCE_ORIGINATOR>                   {0:1}
2415        #     +2 [CONT|CONC] <SOURCE_ORIGINATOR>          {0:M}
2416        #   +1 TITL <SOURCE_DESCRIPTIVE_TITLE>            {0:1}
2417        #     +2 [CONT|CONC] <SOURCE_DESCRIPTIVE_TITLE>   {0:M}
2418        #   +1 ABBR <SOURCE_FILED_BY_ENTRY>               {0:1}
2419        #   +1 PUBL <SOURCE_PUBLICATION_FACTS>            {0:1}
2420        #     +2 [CONT|CONC] <SOURCE_PUBLICATION_FACTS>   {0:M}
2421        #   +1 TEXT <TEXT_FROM_SOURCE>                    {0:1}
2422        #     +2 [CONT|CONC] <TEXT_FROM_SOURCE>           {0:M}
2423        #   +1 <<SOURCE_REPOSITORY_CITATION>>             {0:1}
2424        #   +1 <<MULTIMEDIA_LINK>>                        {0:M}
2425        #   +1 <<NOTE_STRUCTURE>>                         {0:M}
2426        #   +1 REFN <USER_REFERENCE_NUMBER>               {0:M}
2427        #     +2 TYPE <USER_REFERENCE_TYPE>               {0:1}
2428        #   +1 RIN <AUTOMATED_RECORD_ID>                  {0:1}
2429        #   +1 <<CHANGE_DATE>>                            {0:1}
2430
2431        self.source_func = {
2432            TOKEN_TITL   : self.__source_title,
2433            TOKEN_TAXT   : self.__source_taxt_peri,
2434            TOKEN_PERI   : self.__source_taxt_peri,
2435            TOKEN_AUTH   : self.__source_auth,
2436            TOKEN_PUBL   : self.__source_publ,
2437            TOKEN_NOTE   : self.__source_note,
2438            TOKEN_RNOTE  : self.__source_note,
2439            TOKEN_TEXT   : self.__source_text,
2440            TOKEN_ABBR   : self.__source_abbr,
2441            TOKEN_REFN   : self.__source_attr,
2442            TOKEN_RIN    : self.__source_attr,
2443            TOKEN_REPO   : self.__source_repo,
2444            TOKEN_OBJE   : self.__source_object,
2445            TOKEN_CHAN   : self.__source_chan,
2446            TOKEN_MEDI   : self.__source_attr,
2447            TOKEN__NAME  : self.__source_attr,
2448            TOKEN_DATA   : self.__ignore,
2449            # TYPE should be below REFN, but will work here anyway
2450            TOKEN_TYPE   : self.__source_attr,
2451            TOKEN_CALN   : self.__ignore,
2452            # not legal, but Ultimate Family Tree does this
2453            TOKEN_DATE   : self.__ignore,
2454            TOKEN_IGNORE : self.__ignore,
2455        }
2456        self.func_list.append(self.source_func)
2457
2458        #
2459        # Parse table for <<MULTIMEDIA_RECORD>> below the level 0 OBJE tag
2460        #
2461        # n  @XREF:OBJE@ OBJE {1:1}                 # v5.5 layout
2462        #   +1 FILE <MULTIMEDIA_FILE_REFN>    {1:1} # de-facto extension
2463        #   +1 FORM <MULTIMEDIA_FORMAT>       {1:1}
2464        #   +1 TITL <DESCRIPTIVE_TITLE>       {0:1}
2465        #   +1 <<NOTE_STRUCTURE>>             {0:M}
2466        #   +1 BLOB                           {1:1} # Deprecated, no support
2467        #     +2 CONT <ENCODED_MULTIMEDIA_LINE> {1:M}
2468        #   +1 OBJE @<XREF:OBJE>@ /* chain */ {0:1} # Deprecated, no support
2469        #   +1 REFN <USER_REFERENCE_NUMBER>   {0:M}
2470        #     +2 TYPE <USER_REFERENCE_TYPE>   {0:1}
2471        #   +1 RIN <AUTOMATED_RECORD_ID>      {0:1}
2472        #   +1 <<CHANGE_DATE>>                {0:1}
2473        #
2474        # n @XREF:OBJE@ OBJE {1:1}                  # v5.5.1 layout
2475        #   +1 FILE <MULTIMEDIA_FILE_REFN>    {1:M} # multi files, no support
2476        #     +2 FORM <MULTIMEDIA_FORMAT>     {1:1}
2477        #       +3 TYPE <SOURCE_MEDIA_TYPE>   {0:1}
2478        #     +2 TITL <DESCRIPTIVE_TITLE>     {0:1}
2479        #     +2 DATE <mm/dd/yyy hh:mn:ss AM> {0:1}    # FTM extension
2480        #     +2 TEXT <COMMENT, by user or exif> {0:1} # FTM extension
2481        #   +1 REFN <USER_REFERENCE_NUMBER>   {0:M}
2482        #     +2 TYPE <USER_REFERENCE_TYPE>   {0:1}
2483        #   +1 RIN <AUTOMATED_RECORD_ID>      {0:1}
2484        #   +1 <<NOTE_STRUCTURE>>             {0:M}
2485        #   +1 <<SOURCE_CITATION>>            {0:M}
2486        #   +1 <<CHANGE_DATE>>                {0:1}
2487
2488        self.obje_func = {
2489            TOKEN_FORM   : self.__obje_form,
2490            TOKEN_TYPE   : self.__obje_type,    # v5.5.1
2491            TOKEN_TITL   : self.__obje_title,
2492            TOKEN_FILE   : self.__obje_file,    # de-facto extension
2493            TOKEN_TEXT   : self.__obje_text,    # FTM extension
2494            TOKEN__TEXT  : self.__obje_text,    # FTM 2017 extension
2495            TOKEN_DATE   : self.__obje_date,    # FTM extension
2496            TOKEN__DATE  : self.__obje_date,    # FTM 2017 extension
2497            TOKEN_NOTE   : self.__obje_note,
2498            TOKEN_RNOTE  : self.__obje_note,
2499            TOKEN_SOUR   : self.__obje_sour,
2500            TOKEN_BLOB   : self.__ignore,       # v5.5.1 deprecated
2501            TOKEN_REFN   : self.__obje_refn,
2502            TOKEN_RIN    : self.__obje_rin,
2503            TOKEN_CHAN   : self.__obje_chan,
2504        }
2505        self.func_list.append(self.obje_func)
2506
2507        self.parse_addr_tbl = {
2508            TOKEN_DATE   : self.__address_date,
2509            TOKEN_ADR1   : self.__address_adr1,
2510            TOKEN_ADR2   : self.__address_adr2,
2511            TOKEN_CITY   : self.__address_city,
2512            TOKEN_STAE   : self.__address_state,
2513            TOKEN_POST   : self.__address_post,
2514            TOKEN_CTRY   : self.__address_country,
2515            TOKEN_PHON   : self.__ignore,
2516            TOKEN_SOUR   : self.__address_sour,
2517            TOKEN_NOTE   : self.__address_note,
2518            TOKEN_RNOTE  : self.__address_note,
2519            TOKEN__LOC   : self.__ignore,
2520            TOKEN__NAME  : self.__ignore,
2521            TOKEN_IGNORE : self.__ignore,
2522            TOKEN_TYPE   : self.__ignore,
2523            TOKEN_CAUS   : self.__ignore,
2524        }
2525        self.func_list.append(self.parse_addr_tbl)
2526
2527        self.event_cause_tbl = {
2528            TOKEN_SOUR   : self.__event_cause_source,
2529        }
2530        self.func_list.append(self.event_cause_tbl)
2531
2532        self.event_place_map = {
2533            TOKEN_NOTE   : self.__event_place_note,
2534            TOKEN_RNOTE  : self.__event_place_note,
2535            TOKEN_FORM   : self.__event_place_form,
2536            # Not legal.
2537            TOKEN_OBJE   : self.__event_place_object,
2538            TOKEN_SOUR   : self.__event_place_sour,
2539            TOKEN__LOC   : self.__ignore,
2540            TOKEN_MAP    : self.__place_map,
2541            # Not legal,  but generated by Ultimate Family Tree
2542            TOKEN_QUAY   : self.__ignore,
2543        }
2544        self.func_list.append(self.event_place_map)
2545
2546        self.place_map_tbl = {
2547            TOKEN_LATI   : self.__place_lati,
2548            TOKEN_LONG   : self.__place_long,
2549        }
2550        self.func_list.append(self.place_map_tbl)
2551
2552        self.repo_ref_tbl = {
2553            TOKEN_CALN   : self.__repo_ref_call,
2554            TOKEN_NOTE   : self.__repo_ref_note,
2555            TOKEN_RNOTE  : self.__repo_ref_note,
2556            TOKEN_MEDI   : self.__repo_ref_medi,
2557            TOKEN_IGNORE : self.__ignore,
2558        }
2559        self.func_list.append(self.repo_ref_tbl)
2560
2561        self.parse_person_adopt = {
2562            TOKEN_ADOP   : self.__person_adopt_famc_adopt,
2563        }
2564        self.func_list.append(self.parse_person_adopt)
2565
2566        self.opt_note_tbl = {
2567            TOKEN_RNOTE  : self.__optional_note,
2568            TOKEN_NOTE   : self.__optional_note,
2569        }
2570        self.func_list.append(self.opt_note_tbl)
2571
2572        self.citation_data_tbl = {
2573            TOKEN_DATE   : self.__citation_data_date,
2574            TOKEN_TEXT   : self.__citation_data_text,
2575            TOKEN_RNOTE  : self.__citation_data_note,
2576            TOKEN_NOTE   : self.__citation_data_note,
2577        }
2578        self.func_list.append(self.citation_data_tbl)
2579
2580        self.citation_even_tbl = {
2581            TOKEN_ROLE   : self.__citation_even_role,
2582        }
2583        self.func_list.append(self.citation_even_tbl)
2584
2585        #
2586        # Parse table for <<HEADER>> record below the level 0 HEAD tag
2587        #
2588        # n HEAD                                          {1:1}
2589        #   +1 SOUR <APPROVED_SYSTEM_ID>                  {1:1}
2590        #     +2 VERS <VERSION_NUMBER>                    {0:1}
2591        #     +2 NAME <NAME_OF_PRODUCT>                   {0:1}
2592        #     +2 CORP <NAME_OF_BUSINESS>                  {0:1}
2593        #       +3 <<ADDRESS_STRUCTURE>>                  {0:1}
2594        #     +2 DATA <NAME_OF_SOURCE_DATA>               {0:1}
2595        #       +3 DATE <PUBLICATION_DATE>                {0:1}
2596        #       +3 COPR <COPYRIGHT_SOURCE_DATA>           {0:1}
2597        #   +1 DEST <RECEIVING_SYSTEM_NAME>               {0:1*}
2598        #   +1 DATE <TRANSMISSION_DATE>                   {0:1}
2599        #     +2 TIME <TIME_VALUE>                        {0:1}
2600        #   +1 SUBM @<XREF:SUBM>@                         {1:1}
2601        #   +1 SUBN @<XREF:SUBN>@                         {0:1}
2602        #   +1 FILE <FILE_NAME>                           {0:1}
2603        #   +1 COPR <COPYRIGHT_GEDCOM_FILE>               {0:1}
2604        #   +1 GEDC                                       {1:1}
2605        #     +2 VERS <VERSION_NUMBER>                    {1:1}
2606        #     +2 FORM <GEDCOM_FORM>                       {1:1}
2607        #   +1 CHAR <CHARACTER_SET>                       {1:1}
2608        #     +2 VERS <VERSION_NUMBER>                    {0:1}
2609        #   +1 LANG <LANGUAGE_OF_TEXT>                    {0:1}
2610        #   +1 PLAC                                       {0:1}
2611        #     +2 FORM <PLACE_HIERARCHY>                   {1:1}
2612        #   +1 NOTE <GEDCOM_CONTENT_DESCRIPTION>          {0:1}
2613        #     +2 [CONT|CONC] <GEDCOM_CONTENT_DESCRIPTION> {0:M}
2614
2615        #  * NOTE: Submissions to the Family History Department for Ancestral
2616        #  File submission or for clearing temple ordinances must use a
2617        #  DESTination of ANSTFILE or TempleReady.
2618
2619        self.head_parse_tbl = {
2620            TOKEN_SOUR  : self.__header_sour,
2621            TOKEN_NAME  : self.__header_sour_name,  # This should be below SOUR
2622            TOKEN_VERS  : self.__header_sour_vers,  # This should be below SOUR
2623            TOKEN_FILE  : self.__header_file,
2624            TOKEN_COPR  : self.__header_copr,
2625            TOKEN_SUBM  : self.__header_subm,
2626            TOKEN_CORP  : self.__ignore,           # This should be below SOUR
2627            TOKEN_DATA  : self.__ignore,       # This should be below SOUR
2628            TOKEN_SUBN  : self.__header_subn,
2629            TOKEN_LANG  : self.__header_lang,
2630            TOKEN_TIME  : self.__ignore,       # This should be below DATE
2631            TOKEN_DEST  : self.__header_dest,
2632            TOKEN_CHAR  : self.__header_char,
2633            TOKEN_GEDC  : self.__header_gedc,
2634            TOKEN_PLAC  : self.__header_plac,
2635            TOKEN_DATE  : self.__header_date,
2636            TOKEN_NOTE  : self.__header_note,
2637            TOKEN__SCHEMA: self.__ignore,
2638        }
2639        self.func_list.append(self.head_parse_tbl)
2640
2641        self.header_sour_parse_tbl = {
2642            TOKEN_VERS   : self.__header_sour_vers,
2643            TOKEN_NAME   : self.__header_sour_name,
2644            TOKEN_CORP   : self.__header_sour_corp,
2645            TOKEN_DATA   : self.__header_sour_data,
2646        }
2647        self.func_list.append(self.header_sour_parse_tbl)
2648
2649        self.header_sour_data = {
2650            TOKEN_DATE   : self.__header_sour_date,
2651            TOKEN_COPR   : self.__header_sour_copr,
2652        }
2653        self.func_list.append(self.header_sour_data)
2654
2655        self.header_corp_addr = {
2656            TOKEN_ADDR   : self.__repo_addr,
2657            TOKEN_PHON   : self.__repo_phon,
2658            TOKEN_FAX    : self.__repo_fax,
2659            TOKEN_WWW    : self.__repo_www,
2660            TOKEN_EMAIL  : self.__repo_email,
2661        }
2662        self.func_list.append(self.header_corp_addr)
2663
2664        self.header_subm = {
2665            TOKEN_NAME   : self.__header_subm_name,
2666        }
2667        self.func_list.append(self.header_subm)
2668
2669        self.place_form = {
2670            TOKEN_FORM   : self.__place_form,
2671        }
2672        self.func_list.append(self.place_form)
2673
2674        #
2675        # Parse table for <<NOTE_RECORD>> below the level 0 NOTE tag
2676        #
2677        # n @<XREF:NOTE>@ NOTE <SUBMITTER_TEXT>           {1:1}
2678        #   +1 [ CONC | CONT] <SUBMITTER_TEXT>            {0:M}
2679        #   +1 <<SOURCE_CITATION>>                        {0:M}
2680        #   +1 REFN <USER_REFERENCE_NUMBER>               {0:M}
2681        #     +2 TYPE <USER_REFERENCE_TYPE>               {0:1}
2682        #   +1 RIN <AUTOMATED_RECORD_ID>                  {0:1}
2683        #   +1 <<CHANGE_DATE>>                            {0:1}
2684
2685        self.note_parse_tbl = {
2686            TOKEN_SOUR   : self.__ignore,
2687            TOKEN_REFN   : self.__ignore,
2688            TOKEN_RIN    : self.__ignore,
2689            TOKEN_CHAN   : self.__note_chan,
2690        }
2691        self.func_list.append(self.note_parse_tbl)
2692
2693        # look for existing place titles, build a map
2694        self.place_names = defaultdict(list)
2695        cursor = dbase.get_place_cursor()
2696        data = next(cursor)
2697        while data:
2698            (handle, val) = data
2699            self.place_names[val[2]].append(handle)
2700            data = next(cursor)
2701        cursor.close()
2702
2703        enc = stage_one.get_encoding()
2704
2705        if enc == "ANSEL":
2706            rdr = AnselReader(ifile, self.__add_msg)
2707        elif enc in ("UTF-8", "UTF8", "UTF_8_SIG"):
2708            rdr = UTF8Reader(ifile, self.__add_msg, enc)
2709        elif enc in ("UTF-16LE", "UTF-16BE", "UTF16", "UNICODE"):
2710            rdr = UTF16Reader(ifile, self.__add_msg)
2711        elif enc in ("CP1252", "WINDOWS-1252"):
2712            rdr = CP1252Reader(ifile, self.__add_msg)
2713        else:
2714            rdr = AnsiReader(ifile, self.__add_msg)
2715
2716        self.lexer = Lexer(rdr, self.__add_msg)
2717        self.filename = filename
2718        self.backoff = False
2719
2720        fullpath = os.path.normpath(os.path.abspath(filename))
2721        self.geddir = os.path.dirname(fullpath)
2722
2723        self.error_count = 0
2724        amap = PERSONALCONSTANTATTRIBUTES
2725
2726        self.attrs = list(amap.values())
2727        self.gedattr = dict([key, val] for val, key in amap.items())
2728
2729    def parse_gedcom_file(self, use_trans=False):
2730        """
2731        Parses the opened GEDCOM file.
2732
2733        LINEAGE_LINKED_GEDCOM: =
2734          0 <<HEADER>>                                    {1:1}
2735          0 <<SUBMISSION_RECORD>>                         {0:1}
2736          0 <<RECORD>>                                    {1:M}
2737          0 TRLR                                          {1:1}
2738
2739        """
2740        no_magic = self.maxpeople < 1000
2741        with DbTxn(_("GEDCOM import"), self.dbase, not use_trans,
2742                   no_magic=no_magic) as self.trans:
2743
2744            self.dbase.disable_signals()
2745            self.__parse_header_head()
2746            self.want_parse_warnings = False
2747            self.__parse_header()
2748            self.want_parse_warnings = True
2749            if self.use_def_src:
2750                self.dbase.add_source(self.def_src, self.trans)
2751            if self.default_tag and self.default_tag.handle is None:
2752                self.dbase.add_tag(self.default_tag, self.trans)
2753            self.__parse_record()
2754            self.__parse_trailer()
2755            for title, handle in self.inline_srcs.items():
2756                src = Source()
2757                src.set_handle(handle)
2758                src.set_title(title)
2759                self.dbase.add_source(src, self.trans)
2760            self.__clean_up()
2761
2762            self.place_import.generate_hierarchy(self.trans)
2763
2764            if not self.dbase.get_feature("skip-check-xref"):
2765                self.__check_xref()
2766        self.dbase.enable_signals()
2767        self.dbase.request_rebuild()
2768        if self.number_of_errors == 0:
2769            message = _("GEDCOM import report: No errors detected")
2770        else:
2771            message = _("GEDCOM import report: %s errors detected") % \
2772                self.number_of_errors
2773        if hasattr(self.user.uistate, 'window'):
2774            parent_window = self.user.uistate.window
2775        else:
2776            parent_window = None
2777        self.user.info(message, "".join(self.errors),
2778                       parent=parent_window, monospaced=True)
2779
2780    def __clean_up(self):
2781        """
2782        Break circular references to parsing methods stored in dictionaries
2783        to aid garbage collection
2784        """
2785        for func_map in self.func_list:
2786            for key in list(func_map.keys()):
2787                del func_map[key]
2788            del func_map
2789        del self.func_list
2790        del self.update
2791        self.lexer.clean_up()
2792
2793    def __find_person_handle(self, gramps_id):
2794        """
2795        Return the database handle associated with the person's Gramps ID
2796        """
2797        return self.__find_from_handle(gramps_id, self.gid2id)
2798
2799    def __find_family_handle(self, gramps_id):
2800        """
2801        Return the database handle associated with the family's Gramps ID
2802        """
2803        return self.__find_from_handle(gramps_id, self.fid2id)
2804
2805    def __find_media_handle(self, gramps_id):
2806        """
2807        Return the database handle associated with the media object's Gramps ID
2808        """
2809        return self.__find_from_handle(gramps_id, self.oid2id)
2810
2811    def __find_note_handle(self, gramps_id):
2812        """
2813        Return the database handle associated with the media object's Gramps ID
2814        """
2815        return self.__find_from_handle(gramps_id, self.nid2id)
2816
2817    def __find_or_create_person(self, gramps_id):
2818        """
2819        Finds or creates a person based on the Gramps ID. If the ID is
2820        already used (is in the db), we return the item in the db. Otherwise,
2821        we create a new person, assign the handle and Gramps ID.
2822        """
2823        person = Person()
2824        intid = self.gid2id.get(gramps_id)
2825        if self.dbase.has_person_handle(intid):
2826            person.unserialize(self.dbase.get_raw_person_data(intid))
2827        else:
2828            intid = self.__find_from_handle(gramps_id, self.gid2id)
2829            person.set_handle(intid)
2830            person.set_gramps_id(gramps_id)
2831        return person
2832
2833    def __find_or_create_family(self, gramps_id):
2834        """
2835        Finds or creates a family based on the Gramps ID. If the ID is
2836        already used (is in the db), we return the item in the db. Otherwise,
2837        we create a new family, assign the handle and Gramps ID.
2838        """
2839        family = Family()
2840        # Add a counter for reordering the children later:
2841        family.child_ref_count = 0
2842        intid = self.fid2id.get(gramps_id)
2843        if self.dbase.has_family_handle(intid):
2844            family.unserialize(self.dbase.get_raw_family_data(intid))
2845        else:
2846            intid = self.__find_from_handle(gramps_id, self.fid2id)
2847            family.set_handle(intid)
2848            family.set_gramps_id(gramps_id)
2849        return family
2850
2851    def __find_or_create_media(self, gramps_id):
2852        """
2853        Finds or creates a media object based on the Gramps ID. If the ID is
2854        already used (is in the db), we return the item in the db. Otherwise,
2855        we create a new media object, assign the handle and Gramps ID.
2856        """
2857        obj = Media()
2858        intid = self.oid2id.get(gramps_id)
2859        if self.dbase.has_media_handle(intid):
2860            obj.unserialize(self.dbase.get_raw_media_data(intid))
2861        else:
2862            intid = self.__find_from_handle(gramps_id, self.oid2id)
2863            obj.set_handle(intid)
2864            obj.set_gramps_id(gramps_id)
2865        return obj
2866
2867    def __find_or_create_source(self, gramps_id):
2868        """
2869        Find or create a source based on the Gramps ID.
2870
2871        If the ID is already used (is in the db), we return the item in the
2872        db. Otherwise, we create a new source, assign the handle and Gramps ID.
2873
2874        """
2875        obj = Source()
2876        intid = self.sid2id.get(gramps_id)
2877        if self.dbase.has_source_handle(intid):
2878            obj.unserialize(self.dbase.get_raw_source_data(intid))
2879        else:
2880            intid = self.__find_from_handle(gramps_id, self.sid2id)
2881            obj.set_handle(intid)
2882            obj.set_gramps_id(gramps_id)
2883        return obj
2884
2885    def __find_or_create_repository(self, gramps_id):
2886        """
2887        Finds or creates a repository based on the Gramps ID. If the ID is
2888        already used (is in the db), we return the item in the db. Otherwise,
2889        we create a new repository, assign the handle and Gramps ID.
2890
2891        Some GEDCOM "flavors" destroy the specification, and declare the
2892        repository inline instead of in a object.
2893        """
2894        repository = Repository()
2895        intid = self.rid2id.get(gramps_id)
2896        if self.dbase.has_repository_handle(intid):
2897            repository.unserialize(self.dbase.get_raw_repository_data(intid))
2898        else:
2899            intid = self.__find_from_handle(gramps_id, self.rid2id)
2900            repository.set_handle(intid)
2901            repository.set_gramps_id(gramps_id)
2902        return repository
2903
2904    def __find_or_create_note(self, gramps_id):
2905        """
2906        Finds or creates a note based on the Gramps ID. If the ID is
2907        already used (is in the db), we return the item in the db. Otherwise,
2908        we create a new note, assign the handle and Gramps ID.
2909        If no Gramps ID is passed in, we not only make a Note with GID, we
2910        commit it.
2911        """
2912        note = Note()
2913        if not gramps_id:
2914            need_commit = True
2915            gramps_id = self.dbase.find_next_note_gramps_id()
2916        else:
2917            need_commit = False
2918
2919        intid = self.nid2id.get(gramps_id)
2920        if self.dbase.has_note_handle(intid):
2921            note.unserialize(self.dbase.get_raw_note_data(intid))
2922        else:
2923            intid = self.__find_from_handle(gramps_id, self.nid2id)
2924            note.set_handle(intid)
2925            note.set_gramps_id(gramps_id)
2926        if need_commit:
2927            self.dbase.add_note(note, self.trans)
2928        return note
2929
2930    def __loc_is_empty(self, location):
2931        """
2932        Determines whether a location is empty.
2933
2934        @param location: The current location
2935        @type location: gen.lib.Location
2936        @return True of False
2937        """
2938        if location is None:
2939            return True
2940        elif location.serialize() == self._EMPTY_LOC:
2941            return True
2942        elif location.is_empty():
2943            return True
2944        return False
2945
2946    def __find_place(self, title, location, placeref_list):
2947        """
2948        Finds an existing place based on the title and primary location.
2949
2950        @param title: The place title
2951        @type title: string
2952        @param location: The current location
2953        @type location: gen.lib.Location
2954        @return gen.lib.Place
2955        """
2956        for place_handle in self.place_names[title]:
2957            place = self.dbase.get_place_from_handle(place_handle)
2958            if place.get_title() == title:
2959                if self.__loc_is_empty(location) and \
2960                   self.__loc_is_empty(self.__get_first_loc(place)) and \
2961                   place.get_placeref_list() == placeref_list:
2962                    return place
2963                elif (not self.__loc_is_empty(location) and
2964                      not self.__loc_is_empty(self.__get_first_loc(place)) and
2965                      self.__get_first_loc(place).is_equivalent(location) ==
2966                      IDENTICAL) and \
2967                        place.get_placeref_list() == placeref_list:
2968                    return place
2969        return None
2970
2971    def __add_place(self, event, sub_state):
2972        """
2973        Add a new place to an event if not already present, or update a
2974        place.
2975
2976        @param event: The event
2977        @type event: gen.lib.Event
2978        @param substate: The sub-state for PLAC or ADDR elements (i.e. parsed
2979                        by event_parse_tbl)
2980        @type sub_state: CurrentState
2981        """
2982        if sub_state.place:
2983            # see whether this place already exists
2984            place = self.__find_place(sub_state.place.get_title(),
2985                                      self.__get_first_loc(sub_state.place),
2986                                      sub_state.place.get_placeref_list())
2987            if place is None:
2988                place = sub_state.place
2989                place_title = _pd.display(self.dbase, place)
2990                location = sub_state.pf.load_place(self.place_import, place,
2991                                                   place_title)
2992                self.dbase.add_place(place, self.trans)
2993                # if 'location was created, then store it, now that we have a
2994                # handle.
2995                if location:
2996                    self.place_import.store_location(location, place.handle)
2997                self.place_names[place.get_title()].append(place.get_handle())
2998                event.set_place_handle(place.get_handle())
2999            else:
3000                place.merge(sub_state.place)
3001                place_title = _pd.display(self.dbase, place)
3002                location = sub_state.pf.load_place(self.place_import, place,
3003                                                   place_title)
3004                self.dbase.commit_place(place, self.trans)
3005                if location:
3006                    self.place_import.store_location(location, place.handle)
3007                event.set_place_handle(place.get_handle())
3008
3009    def __find_file(self, fullname, altpath):
3010        # try to find the media file
3011        fullname = fullname.replace('\\', os.path.sep)
3012
3013        try:
3014            if os.path.isfile(fullname):
3015                return (1, fullname)
3016        except UnicodeEncodeError:
3017            # FIXME: problem possibly caused by umlaut/accented character
3018            # in filename
3019            return (0, fullname)
3020        # strip off Windows drive letter, if present
3021        if len(fullname) > 3 and fullname[1] == ':':
3022            fullname = fullname[2:]
3023        # look where we found the '.ged', using the full path in fullname
3024        other = os.path.join(altpath, fullname)
3025        if os.path.isfile(other):
3026            return (1, other)
3027        # lets try reducing to just where we found '.ged'
3028        other = os.path.join(altpath, os.path.basename(fullname))
3029        if os.path.isfile(other):
3030            return (1, other)
3031        # lets try using the base path for relative media paths
3032        other = os.path.join(media_path(self.dbase), fullname)
3033        if os.path.isfile(other):
3034            return (1, fullname)
3035        # lets try using the base path for relative media paths with base name
3036        other = os.path.join(media_path(self.dbase),
3037                             os.path.basename(fullname))
3038        if os.path.isfile(other):
3039            return (1, os.path.basename(fullname))
3040        return (0, fullname)
3041
3042    def __get_next_line(self):
3043        """
3044        Get the next line for analysis from the lexical analyzer. Return the
3045        same value if the _backup flag is set.
3046        """
3047        if not self.backoff:
3048            self.groups = self.lexer.readline()
3049            self.update()
3050
3051            # EOF ?
3052            if not self.groups:
3053                self.backoff = False
3054                # We will add the truncation warning message to the error
3055                # messages report, even though it probably won't be reported
3056                # because the exception below gets raised before the report is
3057                # produced. We do this in case __add_msg is changed in the
3058                # future to do something else
3059                self.__add_msg(self.__TRUNC_MSG)
3060                self.groups = None
3061                raise GedcomError(self.__TRUNC_MSG)
3062
3063        self.backoff = False
3064        return self.groups
3065
3066    def __chk_subordinate(self, level, state, token):
3067        """
3068        checks for a single subordinate line with specific token.  If any other
3069        lines are present, they are not understood.
3070
3071        @param level: Current level in the file
3072        @type level: int
3073        @param state: The current state
3074        @type state: CurrentState
3075        @param token: The token to search for
3076        @type token: int
3077        """
3078        skips = 0
3079        got_line = None
3080        while True:
3081            line = self.__get_next_line()
3082            if self.__level_is_finished(line, level):
3083                if skips:
3084                    # This improves formatting when there are long sequences of
3085                    # skipped lines
3086                    self.__add_msg("", None, None)
3087                return got_line
3088            if line.token == token:
3089                got_line = line
3090            else:
3091                self.__add_msg(_("Line ignored as not understood"),
3092                               line, state)
3093                skips += 1
3094
    def __undefined(self, line, state):
        """
        Handle a line that has no dedicated handler by passing it on to
        __not_recognized. It is used as the default handler when parsing
        submitter records (see __parse_submitter).

        @param line: The current line in GedLine format
        @type line: GedLine
        @param state: The current state
        @type state: CurrentState
        """
        self.__not_recognized(line, state)
3103
3104    def __ignore(self, line, state):
3105        """
3106        Prints a message when an unexpected token is found.  If the token is
3107        known, then the line is considered "not supported", otherwise the line
3108        is "not understood".
3109
3110        @param line: The current line in GedLine format
3111        @type line: GedLine
3112        @param state: The current state
3113        @type state: CurrentState
3114        """
3115        if line.token == TOKEN_UNKNOWN:
3116            self.__add_msg(_("Line ignored as not understood"), line, state)
3117        else:
3118            self.__add_msg(_("Tag recognized but not supported"), line, state)
3119        self.__skip_subordinate_levels(line.level + 1, state)
3120
    def __not_recognized(self, line, state):
        """
        Records a "not understood" message for the line and skips all lines
        that are subordinate to it (those at line.level + 1 or deeper).

        @param line: The current line in GedLine format
        @type line: GedLine
        @param state: The current state
        @type state: CurrentState
        """
        self.__add_msg(_("Line ignored as not understood"), line, state)
        self.__skip_subordinate_levels(line.level + 1, state)
3131
    def __skip_record(self, _line, state):
        """
        Skips the following lines for as long as they are at level 2 or
        deeper; each skipped line is reported via __skip_subordinate_levels.

        @param _line: The current line in GedLine format (not used)
        @type _line: GedLine
        @param state: The current state
        @type state: CurrentState
        """
        self.__skip_subordinate_levels(2, state)
3140
3141    def __skip_subordinate_levels(self, level, state):
3142        """
3143        Skip add lines of the specified level or lower.
3144        """
3145        skips = 0
3146        while True:
3147            line = self.__get_next_line()
3148            if self.__level_is_finished(line, level):
3149                if skips:
3150                    # This improves formatting when there are long sequences of
3151                    # skipped lines
3152                    self.__add_msg("", None, None)
3153                return
3154            self.__add_msg(_("Skipped subordinate line"), line, state)
3155            skips += 1
3156
3157    def __level_is_finished(self, text, level):
3158        """
3159        Check to see if the level has been completed, indicated by finding
3160        a level indiciated by the passed level value. If the level is finished,
3161        then make sure to call self._backup to reset the text pointer.
3162        """
3163        done = text.level < level
3164        if done:
3165            self._backup()
3166        return done
3167
3168    def __add_msg(self, problem, line=None, state=None):
3169        if problem != "":
3170            self.number_of_errors += 1
3171        if line:
3172            prob_width = 66
3173            problem = problem.ljust(prob_width)[0:(prob_width - 1)]
3174            text = str(line.data).replace("\n", "\n".ljust(prob_width + 22))
3175            message = "%s   Line %5d: %s %s %s\n" % (problem, line.line,
3176                                                     line.level,
3177                                                     line.token_text, text)
3178        else:
3179            message = problem + "\n"
3180        if state:
3181            state.msg += message
3182        self.errors.append(message)
3183
3184    def __check_msgs(self, record_name, state, obj):
3185        if state.msg == "":
3186            return
3187        message = _("Records not imported into ") + record_name + ":\n\n" + \
3188            state.msg
3189        new_note = Note()
3190        tag = StyledTextTag(StyledTextTagType.FONTFACE, 'Monospace',
3191                            [(0, len(message))])
3192        text = StyledText(message, [tag])
3193        new_note.set_styledtext(text)
3194        new_note.set_handle(create_id())
3195        gramps_id = self.nid_map[""]
3196        new_note.set_gramps_id(gramps_id)
3197        note_type = NoteType()
3198        note_type.set((NoteType.CUSTOM, _("GEDCOM import")))
3199        new_note.set_type(note_type)
3200        self.dbase.add_note(new_note, self.trans)
3201        # If possible, attach the note to the relevant object
3202        if obj:
3203            obj.add_note(new_note.get_handle())
3204
    def _backup(self):
        """
        Set the backoff flag so that the current line can be accessed by the
        next level up: while the flag is set, __get_next_line returns the
        line that was read last instead of reading a new one.
        """
        self.backoff = True
3211
3212    def __check_xref(self):
3213
3214        def __check(_map, has_gid_func, class_func, commit_func,
3215                    gramps_id2handle, msg):
3216            for input_id, gramps_id in _map.map().items():
3217                # Check whether an object exists for the mapped gramps_id
3218                if not has_gid_func(gramps_id):
3219                    _handle = self.__find_from_handle(gramps_id,
3220                                                      gramps_id2handle)
3221                    if msg == "FAM":
3222                        make_unknown(gramps_id, self.explanation.handle,
3223                                     class_func, commit_func, self.trans,
3224                                     db=self.dbase)
3225                        self.missing_references += 1
3226                        self.__add_msg(_("Error: %(msg)s  '%(gramps_id)s'"
3227                                         " (input as @%(xref)s@) not in input"
3228                                         " GEDCOM. Record synthesised") %
3229                                       {'msg' : msg, 'gramps_id' : gramps_id,
3230                                        'xref' : input_id})
3231                    else:
3232                        make_unknown(gramps_id, self.explanation.handle,
3233                                     class_func, commit_func, self.trans)
3234                        self.missing_references += 1
3235                        self.__add_msg(_("Error: %(msg)s '%(gramps_id)s'"
3236                                         " (input as @%(xref)s@) not in input"
3237                                         " GEDCOM. Record with typifying"
3238                                         " attribute 'Unknown' created") %
3239                                       {'msg' : msg, 'gramps_id' : gramps_id,
3240                                        'xref' : input_id})
3241
3242        self.explanation = create_explanation_note(self.dbase)
3243
3244        self.missing_references = 0
3245        __check(self.pid_map, self.dbase.has_person_gramps_id,
3246                self.__find_or_create_person, self.dbase.commit_person,
3247                self.gid2id, "INDI")
3248        __check(self.fid_map, self.dbase.has_family_gramps_id,
3249                self.__find_or_create_family, self.dbase.commit_family,
3250                self.fid2id, "FAM")
3251        __check(self.sid_map, self.dbase.has_source_gramps_id,
3252                self.__find_or_create_source, self.dbase.commit_source,
3253                self.sid2id, "SOUR")
3254        __check(self.oid_map, self.dbase.has_media_gramps_id,
3255                self.__find_or_create_media, self.dbase.commit_media,
3256                self.oid2id, "OBJE")
3257        __check(self.rid_map, self.dbase.has_repository_gramps_id,
3258                self.__find_or_create_repository, self.dbase.commit_repository,
3259                self.rid2id, "REPO")
3260        __check(self.nid_map, self.dbase.has_note_gramps_id,
3261                self.__find_or_create_note, self.dbase.commit_note,
3262                self.nid2id, "NOTE")
3263
3264        # Check persons membership in referenced families
3265        def __input_fid(gramps_id):
3266            for (key, val) in self.fid_map.map().items():
3267                if val == gramps_id:
3268                    return key
3269
3270        for input_id, gramps_id in self.pid_map.map().items():
3271            person_handle = self.__find_from_handle(gramps_id, self.gid2id)
3272            person = self.dbase.get_person_from_handle(person_handle)
3273            for family_handle in person.get_family_handle_list():
3274                family = self.dbase.get_family_from_handle(family_handle)
3275                if family and family.get_father_handle() != person_handle and \
3276                        family.get_mother_handle() != person_handle:
3277                    person.remove_family_handle(family_handle)
3278                    self.dbase.commit_person(person, self.trans)
3279                    self.__add_msg(_("Error: family '%(family)s' (input as"
3280                                     " @%(orig_family)s@) person %(person)s"
3281                                     " (input as %(orig_person)s) is not a"
3282                                     " member of the referenced family."
3283                                     " Family reference removed from person") %
3284                                   {'family' : family.gramps_id,
3285                                    'orig_family' :
3286                                        __input_fid(family.gramps_id),
3287                                    'person' : person.gramps_id,
3288                                    'orig_person' : input_id})
3289
3290        def __input_pid(gramps_id):
3291            for (key, val) in self.pid_map.map().items():
3292                if val == gramps_id:
3293                    return key
3294
3295        for input_id, gramps_id in self.fid_map.map().items():
3296            family_handle = self.__find_from_handle(gramps_id, self.fid2id)
3297            family = self.dbase.get_family_from_handle(family_handle)
3298            father_handle = family.get_father_handle()
3299            mother_handle = family.get_mother_handle()
3300
3301            if father_handle:
3302                father = self.dbase.get_person_from_handle(father_handle)
3303                if father and \
3304                        family_handle not in father.get_family_handle_list():
3305                    father.add_family_handle(family_handle)
3306                    self.dbase.commit_person(father, self.trans)
3307                    self.__add_msg("Error: family '%(family)s' (input as"
3308                                   " @%(orig_family)s@) father '%(father)s'"
3309                                   " (input as '%(orig_father)s') does not "
3310                                   "refer back to the family. Reference added."
3311                                   % {'family' : family.gramps_id,
3312                                      'orig_family' : input_id,
3313                                      'father' : father.gramps_id,
3314                                      'orig_father' :
3315                                          __input_pid(father.gramps_id)})
3316
3317            if mother_handle:
3318                mother = self.dbase.get_person_from_handle(mother_handle)
3319                if mother and \
3320                        family_handle not in mother.get_family_handle_list():
3321                    mother.add_family_handle(family_handle)
3322                    self.dbase.commit_person(mother, self.trans)
3323                    self.__add_msg("Error: family '%(family)s' (input as"
3324                                   " @%(orig_family)s@) mother '%(mother)s'"
3325                                   " (input as '%(orig_mother)s') does not "
3326                                   "refer back to the family. Reference added."
3327                                   % {'family' : family.gramps_id,
3328                                      'orig_family' : input_id,
3329                                      'mother' : mother.gramps_id,
3330                                      'orig_mother' :
3331                                          __input_pid(mother.gramps_id)})
3332
3333            for child_ref in family.get_child_ref_list():
3334                child_handle = child_ref.ref
3335                child = self.dbase.get_person_from_handle(child_handle)
3336                if child:
3337                    if family_handle not in \
3338                            child.get_parent_family_handle_list():
3339                        # The referenced child has no reference to the family.
3340                        # There was a link from the FAM record to the child,
3341                        # but no FAMC link from the child to the FAM.
3342                        child.add_parent_family_handle(family_handle)
3343                        self.dbase.commit_person(child, self.trans)
3344                        self.__add_msg("Error: family '%(family)s' (input as"
3345                                       " @%(orig_family)s@) child '%(child)s'"
3346                                       " (input as '%(orig_child)s') does not "
3347                                       "refer back to the family. "
3348                                       "Reference added." %
3349                                       {'family' : family.gramps_id,
3350                                        'orig_family' : input_id,
3351                                        'child' : child.gramps_id,
3352                                        'orig_child' :
3353                                            __input_pid(child.gramps_id)})
3354
3355        if self.missing_references:
3356            self.dbase.commit_note(self.explanation, self.trans, time.time())
3357            txt = _("\nThe imported file was not self-contained.\n"
3358                    "To correct for that, %(new)d objects were created and\n"
3359                    "their typifying attribute was set to 'Unknown'.\n"
3360                    "Where possible these 'Unknown' objects are \n"
3361                    "referenced by note %(unknown)s.\n"
3362                    ) % {'new': self.missing_references,
3363                         'unknown': self.explanation.gramps_id}
3364            self.__add_msg(txt)
3365            self.number_of_errors -= 1
3366
3367    def __merge_address(self, free_form_address, addr, line, state):
3368        """
3369        Merge freeform and structured addrssses.
3370        n ADDR <ADDRESS_LINE> {0:1}
3371        +1 CONT <ADDRESS_LINE> {0:M}
3372        +1 ADR1 <ADDRESS_LINE1> {0:1}  (Street)
3373        +1 ADR2 <ADDRESS_LINE2> {0:1}  (Locality)
3374        +1 CITY <ADDRESS_CITY> {0:1}
3375        +1 STAE <ADDRESS_STATE> {0:1}
3376        +1 POST <ADDRESS_POSTAL_CODE> {0:1}
3377        +1 CTRY <ADDRESS_COUNTRY> {0:1}
3378
3379        This is done along the lines suggested by Tamura Jones in
3380        http://www.tamurajones.net/GEDCOMADDR.xhtml as a result of bug 6382.
3381        "When a GEDCOM reader encounters a double address, it should read the
3382        structured address. ... A GEDCOM reader that does verify that the
3383        addresses are the same should issue an error if they are not".
3384
3385        This is called for SUBMitter addresses (__subm_addr), INDIvidual
3386        addresses (__person_addr), REPO addresses and HEADer corp address
3387        (__repo_address) and EVENt addresses (__event_adr).
3388
3389        The structured address (if any) will have been accumulated into an
3390        object of type LocationBase, which will either be a Location, or an
3391        Address object.
3392
3393        If ADDR is provided, but none of ADR1, ADR2, CITY, STAE, or POST (not
3394        CTRY), then Street is set to the freeform address. N.B. this is a
3395        change for Repository addresses and HEADer Corp address where
3396        previously the free-form address was deconstrucated into different
3397        structured components. N.B. PAF provides a free-form address and a
3398        country, so this allows for that case.
3399
3400        If both forms of address are provided, then the structured address is
3401        used, and if the ADDR/CONT contains anything not in the structured
3402        address, a warning is issued.
3403
3404        If just ADR1, ADR2, CITY, STAE, POST or CTRY are provided (this is not
3405        actually legal GEDCOM symtax, but may be possible by GEDCOM extensions)
3406        then just the structrued address is used.
3407        The routine returns a string suitable for a title.
3408        """
3409        title = ''
3410        free_form_address = free_form_address.replace('\n', ', ')
3411        if not (addr.get_street() or addr.get_locality() or
3412                addr.get_city() or addr.get_state() or
3413                addr.get_postal_code()):
3414
3415            addr.set_street(free_form_address)
3416            return free_form_address
3417        else:
3418            # structured address provided
3419            addr_list = free_form_address.split(",")
3420            str_list = []
3421            for func in (addr.get_street(), addr.get_locality(),
3422                         addr.get_city(), addr.get_state(),
3423                         addr.get_postal_code(), addr.get_country()):
3424                str_list += [i.strip(',' + string.whitespace)
3425                             for i in func.split("\n")]
3426            for elmn in addr_list:
3427                if elmn.strip(',' + string.whitespace) not in str_list:
3428                    # message means that the element %s was ignored, but
3429                    # expressed the wrong way round because the message is
3430                    # truncated for output
3431                    self.__add_msg(_("ADDR element ignored '%s'"
3432                                     % elmn), line, state)
3433            # The free-form address ADDR is discarded
3434            # Assemble a title out of structured address
3435            for elmn in str_list:
3436                if elmn:
3437                    if title != '':
3438                        # TODO for Arabic, should the next comma be translated?
3439                        title += ', '
3440                    title += elmn
3441            return title
3442
3443    def __parse_trailer(self):
3444        """
3445        Looks for the expected TRLR token
3446        """
3447        try:
3448            line = self.__get_next_line()
3449            if line and line.token != TOKEN_TRLR:
3450                state = CurrentState()
3451                self.__not_recognized(line, state)
3452                self.__check_msgs(_("TRLR (trailer)"), state, None)
3453        except TypeError:
3454            return
3455
3456    def __parse_submitter(self, line):
3457        """
3458        Parses the submitter data
3459
3460        n @<XREF:SUBM>@ SUBM
3461          +1 NAME <SUBMITTER_NAME>
3462          +1 <<ADDRESS_STRUCTURE>>
3463          +1 <<MULTIMEDIA_LINK>>
3464          +1 LANG <LANGUAGE_PREFERENCE>
3465          +1 RFN <SUBMITTER_REGISTERED_RFN>
3466          +1 RIN <AUTOMATED_RECORD_ID>
3467          +1 <<CHANGE_DATE>>
3468        """
3469        researcher = Researcher()
3470        state = CurrentState()
3471        state.res = researcher
3472        state.level = 1
3473        repo = Repository()
3474        state.repo = repo
3475        self.__parse_level(state, self.subm_parse_tbl, self.__undefined)
3476        # If this is the submitter that we were told about in the HEADer, then
3477        # we will need to update the researcher
3478        if line.token_text == self.subm and self.import_researcher:
3479            self.dbase.set_researcher(state.res)
3480
3481        localized_submitter = _("(Submitter):")
3482        if state.res.get_name() == "":
3483            submitter_name = "SUBM %s @%s@" % (localized_submitter,
3484                                               line.token_text)
3485        else:
3486            submitter_name = "SUBM %s (@%s@) %s" % (localized_submitter,
3487                                                    line.token_text,
3488                                                    state.res.get_name())
3489        if self.use_def_src:
3490            repo.set_name(submitter_name)
3491            repo.set_handle(create_id())
3492            repo.set_gramps_id(self.rid_map[""])
3493
3494            addr = Address()
3495            addr.set_street(state.res.get_address())
3496            addr.set_locality(state.res.get_locality())
3497            addr.set_city(state.res.get_city())
3498            addr.set_state(state.res.get_state())
3499            addr.set_country(state.res.get_country())
3500            addr.set_postal_code(state.res.get_postal_code())
3501            addr.set_county(state.res.get_county())
3502            addr.set_phone(state.res.get_phone())
3503            repo.add_address(addr)
3504            rtype = RepositoryType()
3505            rtype.set((RepositoryType.CUSTOM, _('GEDCOM data')))
3506            repo.set_type(rtype)
3507            self.__check_msgs(submitter_name, state, repo)
3508            self.dbase.commit_repository(repo, self.trans, state.repo.change)
3509            repo_ref = RepoRef()
3510            repo_ref.set_reference_handle(repo.handle)
3511            mtype = SourceMediaType()
3512            mtype.set((SourceMediaType.UNKNOWN, ''))
3513            repo_ref.set_media_type(mtype)
3514            self.def_src.add_repo_reference(repo_ref)
3515            self.dbase.commit_source(self.def_src, self.trans)
3516        else:
3517            self.__check_msgs(submitter_name, state, None)
3518
3519    def __parse_record(self):
3520        """
3521        Parse the top level (0 level) instances.
3522        RECORD: =
3523          [
3524          n <<FAM_RECORD>>                                {1:1}
3525          |
3526          n <<INDIVIDUAL_RECORD>>                         {1:1}
3527          |
3528          n <<MULTIMEDIA_RECORD>>                         {1:M}
3529          |
3530          n <<NOTE_RECORD>>                               {1:1}
3531          |
3532          n <<REPOSITORY_RECORD>>                         {1:1}
3533          |
3534          n <<SOURCE_RECORD>>                             {1:1}
3535          |
3536          n <<SUBMITTER_RECORD>>                          {1:1}
3537          ]
3538
3539        This also deals with the SUBN (submission) record, of which there
3540        should be exactly one.
3541        """
3542        while True:
3543            line = self.__get_next_line()
3544            key = line.data
3545            if not line or line.token == TOKEN_TRLR:
3546                self._backup()
3547                break
3548            if line.token == TOKEN_UNKNOWN:
3549                state = CurrentState()
3550                self.__add_msg(_("Unknown tag"), line, state)
3551                self.__skip_subordinate_levels(1, state)
3552                self.__check_msgs(_("Top Level"), state, None)
3553            elif key in ("FAM", "FAMILY"):
3554                self.__parse_fam(line)
3555            elif key in ("INDI", "INDIVIDUAL"):
3556                self.__parse_indi(line)
3557            elif key in ("OBJE", "OBJECT"):
3558                self.__parse_obje(line)
3559            elif key in ("REPO", "REPOSITORY"):
3560                self.__parse_repo(line)
3561            elif key in ("SUBM", "SUBMITTER"):
3562                self.__parse_submitter(line)
3563            elif key == "SUBN":
3564                state = CurrentState(level=1)
3565                self.__parse_submission(line, state)
3566                self.__check_msgs(_("Top Level"), state, None)
3567            elif line.token in (TOKEN_SUBM, TOKEN_SUBN, TOKEN_IGNORE):
3568                state = CurrentState()
3569                self.__skip_subordinate_levels(1, state)
3570                self.__check_msgs(_("Top Level"), state, None)
3571            elif key in ("SOUR", "SOURCE"):
3572                self.__parse_source(line.token_text, 1)
3573            elif (line.data.startswith("SOUR ") or
3574                  line.data.startswith("SOURCE ")):
3575                # A source formatted in a single line, for example:
3576                # 0 @S62@ SOUR This is the title of the source
3577                source = self.__find_or_create_source(self.sid_map[line.data])
3578                source.set_title(line.data[5:])
3579                self.dbase.commit_source(source, self.trans)
3580            elif key[0:4] == "NOTE":
3581                try:
3582                    line.data = line.data[5:]
3583                except:
3584                    # don't think this path is ever taken, but if it is..
3585                    # ensure a message is emitted & subordinates skipped
3586                    line.data = None
3587                self.__parse_inline_note(line, 1)
3588            else:
3589                state = CurrentState()
3590                self.__not_recognized(line, state)
3591                self.__check_msgs(_("Top Level"), state, None)
3592
3593    def __parse_level(self, state, __map, default):
3594        """
3595        Loop trough the current GEDCOM level, calling the appropriate
3596        functions associated with the TOKEN.
3597
3598        If no matching function for the token is found, the default function
3599        is called instead.
3600
3601        """
3602        while True:
3603            line = self.__get_next_line()
3604            if line.level < state.level:
3605                self.backoff = True
3606                return
3607            else:
3608                func = __map.get(line.token, default)
3609                func(line, state)
3610
3611    #----------------------------------------------------------------------
3612    #
3613    # INDI parsing
3614    #
3615    #----------------------------------------------------------------------
3616
3617    def __parse_indi(self, line):
3618        """
3619        Handling of the GEDCOM INDI tag and all lines subordinate to the
3620        current line.
3621
3622        n  @XREF:INDI@ INDI {1:1}
3623          +1 RESN <RESTRICTION_NOTICE> {0:1}
3624          +1 <<PERSONAL_NAME_STRUCTURE>> {0:M}
3625          +1 SEX <SEX_VALUE> {0:1}
3626          +1 <<INDIVIDUAL_EVENT_STRUCTURE>> {0:M}
3627          +1 <<INDIVIDUAL_ATTRIBUTE_STRUCTURE>> {0:M}
3628          +1 <<LDS_INDIVIDUAL_ORDINANCE>> {0:M}
3629          +1 <<CHILD_TO_FAMILY_LINK>> {0:M}
3630          +1 <<SPOUSE_TO_FAMILY_LINK>> {0:M}
3631          +1 SUBM @<XREF:SUBM>@ {0:M}
3632          +1 <<ASSOCIATION_STRUCTURE>> {0:M}
3633          +1 ALIA @<XREF:INDI>@ {0:M}
3634          +1 ANCI @<XREF:SUBM>@ {0:M}
3635          +1 DESI @<XREF:SUBM>@ {0:M}
3636          +1 <<SOURCE_CITATION>> {0:M}
3637          +1 <<MULTIMEDIA_LINK>> {0:M}
3638          +1 <<NOTE_STRUCTURE>> {0:M}
3639          +1 RFN <PERMANENT_RECORD_FILE_NUMBER> {0:1}
3640          +1 AFN <ANCESTRAL_FILE_NUMBER> {0:1}
3641          +1 REFN <USER_REFERENCE_NUMBER> {0:M}
3642          +2 TYPE <USER_REFERENCE_TYPE> {0:1}
3643          +1 RIN <AUTOMATED_RECORD_ID> {0:1}
3644          +1 <<CHANGE_DATE>> {0:1}
3645        """
3646
3647        # find the person
3648        real_id = self.pid_map[line.token_text]
3649        person = self.__find_or_create_person(real_id)
3650
3651        # set up the state for the parsing
3652        state = CurrentState(person=person, level=1)
3653
3654        # do the actual parsing
3655        self.__parse_level(state, self.indi_parse_tbl, self.__person_event)
3656
3657        # Add the default reference if no source has found
3658        self.__add_default_source(person)
3659
3660        # Add a default tag if provided
3661        self.__add_default_tag(person)
3662
3663        # Set up primary photo if present
3664        self.__do_photo(state)
3665
3666        self.__check_msgs(_("INDI (individual) Gramps ID %s") %
3667                          person.get_gramps_id(), state, person)
3668        # commit the person to the database
3669        self.dbase.commit_person(person, self.trans, state.person.change)
3670
3671    def __person_sour(self, line, state):
3672        """
3673        @param line: The current line in GedLine format
3674        @type line: GedLine
3675        @param state: The current state
3676        @type state: CurrentState
3677        """
3678        citation_handle = self.handle_source(line, state.level, state)
3679        state.person.add_citation(citation_handle)
3680
3681    def __person_refn(self, line, state):
3682        """
3683        @param line: The current line in GedLine format
3684        @type line: GedLine
3685        @param state: The current state
3686        @type state: CurrentState
3687        """
3688        self.__do_refn(line, state, state.person)
3689
3690    def __person_attr(self, line, state):
3691        """
3692        @param line: The current line in GedLine format
3693        @type line: GedLine
3694        @param state: The current state
3695        @type state: CurrentState
3696        """
3697        attr = Attribute()
3698        attr.set_type((AttributeType.CUSTOM, line.token_text))
3699        attr.set_value(line.data)
3700        state.person.add_attribute(attr)
3701
3702    def __person_event(self, line, state):
3703        """
3704        @param line: The current line in GedLine format
3705        @type line: GedLine
3706        @param state: The current state
3707        @type state: CurrentState
3708        """
3709        # We can get here when a tag that is not valid in the indi_parse_tbl
3710        # parse table is encountered. The tag may be of the form "_XXX".  We
3711        # try to convert to a friendly name, if fails use the tag itself as
3712        # the TYPE in a custom event
3713        cust_tag = CUSTOMEVENTTAGS.get(line.token_text, line.token_text)
3714        cust_type = EventType((EventType.CUSTOM, cust_tag))
3715        event_ref = self.__build_event_pair(state, cust_type,
3716                                            self.event_parse_tbl,
3717                                            str(line.data))
3718        state.person.add_event_ref(event_ref)
3719
3720    def __fam_even(self, line, state):
3721        """
3722        @param line: The current line in GedLine format
3723        @type line: GedLine
3724        @param state: The current state
3725        @type state: CurrentState
3726        """
3727        event_ref = self.__build_family_event_pair(state,
3728                                                   EventType.CUSTOM,
3729                                                   self.event_parse_tbl,
3730                                                   line.data)
3731        state.family.add_event_ref(event_ref)
3732
3733    def __person_chan(self, line, state):
3734        """
3735        @param line: The current line in GedLine format
3736        @type line: GedLine
3737        @param state: The current state
3738        @type state: CurrentState
3739        """
3740        self.__parse_change(line, state.person, state.level + 1, state)
3741
3742    def __person_resn(self, line, state):
3743        """
3744        Parses the RESN tag, adding it as an attribute.
3745
3746        @param line: The current line in GedLine format
3747        @type line: GedLine
3748        @param state: The current state
3749        @type state: CurrentState
3750        """
3751        attr = Attribute()
3752        attr.set_type((AttributeType.CUSTOM, 'RESN'))
3753        state.person.add_attribute(attr)
3754
3755    def __person_alt_name(self, line, state):
3756        """
3757        This parses the standard GEDCOM structure:
3758
3759        n  @XREF:INDI@ INDI {1:1}
3760          +1 ALIA @<XREF:INDI>@ {0:M}
3761
3762        The ALIA tag is supposed to cross reference another person. We will
3763        store this in the Association record.
3764
3765        ALIA {ALIAS}: = An indicator to link different record descriptions of a
3766        person who may be the same person.
3767
3768        Some systems use the ALIA tag as an alternate NAME tag, which is not
3769        legal in GEDCOM, but oddly enough, is easy to support. This parses the
3770        illegal (ALIA or ALIAS) or non-standard (_ALIA) GEDCOM. "1 ALIA" is
3771        used by Family Tree Maker and Reunion. "1 ALIAS" and "1 _ALIA" do not
3772        appear to be used.
3773
3774        n  @XREF:INDI@ INDI                        {1:1}
3775          +1  <ALIA> <NAME_PERSONAL>               {1:1}
3776            +2 NPFX <NAME_PIECE_PREFIX>            {0:1}
3777            +2 GIVN <NAME_PIECE_GIVEN>             {0:1}
3778            +2 NICK <NAME_PIECE_NICKNAME>          {0:1}
3779            +2 SPFX <NAME_PIECE_SURNAME_PREFIX>    {0:1}
3780            +2 SURN <NAME_PIECE_SURNAME>           {0:1}
3781            +2 NSFX <NAME_PIECE_SUFFIX>            {0:1}
3782            +2 <<SOURCE_CITATION>>                 {0:M}
3783              +3 <<NOTE_STRUCTURE>>                {0:M}
3784              +3 <<MULTIMEDIA_LINK>>               {0:M}
3785            +2 <<NOTE_STRUCTURE>>                  {0:M}
3786        where <ALIA> == ALIA | _ALIA | ALIAS
3787
3788        @param line: The current line in GedLine format
3789        @type line: GedLine
3790        @param state: The current state
3791        @type state: CurrentState
3792        """
3793        if line.data == '':
3794            self.__add_msg(_("Empty Alias <NAME PERSONAL> ignored"),
3795                           line, state)
3796            self.__skip_subordinate_levels(state.level + 1, state)
3797        elif line.data[0] == '@':
3798            handle = self.__find_person_handle(self.pid_map[line.data])
3799            ref = PersonRef()
3800            ref.ref = handle
3801            ref.rel = "Alias"
3802            state.person.add_person_ref(ref)
3803        else:
3804            self.__parse_alias_name(line, state)
3805
3806    def __parse_alias_name(self, line, state):
3807        """
3808        Parse a level 1 alias name and subsidiary levels when called from
3809        __person_alt_name (when the <NAME_PERSONAL> does not start with @).
3810        Also parses a level 2 alias name and subsidiary levels when called
3811        from __name_alias.
3812
3813          +1  <ALIA> <NAME_PERSONAL>               {1:1}
3814            +2 NPFX <NAME_PIECE_PREFIX>            {0:1}
3815            +2 GIVN <NAME_PIECE_GIVEN>             {0:1}
3816            +2 NICK <NAME_PIECE_NICKNAME>          {0:1}
3817            +2 SPFX <NAME_PIECE_SURNAME_PREFIX>    {0:1}
3818            +2 SURN <NAME_PIECE_SURNAME>           {0:1}
3819            +2 NSFX <NAME_PIECE_SUFFIX>            {0:1}
3820            +2 <<SOURCE_CITATION>>                 {0:M}
3821              +3 <<NOTE_STRUCTURE>>                {0:M}
3822              +3 <<MULTIMEDIA_LINK>>               {0:M}
3823            +2 <<NOTE_STRUCTURE>>                  {0:M}
3824        where <ALIA> == ALIA | _ALIA | ALIAS
3825
3826        @param line: The current line in GedLine format
3827        @type line: GedLine
3828        @param state: The current state
3829        @type state: CurrentState
3830        """
3831        name = self.__parse_name_personal(line.data)
3832        name.set_type(NameType.AKA)
3833        state.person.add_alternate_name(name)
3834
3835        # Create a new state, and parse the remainder of the NAME level
3836        sub_state = CurrentState()
3837        sub_state.person = state.person
3838        sub_state.name = name
3839        sub_state.level = state.level + 1
3840
3841        self.__parse_level(sub_state, self.name_parse_tbl, self.__undefined)
3842        state.msg += sub_state.msg
3843
3844    def __person_object(self, line, state):
3845        """
3846        @param line: The current line in GedLine format
3847        @type line: GedLine
3848        @param state: The current state
3849        @type state: CurrentState
3850        """
3851        self.__obje(line, state, state.person)
3852
3853    def __person_photo(self, line, state):
3854        """
3855        This handles the FTM _PHOTO feature, which identifies an OBJE to use
3856        as the person's primary photo.
3857        """
3858        state.photo = line.data     # Just save it for now.
3859
3860    def __person_name(self, line, state):
3861        """
3862        Parsers the NAME token in a GEDCOM file. The text is in the format
3863        of (according to the GEDCOM Spec):
3864        >   <TEXT>|/<TEXT>/|<TEXT>/<TEXT>/|/<TEXT>/<TEXT>|<TEXT>/<TEXT>/<TEXT>
3865        We have encountered some variations that use:
3866        >   <TEXT>/
3867
3868        The basic Name structure is:
3869
3870          n  NAME <NAME_PERSONAL> {1:1}
3871          +1 NPFX <NAME_PIECE_PREFIX> {0:1}
3872          +1 GIVN <NAME_PIECE_GIVEN> {0:1}
3873          +1 NICK <NAME_PIECE_NICKNAME> {0:1}
3874          +1 SPFX <NAME_PIECE_SURNAME_PREFIX {0:1}
3875          +1 SURN <NAME_PIECE_SURNAME> {0:1}
3876          +1 NSFX <NAME_PIECE_SUFFIX> {0:1}
3877          +1 <<SOURCE_CITATION>> {0:M}
3878          +1 <<NOTE_STRUCTURE>> {0:M}
3879
3880        @param line: The current line in GedLine format
3881        @type line: GedLine
3882        @param state: The current state
3883        @type state: CurrentState
3884        """
3885
3886        # build a Name structure from the text
3887
3888        name = self.__parse_name_personal(line.data)
3889
3890        # Add the name as the primary name if this is the first one that
3891        # we have encountered for this person. Assume that if this is the
3892        # first name, that it is a birth name. Otherwise, label it as an
3893        # "Also Known As (AKA)". GEDCOM does not seem to have the concept
3894        # of different name types
3895
3896        if state.name_cnt == 0:
3897            name.set_type(NameType.BIRTH)
3898            state.person.set_primary_name(name)
3899        else:
3900            name.set_type(NameType.AKA)
3901            state.person.add_alternate_name(name)
3902        state.name_cnt += 1
3903
3904        # Create a new state, and parse the remainder of the NAME level
3905        sub_state = CurrentState()
3906        sub_state.person = state.person
3907        sub_state.name = name
3908        sub_state.level = state.level + 1
3909
3910        self.__parse_level(sub_state, self.name_parse_tbl, self.__undefined)
3911        state.msg += sub_state.msg
3912
3913    def __person_sex(self, line, state):
3914        """
3915        Parses the SEX line of a GEDCOM file. It has the format of:
3916
3917        +1 SEX <SEX_VALUE> {0:1}
3918
3919        @param line: The current line in GedLine format
3920        @type line: GedLine
3921        @param state: The current state
3922        @type state: CurrentState
3923        """
3924        state.person.set_gender(line.data)
3925
3926    def __person_even(self, line, state):
3927        """
3928        Parses the custom EVEN tag, which has the format of:
3929
3930           n  <<EVENT_TYPE>> {1:1}
3931           +1 <<EVENT_DETAIL>> {0:1} p.*
3932
3933        @param line: The current line in GedLine format
3934        @type line: GedLine
3935        @param state: The current state
3936        @type state: CurrentState
3937        """
3938        event_ref = self.__build_event_pair(state, EventType.CUSTOM,
3939                                            self.event_parse_tbl, line.data)
3940        state.person.add_event_ref(event_ref)
3941
3942    def __person_std_event(self, line, state):
3943        """
3944        Parses GEDCOM event types that map to a Gramps standard type.
3945        Additional parsing required is for the event detail:
3946
3947           +1 <<EVENT_DETAIL>> {0:1} p.*
3948
3949        @param line: The current line in GedLine format
3950        @type line: GedLine
3951        @param state: The current state
3952        @type state: CurrentState
3953        """
3954
3955        event = line.data
3956        event.set_gramps_id(self.emapper.find_next())
3957        event_ref = EventRef()
3958        self.dbase.add_event(event, self.trans)
3959
3960        sub_state = CurrentState()
3961        sub_state.person = state.person
3962        sub_state.level = state.level + 1
3963        sub_state.event = event
3964        sub_state.event_ref = event_ref
3965        sub_state.pf = self.place_parser
3966
3967        self.__parse_level(sub_state, self.event_parse_tbl, self.__undefined)
3968        state.msg += sub_state.msg
3969
3970        self.__add_place(event, sub_state)
3971
3972        self.dbase.commit_event(event, self.trans)
3973        event_ref.ref = event.handle
3974        state.person.add_event_ref(event_ref)
3975
3976    def __person_reli(self, line, state):
3977        """
3978        Parses the RELI tag.
3979
3980           n  RELI [Y|<NULL>] {1:1}
3981           +1 <<EVENT_DETAIL>> {0:1} p.*
3982
3983        @param line: The current line in GedLine format
3984        @type line: GedLine
3985        @param state: The current state
3986        @type state: CurrentState
3987        """
3988        event_ref = self.__build_event_pair(state, EventType.RELIGION,
3989                                            self.event_parse_tbl, line.data)
3990        state.person.add_event_ref(event_ref)
3991
3992    def __person_birt(self, line, state):
3993        """
3994        Parses GEDCOM BIRT tag into a Gramps birth event. Additional work
3995        must be done, since additional handling must be done by Gramps to set
3996        this up as a birth reference event.
3997
3998           n  BIRT [Y|<NULL>] {1:1}
3999           +1 <<EVENT_DETAIL>> {0:1} p.*
4000           +1 FAMC @<XREF:FAM>@ {0:1} p.*
4001
4002        I'm not sure what value the FAMC actually offers here, since
4003        the FAMC record should handle this. Why it is a valid sub value
4004        is beyond me.
4005
4006        @param line: The current line in GedLine format
4007        @type line: GedLine
4008        @param state: The current state
4009        @type state: CurrentState
4010        """
4011        event_ref = self.__build_event_pair(state, EventType.BIRTH,
4012                                            self.event_parse_tbl, line.data)
4013        if state.person.get_birth_ref():
4014            state.person.add_event_ref(event_ref)
4015        else:
4016            state.person.set_birth_ref(event_ref)
4017
4018    def __person_adop(self, line, state):
4019        """
4020        Parses GEDCOM ADOP tag, subordinate to the INDI tag. Additinal tags
4021        are needed by the tag, so we pass a different function map.
4022
4023           n  ADOP [Y|<NULL>] {1:1}
4024           +1 <<EVENT_DETAIL>> {0:1} p.*
4025           +1 FAMC @<XREF:FAM>@ {0:1} p.*
4026           +2 ADOP <ADOPTED_BY_WHICH_PARENT> {0:1}
4027
4028        @param line: The current line in GedLine format
4029        @type line: GedLine
4030        @param state: The current state
4031        @type state: CurrentState
4032        """
4033        event_ref = self.__build_event_pair(state, EventType.ADOPT,
4034                                            self.adopt_parse_tbl, line.data)
4035        state.person.add_event_ref(event_ref)
4036
4037    def __person_deat(self, line, state):
4038        """
4039        Parses GEDCOM DEAT tag into a Gramps birth event. Additional work
4040        must be done, since additional handling must be done by Gramps to set
4041        this up as a death reference event.
4042
4043           n  DEAT [Y|<NULL>] {1:1}
4044           +1 <<EVENT_DETAIL>> {0:1} p.*
4045
4046        @param line: The current line in GedLine format
4047        @type line: GedLine
4048        @param state: The current state
4049        @type state: CurrentState
4050        """
4051        event_ref = self.__build_event_pair(state, EventType.DEATH,
4052                                            self.event_parse_tbl, line.data)
4053        if state.person.get_death_ref():
4054            state.person.add_event_ref(event_ref)
4055        else:
4056            state.person.set_death_ref(event_ref)
4057
4058    def __person_note(self, line, state):
4059        """
4060        Parses a note associated with the person
4061
4062        @param line: The current line in GedLine format
4063        @type line: GedLine
4064        @param state: The current state
4065        @type state: CurrentState
4066        """
4067        self.__parse_note(line, state.person, state)
4068
4069    def __person_rnote(self, line, state):
4070        """
4071        Parses a note associated with the person
4072
4073        @param line: The current line in GedLine format
4074        @type line: GedLine
4075        @param state: The current state
4076        @type state: CurrentState
4077        """
4078        self.__parse_note(line, state.person, state)
4079
4080    def __person_addr(self, line, state):
4081        """
4082        Parses the INDIvidual <ADDRESS_STRUCTURE>
4083
4084        n ADDR <ADDRESS_LINE> {0:1}
4085        +1 CONT <ADDRESS_LINE> {0:M}
4086        +1 ADR1 <ADDRESS_LINE1> {0:1}  (Street)
4087        +1 ADR2 <ADDRESS_LINE2> {0:1}  (Locality)
4088        +1 CITY <ADDRESS_CITY> {0:1}
4089        +1 STAE <ADDRESS_STATE> {0:1}
4090        +1 POST <ADDRESS_POSTAL_CODE> {0:1}
4091        +1 CTRY <ADDRESS_COUNTRY> {0:1}
4092        n PHON <PHONE_NUMBER> {0:3}
4093
4094        @param line: The current line in GedLine format
4095        @type line: GedLine
4096        @param state: The current state
4097        @type state: CurrentState
4098        """
4099        if self.is_ftw:
4100            self.__person_resi(line, state)
4101            return
4102        free_form = line.data
4103
4104        sub_state = CurrentState(level=state.level + 1)
4105        sub_state.addr = Address()
4106
4107        self.__parse_level(sub_state, self.parse_addr_tbl, self.__ignore)
4108        state.msg += sub_state.msg
4109
4110        self.__merge_address(free_form, sub_state.addr, line, state)
4111        state.person.add_address(sub_state.addr)
4112
    def __person_resi(self, line, state):
        """
        Parses GEDCOM ADDR tag, subordinate to the INDI tag, when sourced by
        FTM.  We treat this as a RESI event, because FTM puts standard event
        details below the ADDR line.

           n  ADDR <ADDRESS_LINE> {0:1}
           +1 <<EVENT_DETAIL>> {0:1} p.*

        Note that this modifies the passed line in place (its level is
        incremented) and sets self.backoff, so the statement order below
        matters.

        @param line: The current line in GedLine format
        @type line: GedLine
        @param state: The current state
        @type state: CurrentState
        """
        # Push the ADDR line back and pretend it sits one level deeper, so
        # that it is picked up again as a detail of the residence event
        # built below.
        self.backoff = True  # reprocess the current ADDR line
        line.level += 1      # as if it was next level down
        # The residence event itself gets an empty description.
        event_ref = self.__build_event_pair(state, EventType.RESIDENCE,
                                            self.event_parse_tbl, '')
        state.person.add_event_ref(event_ref)
4132
4133    def __person_phon(self, line, state):
4134        """
4135        n PHON <PHONE_NUMBER> {0:3}
4136
4137        @param line: The current line in GedLine format
4138        @type line: GedLine
4139        @param state: The current state
4140        @type state: CurrentState
4141        """
4142        url = Url()
4143        url.set_path(line.data)
4144        url.set_type(UrlType(_('Phone')))
4145        state.person.add_url(url)
4146
4147    def __person_fax(self, line, state):
4148        """
4149        O INDI
4150        1 FAX <PHONE_NUMBER> {0:3}
4151
4152        @param line: The current line in GedLine format
4153        @type line: GedLine
4154        @param state: The current state
4155        @type state: CurrentState
4156        """
4157        url = Url()
4158        url.set_path(line.data)
4159        url.set_type(UrlType(_('FAX')))
4160        state.person.add_url(url)
4161
4162    def __person_email(self, line, state):
4163        """
4164        O INDI
4165        1 EMAIL <EMAIL> {0:3}
4166
4167        @param line: The current line in GedLine format
4168        @type line: GedLine
4169        @param state: The current state
4170        @type state: CurrentState
4171        """
4172        url = Url()
4173        url.set_path(line.data)
4174        url.set_type(UrlType(UrlType.EMAIL))
4175        state.person.add_url(url)
4176
4177    def __person_www(self, line, state):
4178        """
4179        O INDI
4180        1 WWW <URL> {0:3}
4181
4182        @param line: The current line in GedLine format
4183        @type line: GedLine
4184        @param state: The current state
4185        @type state: CurrentState
4186        """
4187        url = Url()
4188        url.set_path(line.data)
4189        url.set_type(UrlType(UrlType.WEB_HOME))
4190        state.person.add_url(url)
4191
4192    def __person_titl(self, line, state):
4193        """
4194        @param line: The current line in GedLine format
4195        @type line: GedLine
4196        @param state: The current state
4197        @type state: CurrentState
4198        """
4199        event = Event()
4200        event_ref = EventRef()
4201        event.set_gramps_id(self.emapper.find_next())
4202        event.set_type(EventType.NOB_TITLE)
4203        event.set_description(line.data)
4204
4205        sub_state = CurrentState()
4206        sub_state.person = state.person
4207        sub_state.level = state.level + 1
4208        sub_state.event = event
4209        sub_state.event_ref = event_ref
4210        sub_state.pf = self.place_parser
4211
4212        self.__parse_level(sub_state, self.event_parse_tbl, self.__undefined)
4213        state.msg += sub_state.msg
4214
4215        self.__add_place(event, sub_state)
4216
4217        self.dbase.add_event(event, self.trans)
4218        event_ref.ref = event.handle
4219        state.person.add_event_ref(event_ref)
4220
4221    def __person_attr_plac(self, line, state):
4222        """
4223        @param line: The current line in GedLine format
4224        @type line: GedLine
4225        @param state: The current state
4226        @type state: CurrentState
4227        """
4228        if state.attr.get_value() == "":
4229            state.attr.set_value(line.data)
4230
4231    def __name_type(self, line, state):
4232        """
4233        @param line: The current line in GedLine format
4234        @type line: GedLine
4235        @param state: The current state
4236        @type state: CurrentState
4237        """
4238        if line.data.upper() in ("_OTHN", "_AKA", "AKA", "AKAN"):
4239            state.name.set_type(NameType.AKA)
4240        elif line.data.upper() in ("_MAR", "_MARN", "_MARNM", "MARRIED"):
4241            state.name.set_type(NameType.MARRIED)
4242        else:
4243            state.name.set_type((NameType.CUSTOM, line.data))
4244
4245    def __name_date(self, line, state):
4246        """
4247        @param line: The current line in GedLine format
4248        @type line: GedLine
4249        @param state: The current state
4250        @type state: CurrentState
4251        """
4252        if state.name:
4253            state.name.set_date_object(line.data)
4254
4255    def __name_note(self, line, state):
4256        """
4257        @param line: The current line in GedLine format
4258        @type line: GedLine
4259        @param state: The current state
4260        @type state: CurrentState
4261        """
4262        self.__parse_note(line, state.name, state)
4263
4264    def __name_alia(self, line, state):
4265        """
4266        This parses the illegal (ALIA or ALIAS) or non-standard (_ALIA) GEDCOM
4267        tag as a subsidiary of the NAME tag.
4268
4269        n  @XREF:INDI@ INDI                        {1:1}
4270          +1 NAME <NAME_PERSONAL>                  {1:1}
4271            +2 NPFX <NAME_PIECE_PREFIX>            {0:1}
4272            +2 GIVN <NAME_PIECE_GIVEN>             {0:1}
4273            +2 NICK <NAME_PIECE_NICKNAME>          {0:1}
4274            +2 SPFX <NAME_PIECE_SURNAME_PREFIX>    {0:1}
4275            +2 SURN <NAME_PIECE_SURNAME>           {0:1}
4276            +2 NSFX <NAME_PIECE_SUFFIX>            {0:1}
4277            +2 <ALIA>  <NAME_PERSONAL>             {1:1}
4278              +3 NPFX <NAME_PIECE_PREFIX>          {0:1}
4279              +3 GIVN <NAME_PIECE_GIVEN>           {0:1}
4280              +3 NICK <NAME_PIECE_NICKNAME>        {0:1}
4281              +3 SPFX <NAME_PIECE_SURNAME_PREFIX>  {0:1}
4282              +3 SURN <NAME_PIECE_SURNAME>         {0:1}
4283              +3 NSFX <NAME_PIECE_SUFFIX>          {0:1}
4284              +3 <<SOURCE_CITATION>>               {0:M}
4285                +4 <<NOTE_STRUCTURE>>              {0:M}
4286                +4 <<MULTIMEDIA_LINK>>             {0:M}
4287              +3 <<NOTE_STRUCTURE>>                {0:M}
4288            +2 <<SOURCE_CITATION>>                 {0:M}
4289              +3 <<NOTE_STRUCTURE>>                {0:M}
4290              +3 <<MULTIMEDIA_LINK>>               {0:M}
4291            +2 <<NOTE_STRUCTURE>>                  {0:M}
4292
4293        Note that the subsidiary name structure detail will overwrite the ALIA
4294        name (if the same elements are provided in both), so the names should
4295        match.
4296
4297        "2 _ALIA" is used for example, by PRO-GEN v 3.0a and "2 ALIA" is used
4298        by GTEdit and Brother's keeper 5.2 for windows. It had been supported
4299        in previous versions of Gramps but as it was probably incorrectly coded
4300        as it would only work if the name started with '@'.
4301
4302        @param line: The current line in GedLine format
4303        @type line: GedLine
4304        @param state: The current state
4305        @type state: CurrentState
4306        """
4307        self.__parse_alias_name(line, state)
4308
4309    def __name_npfx(self, line, state):
4310        """
4311        @param line: The current line in GedLine format
4312        @type line: GedLine
4313        @param state: The current state
4314        @type state: CurrentState
4315        """
4316        state.name.set_title(line.data.strip())
4317        self.__skip_subordinate_levels(state.level + 1, state)
4318
4319    def __name_givn(self, line, state):
4320        """
4321        @param line: The current line in GedLine format
4322        @type line: GedLine
4323        @param state: The current state
4324        @type state: CurrentState
4325        """
4326        state.name.set_first_name(line.data.strip())
4327        self.__skip_subordinate_levels(state.level + 1, state)
4328
4329    def __name_spfx(self, line, state):
4330        """
4331        @param line: The current line in GedLine format
4332        @type line: GedLine
4333        @param state: The current state
4334        @type state: CurrentState
4335        """
4336        spfx = line.data.strip().split(", ")[0]
4337        if state.name.get_surname_list():
4338            state.name.get_surname_list()[0].set_prefix(spfx)
4339        else:
4340            surn = Surname()
4341            surn.set_prefix(spfx)
4342            surn.set_primary()
4343            state.name.set_surname_list([surn])
4344        self.__skip_subordinate_levels(state.level + 1, state)
4345
4346    def __name_surn(self, line, state):
4347        """
4348        @param line: The current line in GedLine format
4349        @type line: GedLine
4350        @param state: The current state
4351        @type state: CurrentState
4352        """
4353        names = line.data.strip().split(", ")
4354        overwrite = bool(state.name.get_surname_list())
4355        for name in names:
4356            if overwrite:
4357                state.name.get_surname_list()[0].set_surname(name)
4358                overwrite = False
4359            else:
4360                surn = Surname()
4361                surn.set_surname(name)
4362                surn.set_primary(primary=not state.name.get_surname_list())
4363                state.name.get_surname_list().append(surn)
4364        self.__skip_subordinate_levels(state.level + 1, state)
4365
4366    def __name_marnm(self, line, state):
4367        """
4368        This is non-standard GEDCOM. _MARNM is reported to be used in Ancestral
4369        Quest and Personal Ancestral File 5. This will also handle a usage
4370        which has been found in Brother's Keeper (BROSKEEP VERS 6.1.31 WINDOWS)
4371        as follows:
4372
4373        0 @I203@ INDI
4374          1 NAME John Richard/Doe/
4375            2 _MARN Some Other Name
4376              3 DATE 27 JUN 1817
4377
4378        @param line: The current line in GedLine format
4379        @type line: GedLine
4380        @param state: The current state
4381        @type state: CurrentState
4382        """
4383        text = line.data.strip()
4384        data = text.split()
4385        if len(data) == 1:
4386            name = Name(state.person.primary_name)
4387            surn = Surname()
4388            surn.set_surname(data[0].strip())
4389            surn.set_primary()
4390            name.set_surname_list([surn])
4391            name.set_type(NameType.MARRIED)
4392            state.person.add_alternate_name(name)
4393        elif len(data) > 1:
4394            name = self.__parse_name_personal(text)
4395            name.set_type(NameType.MARRIED)
4396            state.person.add_alternate_name(name)
4397
4398    def __name_nsfx(self, line, state):
4399        """
4400        @param line: The current line in GedLine format
4401        @type line: GedLine
4402        @param state: The current state
4403        @type state: CurrentState
4404        """
4405        if state.name.get_suffix() == "" or \
4406           state.name.get_suffix() == line.data:
4407            #suffix might be set before when parsing name string
4408            state.name.set_suffix(line.data)
4409        else:
4410            #previously set suffix different, to not loose information, append
4411            state.name.set_suffix(state.name.get_suffix() + ' ' + line.data)
4412        self.__skip_subordinate_levels(state.level + 1, state)
4413
4414    def __name_nick(self, line, state):
4415        """
4416        @param line: The current line in GedLine format
4417        @type line: GedLine
4418        @param state: The current state
4419        @type state: CurrentState
4420        """
4421        state.name.set_nick_name(line.data.strip())
4422        self.__skip_subordinate_levels(state.level + 1, state)
4423
4424    def __name_aka(self, line, state):
4425        """
4426        This parses the non-standard GEDCOM tags _AKA or _AKAN as a subsidiary
4427        to the NAME tag, which is reported to have been found in Ancestral
4428        Quest and Personal Ancestral File 4 and 5. Note: example AQ and PAF
4429        files have separate 2 NICK and 2 _AKA lines for the same person. The
4430        NICK will be stored by Gramps in the nick_name field of the name
4431        structure, while the _AKA, if it is a single word, will be stored in
4432        the NICKNAME attribute. If more than one word it is stored as an AKA
4433        alternate name.
4434
4435        This will also handle a usage which has been found in in  Brother's
4436        Keeper (BROSKEEP VERS 6.1.31 WINDOWS) as follows:
4437
4438        0 @I203@ INDI
4439          1 NAME John Richard/Doe/
4440            2 _AKAN Some Other Name
4441              3 DATE 27 JUN 1817
4442
4443        @param line: The current line in GedLine format
4444        @type line: GedLine
4445        @param state: The current state
4446        @type state: CurrentState
4447        """
4448        lname = line.data.split()
4449        name_len = len(lname)
4450        if name_len == 1:
4451            attr = Attribute()
4452            attr.set_type(AttributeType.NICKNAME)
4453            attr.set_value(line.data)
4454            state.person.add_attribute(attr)
4455        elif name_len == 0:
4456            return
4457        else:
4458            name = Name()
4459            surname = Surname()
4460            surname.set_surname(lname[-1].strip())
4461            surname.set_primary()
4462            name.set_surname_list([surname])
4463            name.set_first_name(' '.join(lname[0:name_len - 1]))
4464#            name = self.__parse_name_personal(line.data)
4465            name.set_type(NameType.AKA)
4466            state.person.add_alternate_name(name)
4467
4468    def __name_adpn(self, line, state):
4469        """
4470        @param line: The current line in GedLine format
4471        @type line: GedLine
4472        @param state: The current state
4473        @type state: CurrentState
4474        """
4475        text = line.data.strip()
4476        data = text.split()
4477        if len(data) == 1:
4478            name = Name(state.person.primary_name)
4479            surn = Surname()
4480            surn.set_surname(data[0].strip())
4481            surn.set_primary()
4482            name.set_surname_list([surn])
4483            name.set_type((NameType.CUSTOM, "Adopted"))
4484            state.person.add_alternate_name(name)
4485        elif len(data) > 1:
4486            name = self.__parse_name_personal(text)
4487            name.set_type((NameType.CUSTOM, "Adopted"))
4488            state.person.add_alternate_name(name)
4489
4490    def __name_sour(self, line, state):
4491        """
4492        @param line: The current line in GedLine format
4493        @type line: GedLine
4494        @param state: The current state
4495        @type state: CurrentState
4496        """
4497        citation_handle = self.handle_source(line, state.level, state)
4498        state.name.add_citation(citation_handle)
4499
4500    def __person_std_attr(self, line, state):
4501        """
4502        Parses an TOKEN that Gramps recognizes as an Attribute
4503
4504        @param line: The current line in GedLine format
4505        @type line: GedLine
4506        @param state: The current state
4507        @type state: CurrentState
4508        """
4509        sub_state = CurrentState()
4510        sub_state.person = state.person
4511        sub_state.attr = line.data
4512        sub_state.level = state.level + 1
4513        state.person.add_attribute(sub_state.attr)
4514        self.__parse_level(sub_state, self.person_attr_parse_tbl,
4515                           self.__ignore)
4516        state.msg += sub_state.msg
4517
4518    def __person_fact(self, line, state):
4519        """
4520        Parses an TOKEN that Gramps recognizes as an Attribute
4521
4522        @param line: The current line in GedLine format
4523        @type line: GedLine
4524        @param state: The current state
4525        @type state: CurrentState
4526        """
4527        sub_state = CurrentState()
4528        sub_state.person = state.person
4529        sub_state.attr = Attribute()
4530        sub_state.attr.set_value(line.data)
4531        sub_state.level = state.level + 1
4532        state.person.add_attribute(sub_state.attr)
4533        self.__parse_level(sub_state, self.person_fact_parse_tbl,
4534                           self.__ignore)
4535        state.msg += sub_state.msg
4536
4537    def __person_fact_type(self, line, state):
4538        state.attr.set_type(line.data)
4539
4540    def __person_bapl(self, line, state):
4541        """
4542        Parses an BAPL TOKEN, producing a Gramps LdsOrd instance
4543
4544        @param line: The current line in GedLine format
4545        @type line: GedLine
4546        @param state: The current state
4547        @type state: CurrentState
4548        """
4549        self.build_lds_ord(state, LdsOrd.BAPTISM)
4550
4551    def __person_conl(self, line, state):
4552        """
4553        Parses an CONL TOKEN, producing a Gramps LdsOrd instance
4554
4555        @param line: The current line in GedLine format
4556        @type line: GedLine
4557        @param state: The current state
4558        @type state: CurrentState
4559        """
4560        self.build_lds_ord(state, LdsOrd.CONFIRMATION)
4561
4562    def __person_endl(self, line, state):
4563        """
4564        Parses an ENDL TOKEN, producing a Gramps LdsOrd instance
4565
4566        @param line: The current line in GedLine format
4567        @type line: GedLine
4568        @param state: The current state
4569        @type state: CurrentState
4570        """
4571        self.build_lds_ord(state, LdsOrd.ENDOWMENT)
4572
4573    def __person_slgc(self, line, state):
4574        """
4575        Parses an SLGC TOKEN, producing a Gramps LdsOrd instance
4576
4577        @param line: The current line in GedLine format
4578        @type line: GedLine
4579        @param state: The current state
4580        @type state: CurrentState
4581        """
4582        self.build_lds_ord(state, LdsOrd.SEAL_TO_PARENTS)
4583
4584    def build_lds_ord(self, state, lds_type):
4585        """
4586        Parses an LDS ordinance, using the type passed to the routine
4587
4588        @param state: The current state
4589        @type state: CurrentState
4590        @param lds_type: The type of the LDS ordinance
4591        @type line: LdsOrd type
4592        """
4593        sub_state = CurrentState()
4594        sub_state.level = state.level + 1
4595        sub_state.lds_ord = LdsOrd()
4596        sub_state.lds_ord.set_type(lds_type)
4597        sub_state.place = None
4598        sub_state.place_fields = PlaceParser()
4599        sub_state.person = state.person
4600        state.person.lds_ord_list.append(sub_state.lds_ord)
4601
4602        self.__parse_level(sub_state, self.lds_parse_tbl, self.__ignore)
4603        state.msg += sub_state.msg
4604
4605        if sub_state.place:
4606            place_title = _pd.display(self.dbase, sub_state.place)
4607            sub_state.place_fields.load_place(self.place_import,
4608                                              sub_state.place,
4609                                              place_title)
4610
4611    def __lds_temple(self, line, state):
4612        """
4613        Parses the TEMP tag, looking up the code for a match.
4614
4615        @param line: The current line in GedLine format
4616        @type line: GedLine
4617        @param state: The current state
4618        @type state: CurrentState
4619        """
4620        value = self.__extract_temple(line)
4621        if value:
4622            state.lds_ord.set_temple(value)
4623
4624    def __lds_date(self, line, state):
4625        """
4626        Parses the DATE tag for the LdsOrd
4627
4628        @param line: The current line in GedLine format
4629        @type line: GedLine
4630        @param state: The current state
4631        @type state: CurrentState
4632        """
4633        state.lds_ord.set_date_object(line.data)
4634
4635    def __lds_famc(self, line, state):
4636        """
4637        Parses the FAMC tag attached to the LdsOrd
4638
4639        @param line: The current line in GedLine format
4640        @type line: GedLine
4641        @param state: The current state
4642        @type state: CurrentState
4643        """
4644        gid = self.fid_map[line.data]
4645        state.lds_ord.set_family_handle(self.__find_family_handle(gid))
4646
4647    def __lds_form(self, line, state):
4648        """
4649        Parses the FORM tag thate defines the place structure for a place.
4650        This tag, if found, will override any global place structure.
4651
4652        @param line: The current line in GedLine format
4653        @type line: GedLine
4654        @param state: The current state
4655        @type state: CurrentState
4656        """
4657        state.pf = PlaceParser(line)
4658
4659    def __lds_plac(self, line, state):
4660        """
4661        Parses the PLAC tag attached to the LdsOrd. Create a new place if
4662        needed and set the title.
4663
4664        @param line: The current line in GedLine format
4665        @type line: GedLine
4666        @param state: The current state
4667        @type state: CurrentState
4668        """
4669        try:
4670            title = line.data
4671            place = self.__find_place(title, None, None)
4672            if place is None:
4673                place = Place()
4674                place.set_title(title)
4675                place.name.set_value(title)
4676                self.dbase.add_place(place, self.trans)
4677                self.place_names[place.get_title()].append(place.get_handle())
4678            else:
4679                pass
4680            state.lds_ord.set_place_handle(place.handle)
4681        except NameError:
4682            return
4683
4684    def __lds_sour(self, line, state):
4685        """
4686        Parses the SOUR tag attached to the LdsOrd.
4687
4688        @param line: The current line in GedLine format
4689        @type line: GedLine
4690        @param state: The current state
4691        @type state: CurrentState
4692        """
4693        citation_handle = self.handle_source(line, state.level, state)
4694        state.lds_ord.add_citation(citation_handle)
4695
4696    def __lds_note(self, line, state):
4697        """
4698        Parses the NOTE tag attached to the LdsOrd.
4699
4700        @param line: The current line in GedLine format
4701        @type line: GedLine
4702        @param state: The current state
4703        @type state: CurrentState
4704        """
4705        self.__parse_note(line, state.lds_ord, state)
4706
4707    def __lds_stat(self, line, state):
4708        """
4709        Parses the STAT (status) tag attached to the LdsOrd.
4710
4711        @param line: The current line in GedLine format
4712        @type line: GedLine
4713        @param state: The current state
4714        @type state: CurrentState
4715        """
4716        status = LDS_STATUS.get(line.data, LdsOrd.STATUS_NONE)
4717        state.lds_ord.set_status(status)
4718
4719    def __person_famc(self, line, state):
4720        """
4721        Handles the parsing of the FAMC line, which indicates which family the
4722        person is a child of.
4723
4724        n FAMC @<XREF:FAM>@ {1:1}
4725        +1 PEDI <PEDIGREE_LINKAGE_TYPE> {0:1} p.*
4726        +1 _FREL <Father relationship type> {0:1}   non-standard Extension
4727        +1 _MREL <Mother relationship type> {0:1}   non-standard Extension
4728        +1 <<NOTE_STRUCTURE>> {0:M} p.*
4729
4730        @param line: The current line in GedLine format
4731        @type line: GedLine
4732        @param state: The current state
4733        @type state: CurrentState
4734        """
4735
4736        if not line.data:  # handles empty FAMC line
4737            self.__not_recognized(line, state)
4738            return
4739        sub_state = CurrentState()
4740        sub_state.person = state.person
4741        sub_state.level = state.level + 1
4742        sub_state.ftype = None
4743        sub_state.primary = False
4744
4745        gid = self.fid_map[line.data]
4746        handle = self.__find_family_handle(gid)
4747
4748        self.__parse_level(sub_state, self.famc_parse_tbl, self.__undefined)
4749        state.msg += sub_state.msg
4750
4751        # if the handle is not already in the person's parent family list, we
4752        # need to add it to thie list.
4753
4754        flist = state.person.get_parent_family_handle_list()
4755        if handle not in flist:
4756            state.person.add_parent_family_handle(handle)
4757
4758            # search childrefs
4759            family, _new = self.dbase.find_family_from_handle(handle,
4760                                                              self.trans)
4761            family.set_gramps_id(gid)
4762
4763            for ref in family.get_child_ref_list():
4764                if ref.ref == state.person.handle:
4765                    break
4766            else:
4767                ref = ChildRef()
4768                ref.ref = state.person.handle
4769                family.add_child_ref(ref)
4770            if sub_state.ftype:
4771                ref.set_mother_relation(sub_state.ftype)
4772                ref.set_father_relation(sub_state.ftype)
4773            else:
4774                if sub_state.frel:
4775                    ref.set_father_relation(sub_state.frel)
4776                if sub_state.mrel:
4777                    ref.set_mother_relation(sub_state.mrel)
4778            self.dbase.commit_family(family, self.trans)
4779
4780    def __person_famc_pedi(self, line, state):
4781        """
4782        Parses the PEDI tag attached to a INDI.FAMC record. No values are set
4783        at this point, because we have to do some post processing. Instead, we
4784        assign the ftype field of the state variable. We convert the text from
4785        the line to an index into the PEDIGREE_TYPES dictionary, which will map
4786        to the correct ChildTypeRef.
4787
4788        @param line: The current line in GedLine format
4789        @type line: GedLine
4790        @param state: The current state
4791        @type state: CurrentState
4792        """
4793        state.ftype = PEDIGREE_TYPES.get(line.data.lower(),
4794                                         ChildRefType.UNKNOWN)
4795
4796    def __person_famc_frel(self, line, state):
4797        """
4798        Parses the _FREL tag attached to a INDI.FAMC record. No values are set
4799        at this point, because we have to do some post processing. Instead, we
4800        assign the frel field of the state variable. We convert the text from
4801        the line to an index into the PEDIGREE_TYPES dictionary, which will map
4802        to the correct ChildTypeRef.
4803
4804        @param line: The current line in GedLine format
4805        @type line: GedLine
4806        @param state: The current state
4807        @type state: CurrentState
4808        """
4809        state.frel = PEDIGREE_TYPES.get(line.data.lower().strip(), None)
4810        if state.frel is None:
4811            state.frel = ChildRefType(line.data.capitalize().strip())
4812
4813    def __person_famc_mrel(self, line, state):
4814        """
4815        Parses the _MREL tag attached to a INDI.FAMC record. No values are set
4816        at this point, because we have to do some post processing. Instead, we
4817        assign the mrel field of the state variable. We convert the text from
4818        the line to an index into the PEDIGREE_TYPES dictionary, which will map
4819        to the correct ChildTypeRef.
4820
4821        @param line: The current line in GedLine format
4822        @type line: GedLine
4823        @param state: The current state
4824        @type state: CurrentState
4825        """
4826        state.mrel = PEDIGREE_TYPES.get(line.data.lower().strip(), None)
4827        if state.mrel is None:
4828            state.mrel = ChildRefType(line.data.capitalize().strip())
4829
4830    def __person_famc_note(self, line, state):
4831        """
4832        Parses the INDI.FAMC.NOTE tag .
4833
4834        @param line: The current line in GedLine format
4835        @type line: GedLine
4836        @param state: The current state
4837        @type state: CurrentState
4838        """
4839        self.__parse_note(line, state.person, state)
4840
4841    def __person_famc_primary(self, line, state):
4842        """
4843        Parses the _PRIMARY tag on an INDI.FAMC tag. This value is stored in
4844        the state record to be used later.
4845
4846        @param line: The current line in GedLine format
4847        @type line: GedLine
4848        @param state: The current state
4849        @type state: CurrentState
4850        """
4851        state.primary = True
4852
4853    def __person_famc_sour(self, line, state):
4854        """
4855        Parses the SOUR tag on an INDI.FAMC tag. Gramps has no corresponding
4856        record on its family relationship, so we add the source to the Person
4857        record.
4858
4859        @param line: The current line in GedLine format
4860        @type line: GedLine
4861        @param state: The current state
4862        @type state: CurrentState
4863        """
4864        citation_handle = self.handle_source(line, state.level, state)
4865        state.person.add_citation(citation_handle)
4866
4867    def __person_fams(self, line, state):
4868        """
4869        Parses the INDI.FAMS record, which indicates the family in which the
4870        person is a spouse.
4871
4872        n FAMS @<XREF:FAM>@ {1:1} p.*
4873        +1 <<NOTE_STRUCTURE>> {0:M} p.*
4874
4875        @param line: The current line in GedLine format
4876        @type line: GedLine
4877        @param state: The current state
4878        @type state: CurrentState
4879        """
4880        gid = self.fid_map[line.data]
4881        handle = self.__find_family_handle(gid)
4882        state.person.add_family_handle(handle)
4883
4884        sub_state = CurrentState(level=state.level + 1)
4885        sub_state.obj = state.person
4886        self.__parse_level(sub_state, self.opt_note_tbl, self.__ignore)
4887        state.msg += sub_state.msg
4888
4889    def __person_asso(self, line, state):
4890        """
4891        Parse the ASSO tag, add the referenced person to the person we
4892        are currently parsing. The GEDCOM spec indicates that valid ASSO tag
4893        is:
4894
4895        n ASSO @<XREF:INDI>@ {0:M}
4896
4897        And the sub tags are:
4898
4899        ASSOCIATION_STRUCTURE:=
4900         +1 RELA <RELATION_IS_DESCRIPTOR> {1:1}
4901         +1 <<NOTE_STRUCTURE>> {0:M}
4902         +1 <<SOURCE_CITATION>> {0:M}
4903
4904        The Gedcom spec notes that the ASSOCIATION_STRUCTURE
4905        can only link to an INDIVIDUAL_RECORD
4906
4907        @param line: The current line in GedLine format
4908        @type line: GedLine
4909        @param state: The current state
4910        @type state: CurrentState
4911        """
4912
4913        # find the id and person that we are referencing
4914        handle = self.__find_person_handle(self.pid_map[line.data])
4915
4916        # create a new PersonRef, and assign the handle, add the
4917        # PersonRef to the active person
4918
4919        sub_state = CurrentState()
4920        sub_state.person = state.person
4921        sub_state.level = state.level + 1
4922        sub_state.ref = PersonRef()
4923        sub_state.ref.ref = handle
4924        sub_state.ignore = False
4925
4926        self.__parse_level(sub_state, self.asso_parse_tbl, self.__ignore)
4927        state.msg += sub_state.msg
4928        if not sub_state.ignore:
4929            state.person.add_person_ref(sub_state.ref)
4930
4931    def __person_asso_rela(self, line, state):
4932        """
4933        Parses the INDI.ASSO.RELA tag.
4934
4935        @param line: The current line in GedLine format
4936        @type line: GedLine
4937        @param state: The current state
4938        @type state: CurrentState
4939        """
4940        state.ref.rel = line.data
4941
4942    def __person_asso_sour(self, line, state):
4943        """
4944        Parses the INDI.ASSO.SOUR tag.
4945
4946        @param line: The current line in GedLine format
4947        @type line: GedLine
4948        @param state: The current state
4949        @type state: CurrentState
4950        """
4951        state.ref.add_citation(self.handle_source(line, state.level, state))
4952
4953    def __person_asso_note(self, line, state):
4954        """
4955        Parses the INDI.ASSO.NOTE tag.
4956
4957        @param line: The current line in GedLine format
4958        @type line: GedLine
4959        @param state: The current state
4960        @type state: CurrentState
4961        """
4962        self.__parse_note(line, state.ref, state)
4963
4964    #-------------------------------------------------------------------
4965    #
4966    # FAM parsing
4967    #
4968    #-------------------------------------------------------------------
4969
4970    def __parse_fam(self, line):
4971        """
4972        n @<XREF:FAM>@   FAM   {1:1}
4973          +1 <<FAMILY_EVENT_STRUCTURE>>  {0:M}
4974          +1 HUSB @<XREF:INDI>@  {0:1}
4975          +1 WIFE @<XREF:INDI>@  {0:1}
4976          +1 CHIL @<XREF:INDI>@  {0:M}
4977          +1 NCHI <COUNT_OF_CHILDREN>  {0:1}
4978          +1 SUBM @<XREF:SUBM>@  {0:M}
4979          +1 <<LDS_SPOUSE_SEALING>>  {0:M}
4980          +1 <<SOURCE_CITATION>>  {0:M}
4981          +1 <<MULTIMEDIA_LINK>>  {0:M}
4982          +1 <<NOTE_STRUCTURE>>  {0:M}
4983          +1 REFN <USER_REFERENCE_NUMBER>  {0:M}
4984          +1 RIN <AUTOMATED_RECORD_ID>  {0:1}
4985          +1 <<CHANGE_DATE>>  {0:1}
4986        """
4987        # create a family
4988
4989        gid = self.fid_map[line.token_text]
4990        family = self.__find_or_create_family(gid)
4991
4992        # parse the family
4993
4994        state = CurrentState(level=1)
4995        state.family = family
4996
4997        self.__parse_level(state, self.family_func, self.__family_even)
4998
4999        # handle addresses attached to families
5000        if state.addr is not None:
5001            father_handle = family.get_father_handle()
5002            father = self.dbase.get_person_from_handle(father_handle)
5003            if father:
5004                father.add_address(state.addr)
5005                self.dbase.commit_person(father, self.trans)
5006            mother_handle = family.get_mother_handle()
5007            mother = self.dbase.get_person_from_handle(mother_handle)
5008            if mother:
5009                mother.add_address(state.addr)
5010                self.dbase.commit_person(mother, self.trans)
5011
5012            for child_ref in family.get_child_ref_list():
5013                child_handle = child_ref.ref
5014                child = self.dbase.get_person_from_handle(child_handle)
5015                if child:
5016                    child.add_address(state.addr)
5017                    self.dbase.commit_person(child, self.trans)
5018
5019        # add default reference if no reference exists
5020        self.__add_default_source(family)
5021
5022        # Add a default tag if provided
5023        self.__add_default_tag(family)
5024
5025        self.__check_msgs(_("FAM (family) Gramps ID %s") %
5026                          family.get_gramps_id(), state, family)
5027        # commit family to database
5028        self.dbase.commit_family(family, self.trans, family.change)
5029
5030    def __family_husb(self, line, state):
5031        """
5032        Parses the husband line of a family
5033
5034        n HUSB @<XREF:INDI>@  {0:1}
5035
5036        @param line: The current line in GedLine format
5037        @type line: GedLine
5038        @param state: The current state
5039        @type state: CurrentState
5040            """
5041        handle = self.__find_person_handle(self.pid_map[line.data])
5042        state.family.set_father_handle(handle)
5043
5044    def __family_wife(self, line, state):
5045        """
5046            Parses the wife line of a family
5047
5048              n WIFE @<XREF:INDI>@  {0:1}
5049
5050        @param line: The current line in GedLine format
5051        @type line: GedLine
5052        @param state: The current state
5053        @type state: CurrentState
5054            """
5055        handle = self.__find_person_handle(self.pid_map[line.data])
5056        state.family.set_mother_handle(handle)
5057
5058    def __family_std_event(self, line, state):
5059        """
5060        Parses GEDCOM event types that map to a Gramps standard type.
5061        Additional parsing required is for the event detail:
5062
5063           +1 <<EVENT_DETAIL>> {0:1} p.*
5064
5065        @param line: The current line in GedLine format
5066        @type line: GedLine
5067        @param state: The current state
5068        @type state: CurrentState
5069        """
5070        event = line.data
5071        event.set_gramps_id(self.emapper.find_next())
5072        event_ref = EventRef()
5073        event_ref.set_role(EventRoleType.FAMILY)
5074        self.dbase.add_event(event, self.trans)
5075
5076        sub_state = CurrentState()
5077        sub_state.person = state.person
5078        sub_state.level = state.level + 1
5079        sub_state.event = event
5080        sub_state.event_ref = event_ref
5081        sub_state.pf = self.place_parser
5082
5083        self.__parse_level(sub_state, self.event_parse_tbl, self.__undefined)
5084        state.msg += sub_state.msg
5085
5086        self.__add_place(event, sub_state)
5087
5088        if event.type == EventType.MARRIAGE:
5089            descr = event.get_description()
5090            if descr == "Civil Union":
5091                state.family.type.set(FamilyRelType.CIVIL_UNION)
5092                event.set_description('')
5093            elif descr == "Unmarried":
5094                state.family.type.set(FamilyRelType.UNMARRIED)
5095                event.set_description('')
5096            else:
5097                state.family.type.set(FamilyRelType.MARRIED)
5098            if descr == "Y":
5099                event.set_description('')
5100
5101        self.dbase.commit_event(event, self.trans)
5102        event_ref.ref = event.handle
5103        state.family.add_event_ref(event_ref)
5104
5105    def __family_even(self, line, state):
5106        """
5107        Parses GEDCOM event types that map to a Gramps standard type.
5108        Additional parsing required is for the event detail:
5109
5110           +1 <<EVENT_DETAIL>> {0:1} p.*
5111
5112        @param line: The current line in GedLine format
5113        @type line: GedLine
5114        @param state: The current state
5115        @type state: CurrentState
5116        """
5117        # We can get here when a tag that is not valid in the family_func
5118        # parse table is encountered. The tag may be of the form "_XXX".  We
5119        # try to convert to a friendly name, if fails use the tag itself as
5120        # the TYPE in a custom event
5121        cust_tag = CUSTOMEVENTTAGS.get(line.token_text, line.token_text)
5122        cust_type = EventType((EventType.CUSTOM, cust_tag))
5123        event = Event()
5124        event_ref = EventRef()
5125        event_ref.set_role(EventRoleType.FAMILY)
5126        event.set_gramps_id(self.emapper.find_next())
5127        event.set_type(cust_type)
5128        # in case a description ever shows up
5129        if line.data and line.data != 'Y':
5130            event.set_description(str(line.data))
5131        self.dbase.add_event(event, self.trans)
5132
5133        sub_state = CurrentState()
5134        sub_state.person = state.person
5135        sub_state.level = state.level + 1
5136        sub_state.event = event
5137        sub_state.event_ref = event_ref
5138        sub_state.pf = self.place_parser
5139
5140        self.__parse_level(sub_state, self.event_parse_tbl, self.__undefined)
5141        state.msg += sub_state.msg
5142
5143        self.__add_place(event, sub_state)
5144
5145        self.dbase.commit_event(event, self.trans)
5146        event_ref.ref = event.handle
5147        state.family.add_event_ref(event_ref)
5148
5149    def __family_chil(self, line, state):
5150        """
5151        Parses the child line of a family
5152
5153        n CHIL @<XREF:INDI>@  {0:1}
5154
5155        @param line: The current line in GedLine format
5156        @type line: GedLine
5157        @param state: The current state
5158        @type state: CurrentState
5159        """
5160        sub_state = CurrentState()
5161        sub_state.family = state.family
5162        sub_state.level = state.level + 1
5163        sub_state.mrel = None
5164        sub_state.frel = None
5165
5166        self.__parse_level(sub_state, self.family_rel_tbl, self.__ignore)
5167        state.msg += sub_state.msg
5168
5169        child = self.__find_or_create_person(self.pid_map[line.data])
5170
5171        reflist = [ref for ref in state.family.get_child_ref_list()
5172                   if ref.ref == child.handle]
5173
5174        if reflist:  # The child has been referenced already
5175            ref = reflist[0]
5176            if sub_state.frel:
5177                ref.set_father_relation(sub_state.frel)
5178            if sub_state.mrel:
5179                ref.set_mother_relation(sub_state.mrel)
5180            # then we will set the order now:
5181            self.set_child_ref_order(state.family, ref)
5182        else:
5183            ref = ChildRef()
5184            ref.ref = child.handle
5185            if sub_state.frel:
5186                ref.set_father_relation(sub_state.frel)
5187            if sub_state.mrel:
5188                ref.set_mother_relation(sub_state.mrel)
5189            state.family.add_child_ref(ref)
5190
5191    def set_child_ref_order(self, family, child_ref):
5192        """
5193        Sets the child_ref in family.child_ref_list to be in the position
5194        family.child_ref_count. This reorders the children to be in the
5195        order given in the FAM section.
5196        """
5197        family.child_ref_list.remove(child_ref)
5198        family.child_ref_list.insert(family.child_ref_count, child_ref)
5199        family.child_ref_count += 1
5200
5201    def __family_slgs(self, line, state):
5202        """
5203        n  SLGS          {1:1}
5204        +1 STAT <LDS_SPOUSE_SEALING_DATE_STATUS>  {0:1}
5205        +1 DATE <DATE_LDS_ORD>  {0:1}
5206        +1 TEMP <TEMPLE_CODE>  {0:1}
5207        +1 PLAC <PLACE_LIVING_ORDINANCE>  {0:1}
5208        +1 <<SOURCE_CITATION>>  {0:M}
5209        +1 <<NOTE_STRUCTURE>>  {0:M}
5210
5211        @param line: The current line in GedLine format
5212        @type line: GedLine
5213        @param state: The current state
5214        @type state: CurrentState
5215            """
5216        sub_state = CurrentState()
5217        sub_state.level = state.level + 1
5218        sub_state.lds_ord = LdsOrd()
5219        sub_state.lds_ord.set_type(LdsOrd.SEAL_TO_SPOUSE)
5220        sub_state.place = None
5221        sub_state.family = state.family
5222        sub_state.place_fields = PlaceParser()
5223        state.family.lds_ord_list.append(sub_state.lds_ord)
5224
5225        self.__parse_level(sub_state, self.lds_parse_tbl, self.__ignore)
5226        state.msg += sub_state.msg
5227
5228        if sub_state.place:
5229            place_title = _pd.display(self.dbase, sub_state.place)
5230            sub_state.place_fields.load_place(self.place_import,
5231                                              sub_state.place,
5232                                              place_title)
5233
5234    def __family_source(self, line, state):
5235        """
5236        n SOUR @<XREF:SOUR>@ /* pointer to source record */ {1:1} p.*
5237        +1 PAGE <WHERE_WITHIN_SOURCE> {0:1} p.*
5238        +1 EVEN <EVENT_TYPE_CITED_FROM> {0:1} p.*
5239        +1 DATA {0:1}
5240        +1 QUAY <CERTAINTY_ASSESSMENT> {0:1} p.*
5241        +1 <<MULTIMEDIA_LINK>> {0:M} p.*, *
5242        +1 <<NOTE_STRUCTURE>> {0:M} p.*
5243
5244        | /* Systems not using source records */
5245        n SOUR <SOURCE_DESCRIPTION> {1:1} p.*
5246        +1 [ CONC | CONT ] <SOURCE_DESCRIPTION> {0:M}
5247        +1 TEXT <TEXT_FROM_SOURCE> {0:M} p.*
5248        +1 <<NOTE_STRUCTURE>> {0:M} p.*
5249
5250        @param line: The current line in GedLine format
5251        @type line: GedLine
5252        @param state: The current state
5253        @type state: CurrentState
5254        """
5255        citation_handle = self.handle_source(line, state.level, state)
5256        state.family.add_citation(citation_handle)
5257
5258    def __family_object(self, line, state):
5259        """
5260          +1 <<MULTIMEDIA_LINK>>  {0:M}
5261
5262        @param line: The current line in GedLine format
5263        @type line: GedLine
5264        @param state: The current state
5265        @type state: CurrentState
5266        """
5267        self.__obje(line, state, state.family)
5268
5269    def __family_comm(self, line, state):
5270        """
5271        @param line: The current line in GedLine format
5272        @type line: GedLine
5273        @param state: The current state
5274        @type state: CurrentState
5275        """
5276        note = line.data
5277        state.family.add_note(note)
5278        self.__skip_subordinate_levels(state.level + 1, state)
5279
5280    def __family_note(self, line, state):
5281        """
5282        +1 <<NOTE_STRUCTURE>>  {0:M}
5283
5284        @param line: The current line in GedLine format
5285        @type line: GedLine
5286        @param state: The current state
5287        @type state: CurrentState
5288        """
5289        self.__parse_note(line, state.family, state)
5290
5291    def __family_chan(self, line, state):
5292        """
5293        +1 <<CHANGE_DATE>>  {0:1}
5294
5295        @param line: The current line in GedLine format
5296        @type line: GedLine
5297        @param state: The current state
5298        @type state: CurrentState
5299        """
5300        self.__parse_change(line, state.family, state.level + 1, state)
5301
5302    def __family_attr(self, line, state):
5303        """
5304        Parses an TOKEN that Gramps recognizes as an Attribute
5305        @param line: The current line in GedLine format
5306        @type line: GedLine
5307        @param state: The current state
5308        @type state: CurrentState
5309        """
5310        sub_state = CurrentState()
5311        sub_state.person = state.person
5312        sub_state.attr = line.data
5313        sub_state.level = state.level + 1
5314        state.family.add_attribute(line.data)
5315        self.__parse_level(sub_state, self.person_attr_parse_tbl,
5316                           self.__ignore)
5317        state.msg += sub_state.msg
5318
5319    def __family_refn(self, line, state):
5320        """
5321        @param line: The current line in GedLine format
5322        @type line: GedLine
5323        @param state: The current state
5324        @type state: CurrentState
5325        """
5326        self.__do_refn(line, state, state.family)
5327
5328    def __family_cust_attr(self, line, state):
5329        """
5330        @param line: The current line in GedLine format
5331        @type line: GedLine
5332        @param state: The current state
5333        @type state: CurrentState
5334        """
5335        attr = Attribute()
5336        attr.set_type(line.token_text)
5337        attr.set_value(line.data)
5338        state.family.add_attribute(attr)
5339
    def __obje(self, line, state, pri_obj):
        """
        Handle an OBJE (multimedia link) line and attach the resulting media
        reference (or URL / note) to the given primary object.

        Embedded form

          n OBJE @<XREF:OBJE>@ {1:1}
          +1 _PRIM <Y/N>       {0:1}                # Indicates primary photo

        Linked form

          n  OBJE {1:1}
          +1 FORM <MULTIMEDIA_FORMAT> {1:1}         # v5.5 layout
          +1 TITL <DESCRIPTIVE_TITLE> {0:1}
          +1 FILE <MULTIMEDIA_FILE_REFERENCE> {1:1} # v5.5.1 allows multiple
            +2 FORM <MULTIMEDIA_FORMAT> {1:1}       # v5.5.1 layout
              +3 MEDI <SOURCE_MEDIA_TYPE> {0:1}     # v5.5.1 layout
          +1 <<NOTE_STRUCTURE>> {0:M}
          +1 _PRIM <Y/N>       {0:1}                # Indicates primary photo

        When the line data starts with '@' the media handle is looked up
        through oid_map and only a MediaRef is added.  Otherwise the
        subordinate lines are parsed with media_parse_tbl and, depending on
        the FORM found, either a URL/note or a media object plus MediaRef is
        attached.  If _PRIM is 'Y' the media handle is stored in state.photo.

        @param line: The current line in GedLine format
        @type line: GedLine
        @param state: The current state
        @type state: CurrentState
        @param pri_obj: The Primary object to which this is attached
        @type pri_obj: Person # or Family, or Source etc.
        """
        if line.data and line.data[0] == '@':
            # Reference to a named multimedia object defined elsewhere
            gramps_id = self.oid_map[line.data]
            handle = self.__find_media_handle(gramps_id)
            # check to see if this is a primary photo
            # (note that 'line' is rebound to the result of the check)
            line = self.__chk_subordinate(state.level + 1, state, TOKEN__PRIM)
            if line and line.data == 'Y':
                state.photo = handle
            oref = MediaRef()
            oref.set_reference_handle(handle)
            pri_obj.add_media_reference(oref)
            return
        #
        # The remainder of this code is similar in concept to __parse_obje
        # except that it combines references to the same media file by
        # comparing path names.  If they are the same, then only the first
        # is kept.  This does mean that if there are different notes etc. on a
        # later OBJE, they will be lost.
        #
        # The sub-state collects what the subordinate lines provide.
        sub_state = CurrentState()
        sub_state.form = ""
        sub_state.attr = None
        sub_state.filename = ""
        sub_state.title = ""
        sub_state.media = Media()
        sub_state.level = state.level + 1
        sub_state.prim = ""

        self.__parse_level(sub_state, self.media_parse_tbl, self.__ignore)
        state.msg += sub_state.msg
        if sub_state.filename == "":
            # Only reported; processing continues with the empty filename.
            self.__add_msg(_("Filename omitted"), line, state)
        # The following lines are commented out because Gramps is NOT a
        # Gedcom validator!
        # if sub_state.form == "":
        #     self.__add_msg(_("Form omitted"), line, state)

        # The following code that detects URL is an older v5.5 usage; the
        # modern option is to use the EMAIL tag.
        if isinstance(sub_state.form, str) and sub_state.form == "url":
            if isinstance(pri_obj, UrlBase):
                url = Url()
                url.set_path(sub_state.filename)
                url.set_description(sub_state.title)
                url.set_type(UrlType.WEB_HOME)
                pri_obj.add_url(url)
            else:  # some primary objects (Event) don't have a spot for a URL
                # Keep the URL text as a note on the primary object instead.
                new_note = Note(sub_state.filename)
                new_note.set_gramps_id(self.nid_map[""])
                new_note.set_handle(create_id())
                new_note.set_type(OBJ_NOTETYPE.get(type(pri_obj).__name__,
                                                   NoteType.GENERAL))
                self.dbase.commit_note(new_note, self.trans, new_note.change)
                pri_obj.add_note(new_note.get_handle())

        else:
            # to allow import of references to URLs (especially for import from
            # geni.com), do not try to find the file if it is blatantly a URL
            res = urlparse(sub_state.filename)
            # A name with no scheme, a one-character scheme (presumably a
            # drive letter such as "C:" -- TODO confirm) or a 'file' scheme is
            # looked up on disk; anything else is used unchanged.
            if sub_state.filename != '' and (res.scheme == '' or
                                             len(res.scheme) == 1 or
                                             res.scheme == 'file'):
                (valid, path) = self.__find_file(sub_state.filename,
                                                 self.dir_path)
                if not valid:
                    self.__add_msg(_("Could not import %s") %
                                   sub_state.filename, line, state)
            else:
                path = sub_state.filename
            # Multiple references to the same media silently drops the later
            # ones, even if title, etc.  are different
            photo_handle = self.media_map.get(path)
            if photo_handle is None:
                # First time this path is seen: create the media object.
                photo = Media()
                photo.set_path(path)
                if sub_state.title:
                    photo.set_description(sub_state.title)
                else:
                    # No title: fall back to the path with forward slashes
                    photo.set_description(path.replace('\\', '/'))
                full_path = os.path.abspath(path)
                # deal with mime types
                value = mimetypes.guess_type(full_path)
                if value and value[0]:  # found from filename
                    photo.set_mime_type(value[0])
                else:  # get from OBJE.FILE.FORM
                    if '/' in sub_state.form:  # already has expanded mime type
                        photo.set_mime_type(sub_state.form)
                    else:
                        # Treat FORM as a file extension for the lookup
                        value = mimetypes.types_map.get('.' + sub_state.form,
                                                        _('unknown'))
                        photo.set_mime_type(value)
                if sub_state.attr:
                    photo.attribute_list.append(sub_state.attr)
                self.dbase.add_media(photo, self.trans)
                # Remember the handle so later OBJE with this path reuse it
                self.media_map[path] = photo.handle
            else:
                photo = self.dbase.get_media_from_handle(photo_handle)
            # copy notes to our media
            for note in sub_state.media.get_note_list():
                photo.add_note(note)
            self.dbase.commit_media(photo, self.trans)

            if sub_state.prim == "Y":
                state.photo = photo.handle
            oref = MediaRef()
            oref.set_reference_handle(photo.handle)
            pri_obj.add_media_reference(oref)
5472
5473    def __media_ref_form(self, line, state):
5474        """
5475          +1 FORM <MULTIMEDIA_FORMAT> {1:1}
5476
5477        @param line: The current line in GedLine format
5478        @type line: GedLine
5479        @param state: The current state
5480        @type state: CurrentState
5481        """
5482        state.form = line.data.lower()
5483
5484    def __media_ref_medi(self, line, state):
5485        """
5486          +1 MEDI <SOURCE_MEDIA_TYPE> {0:1}   (Photo, Audio, Book, etc.)
5487
5488        @param line: The current line in GedLine format
5489        @type line: GedLine
5490        @param state: The current state
5491        @type state: CurrentState
5492        """
5493        state.attr = Attribute()
5494        mtype = MEDIA_MAP.get(line.data.lower(),
5495                              (SourceMediaType.CUSTOM, line.data))
5496        state.attr.set_type(_('Media-Type'))
5497        state.attr.set_value(str(SourceMediaType(mtype)))
5498
5499    def __media_ref_titl(self, line, state):
5500        """
5501          +1 TITL <DESCRIPTIVE_TITLE> {0:1}
5502
5503        @param line: The current line in GedLine format
5504        @type line: GedLine
5505        @param state: The current state
5506        @type state: CurrentState
5507        """
5508        state.title = line.data
5509
5510    def __media_ref_file(self, line, state):
5511        """
5512          +1 FILE <MULTIMEDIA_FILE_REFERENCE> {1:1}
5513
5514        @param line: The current line in GedLine format
5515        @type line: GedLine
5516        @param state: The current state
5517        @type state: CurrentState
5518        """
5519        if state.filename != "":
5520            self.__add_msg(_("Multiple FILE in a single OBJE ignored"),
5521                           line, state)
5522            self.__skip_subordinate_levels(state.level + 1, state)
5523            return
5524        state.filename = line.data
5525
5526    def __media_ref_prim(self, line, state):
5527        """
5528          +1 _PRIM <Y/N> {0:1}
5529
5530        Indicates that this OBJE is the primary photo.
5531
5532        @param line: The current line in GedLine format
5533        @type line: GedLine
5534        @param state: The current state
5535        @type state: CurrentState
5536        """
5537        state.prim = line.data
5538
5539    def __family_adopt(self, line, state):
5540        """
5541        n ADOP
5542
5543        @param line: The current line in GedLine format
5544        @type line: GedLine
5545        @param state: The current state
5546        @type state: CurrentState
5547        """
5548        state.frel = TYPE_ADOPT
5549        state.mrel = TYPE_ADOPT
5550
5551    def __family_frel(self, line, state):
5552        """
5553        The _FREL key is a FTW/FTM specific extension to indicate father/child
5554        relationship.
5555
5556        n _FREL <type>
5557
5558        @param line: The current line in GedLine format
5559        @type line: GedLine
5560        @param state: The current state
5561        @type state: CurrentState
5562        """
5563        state.frel = PEDIGREE_TYPES.get(line.data.strip().lower())
5564
5565    def __family_mrel(self, line, state):
5566        """
5567        The _MREL key is a FTW/FTM specific extension to indicate father/child
5568        relationship.
5569
5570        n _MREL <type>
5571
5572        @param line: The current line in GedLine format
5573        @type line: GedLine
5574        @param state: The current state
5575        @type state: CurrentState
5576        """
5577        state.mrel = PEDIGREE_TYPES.get(line.data.strip().lower())
5578
5579    def __family_stat(self, line, state):
5580        """
5581        @param line: The current line in GedLine format
5582        @type line: GedLine
5583        @param state: The current state
5584        @type state: CurrentState
5585        """
5586        state.mrel = TYPE_BIRTH
5587        state.frel = TYPE_BIRTH
5588
5589    def __event_object(self, line, state):
5590        """
5591        @param line: The current line in GedLine format
5592        @type line: GedLine
5593        @param state: The current state
5594        @type state: CurrentState
5595        """
5596        self.__obje(line, state, state.event)
5597
5598    def __event_type(self, line, state):
5599        """
5600        Parses the TYPE line for an event.
5601
5602        @param line: The current line in GedLine format
5603        @type line: GedLine
5604        @param state: The current state
5605        @type state: CurrentState
5606        """
5607        if state.event.get_type().is_custom():
5608            if line.data in GED_TO_GRAMPS_EVENT:
5609                name = EventType(GED_TO_GRAMPS_EVENT[line.data])
5610            else:
5611                try:
5612                    name = EventType((EventType.CUSTOM, line.data))
5613                except AttributeError:
5614                    name = EventType(EventType.UNKNOWN)
5615            state.event.set_type(name)
5616        else:
5617            try:
5618                if line.data not in GED_TO_GRAMPS_EVENT and \
5619                        line.data[0] != 'Y':
5620                    state.event.set_description(line.data)
5621            except IndexError:
5622                return
5623
5624    def __event_date(self, line, state):
5625        """
5626        @param line: The current line in GedLine format
5627        @type line: GedLine
5628        @param state: The current state
5629        @type state: CurrentState
5630        """
5631        state.event.set_date_object(line.data)
5632
5633    def __event_place(self, line, state):
5634        """
5635        Parse the place portion of a event. A special case has to be made for
5636        Family Tree Maker, which violates the GEDCOM spec. It uses the PLAC
5637        field to store the description or value associated with the event.
5638
5639         n  PLAC <PLACE_VALUE> {1:1}
5640         +1 FORM <PLACE_HIERARCHY> {0:1}
5641         +1 <<SOURCE_CITATION>> {0:M}
5642         +1 <<NOTE_STRUCTURE>> {0:M}
5643
5644        @param line: The current line in GedLine format
5645        @type line: GedLine
5646        @param state: The current state
5647        @type state: CurrentState
5648        """
5649
5650        if self.is_ftw and (state.event.type in FTW_BAD_PLACE) \
5651                and not state.event.get_description():
5652            state.event.set_description(line.data)
5653        else:
5654            place = state.place
5655            if place:
5656                # We encounter a PLAC, having previously encountered an ADDR
5657                if state.place.place_type.string != _("Address"):
5658                    # We have previously found a PLAC
5659                    self.__add_msg(_("A second PLAC ignored"), line, state)
5660                    # ignore this second PLAC, and use the old one
5661                else:
5662                    # This is the first PLAC
5663                    place.set_title(line.data)
5664                    place.name.set_value(line.data)
5665            else:
5666                # The first thing we encounter is PLAC
5667                state.place = Place()
5668                place = state.place
5669                place.set_title(line.data)
5670                place.name.set_value(line.data)
5671
5672            sub_state = CurrentState()
5673            sub_state.place = place
5674            sub_state.level = state.level + 1
5675
5676            self.__parse_level(sub_state, self.event_place_map,
5677                               self.__undefined)
5678            state.msg += sub_state.msg
5679            if sub_state.pf:                # if we found local PLAC:FORM
5680                state.pf = sub_state.pf     # save to override global value
5681            # merge notes etc into place
5682            state.place.merge(sub_state.place)
5683
5684    def __event_place_note(self, line, state):
5685        """
5686        @param line: The current line in GedLine format
5687        @type line: GedLine
5688        @param state: The current state
5689        @type state: CurrentState
5690        """
5691        self.__parse_note(line, state.place, state)
5692
5693    def __event_place_form(self, line, state):
5694        """
5695        @param line: The current line in GedLine format
5696        @type line: GedLine
5697        @param state: The current state
5698        @type state: CurrentState
5699        """
5700        state.pf = PlaceParser(line)
5701
5702    def __event_place_object(self, line, state):
5703        """
5704        @param line: The current line in GedLine format
5705        @type line: GedLine
5706        @param state: The current state
5707        @type state: CurrentState
5708        """
5709        self.__obje(line, state, state.place)
5710
5711    def __event_place_sour(self, line, state):
5712        """
5713        @param line: The current line in GedLine format
5714        @type line: GedLine
5715        @param state: The current state
5716        @type state: CurrentState
5717        """
5718        state.place.add_citation(self.handle_source(line, state.level, state))
5719
5720    def __place_map(self, line, state):
5721        """
5722
5723        n   MAP
5724        n+1 LONG <PLACE_LONGITUDE>
5725        n+1 LATI <PLACE_LATITUDE>
5726
5727        @param line: The current line in GedLine format
5728        @type line: GedLine
5729        @param state: The current state
5730        @type state: CurrentState
5731        """
5732        sub_state = CurrentState()
5733        sub_state.level = state.level + 1
5734        sub_state.place = state.place
5735        self.__parse_level(sub_state, self.place_map_tbl, self.__undefined)
5736        state.msg += sub_state.msg
5737        state.place = sub_state.place
5738
5739    def __place_lati(self, line, state):
5740        """
5741        @param line: The current line in GedLine format
5742        @type line: GedLine
5743        @param state: The current state
5744        @type state: CurrentState
5745        """
5746        state.place.set_latitude(line.data)
5747
5748    def __place_long(self, line, state):
5749        """
5750        @param line: The current line in GedLine format
5751        @type line: GedLine
5752        @param state: The current state
5753        @type state: CurrentState
5754        """
5755        state.place.set_longitude(line.data)
5756
    def __event_addr(self, line, state):
        """
        Handle an ADDR line of an event and fold the address into the place
        held in the state.

        The subordinate lines are parsed with parse_loc_tbl into a Location
        and a temporary Place.  Depending on addr_is_detail and on whether a
        place already exists, the address either becomes a separate
        "Detail" place enclosed by the existing place, is added as an
        alternate location of the existing place, or becomes a new place of
        type "Address".

        @param line: The current line in GedLine format
        @type line: GedLine
        @param state: The current state
        @type state: CurrentState
        """
        free_form = line.data

        sub_state = CurrentState(level=state.level + 1)
        sub_state.location = Location()
        sub_state.event = state.event
        sub_state.place = Place()  # temp stash for notes, citations etc

        self.__parse_level(sub_state, self.parse_loc_tbl, self.__undefined)
        state.msg += sub_state.msg

        # Combine the free-form text with the structured location
        title = self.__merge_address(free_form, sub_state.location,
                                     line, state)

        location = sub_state.location

        if self.addr_is_detail and state.place:
            # Commit the enclosing place
            place = self.__find_place(state.place.get_title(), None,
                                      state.place.get_placeref_list())
            if place is None:
                # Not found: store the place from the state and record its
                # handle under its title
                place = state.place
                self.dbase.add_place(place, self.trans)
                self.place_names[place.get_title()].append(place.get_handle())
            else:
                # Found: fold the place from the state into the found one
                place.merge(state.place)
                self.dbase.commit_place(place, self.trans)
            place_title = _pd.display(self.dbase, place)
            state.pf.load_place(self.place_import, place, place_title)

            # Create the Place Details (it is committed with the event)
            place_detail = Place()
            place_detail.set_name(PlaceName(value=title))
            place_detail.set_title(title)
            # For RootsMagic etc. Place Details e.g. address, hospital, ...
            place_detail.set_type((PlaceType.CUSTOM, _("Detail")))
            placeref = PlaceRef()
            placeref.ref = place.get_handle()
            place_detail.set_placeref_list([placeref])
            # From here on the detail place is the place of the state
            state.place = place_detail
        else:
            place = state.place
            if place:
                # We encounter an ADDR having previously encountered a PLAC
                if len(place.get_alternate_locations()) != 0 and \
                        not self.__get_first_loc(place).is_empty():
                    # We have previously found an ADDR, or have populated
                    # location from PLAC title
                    self.__add_msg(_("Location already populated; ADDR "
                                     "ignored"), line, state)
                    # ignore this second ADDR, and use the old one
                else:
                    # This is the first ADDR
                    place.add_alternate_locations(location)
            else:
                # The first thing we encounter is ADDR
                state.place = Place()
                place = state.place
                place.add_alternate_locations(location)
                place.set_name(PlaceName(value=title))
                place.set_title(title)
                place.set_type((PlaceType.CUSTOM, _("Address")))

        # merge notes etc into place
        state.place.merge(sub_state.place)
5828
5829    def __add_location(self, place, location):
5830        """
5831        @param place: A place object we have found or created
5832        @type place: Place
5833        @param location: A location we want to add to this place
5834        @type location: gen.lib.location
5835        """
5836        for loc in place.get_alternate_locations():
5837            if loc.is_equivalent(location) == IDENTICAL:
5838                return
5839        place.add_alternate_locations(location)
5840
5841    def __get_first_loc(self, place):
5842        """
5843        @param place: A place object
5844        @type place: Place
5845        @return location: the first alternate location if any else None
5846        @type location: gen.lib.location
5847        """
5848        if len(place.get_alternate_locations()) == 0:
5849            return None
5850        else:
5851            return place.get_alternate_locations()[0]
5852
5853    def __event_privacy(self, line, state):
5854        """
5855        @param line: The current line in GedLine format
5856        @type line: GedLine
5857        @param state: The current state
5858        @type state: CurrentState
5859        """
5860        state.event.set_privacy(True)
5861
5862    def __event_note(self, line, state):
5863        """
5864        @param line: The current line in GedLine format
5865        @type line: GedLine
5866        @param state: The current state
5867        @type state: CurrentState
5868        """
5869        self.__parse_note(line, state.event, state)
5870
5871    def __event_inline_note(self, line, state):
5872        """
5873        @param line: The current line in GedLine format
5874        @type line: GedLine
5875        @param state: The current state
5876        @type state: CurrentState
5877        """
5878        if line.data[0:13] == "Description: ":
5879            state.event.set_description(line.data[13:])
5880        else:
5881            self.__parse_note(line, state.event, state)
5882
5883    def __event_source(self, line, state):
5884        """
5885        @param line: The current line in GedLine format
5886        @type line: GedLine
5887        @param state: The current state
5888        @type state: CurrentState
5889        """
5890        state.event.add_citation(self.handle_source(line, state.level, state))
5891
5892    def __event_rin(self, line, state):
5893        """
5894        @param line: The current line in GedLine format
5895        @type line: GedLine
5896        @param state: The current state
5897        @type state: CurrentState
5898        """
5899        attr = Attribute()
5900        attr.set_type(line.token_text)
5901        attr.set_value(line.data)
5902        state.event.add_attribute(attr)
5903
5904    def __event_attr(self, line, state):
5905        """
5906        @param line: The current line in GedLine format
5907        @type line: GedLine
5908        @param state: The current state
5909        @type state: CurrentState
5910        """
5911        state.event.add_attribute(line.data)
5912
5913    def __event_phon(self, line, state):
5914        """
5915        @param line: The current line in GedLine format
5916        @type line: GedLine
5917        @param state: The current state
5918        @type state: CurrentState
5919        """
5920        attr = Attribute()
5921        attr.set_type(_("Phone"))
5922        attr.set_value(line.data)
5923        state.event.add_attribute(attr)
5924
5925    def __event_fax(self, line, state):
5926        """
5927        @param line: The current line in GedLine format
5928        @type line: GedLine
5929        @param state: The current state
5930        @type state: CurrentState
5931        """
5932        attr = Attribute()
5933        attr.set_type(_("FAX"))
5934        attr.set_value(line.data)
5935        state.event.add_attribute(attr)
5936
5937    def __event_email(self, line, state):
5938        """
5939        @param line: The current line in GedLine format
5940        @type line: GedLine
5941        @param state: The current state
5942        @type state: CurrentState
5943        """
5944        attr = Attribute()
5945        attr.set_type(_("EMAIL"))
5946        attr.set_value(line.data)
5947        state.event.add_attribute(attr)
5948
5949    def __event_www(self, line, state):
5950        """
5951        @param line: The current line in GedLine format
5952        @type line: GedLine
5953        @param state: The current state
5954        @type state: CurrentState
5955        """
5956        attr = Attribute()
5957        attr.set_type(_("WWW"))
5958        attr.set_value(line.data)
5959        state.event.add_attribute(attr)
5960
5961    def __event_cause(self, line, state):
5962        """
5963        @param line: The current line in GedLine format
5964        @type line: GedLine
5965        @param state: The current state
5966        @type state: CurrentState
5967        """
5968        attr = Attribute()
5969        attr.set_type(AttributeType.CAUSE)
5970        attr.set_value(line.data)
5971        state.event.add_attribute(attr)
5972
5973        sub_state = CurrentState()
5974        sub_state.event = state.event
5975        sub_state.level = state.level + 1
5976        sub_state.attr = attr
5977
5978        self.__parse_level(sub_state, self.event_cause_tbl, self.__undefined)
5979        state.msg += sub_state.msg
5980
5981    def __event_cause_source(self, line, state):
5982        """
5983        @param line: The current line in GedLine format
5984        @type line: GedLine
5985        @param state: The current state
5986        @type state: CurrentState
5987        """
5988        state.attr.add_citation(self.handle_source(line, state.level, state))
5989
5990    def __event_age(self, line, state):
5991        """
5992        @param line: The current line in GedLine format
5993        @type line: GedLine
5994        @param state: The current state
5995        @type state: CurrentState
5996        """
5997        attr = Attribute()
5998        attr.set_type(AttributeType.AGE)
5999        attr.set_value(line.data)
6000        state.event_ref.add_attribute(attr)
6001
6002    def __event_husb(self, line, state):
6003        """
6004        @param line: The current line in GedLine format
6005        @type line: GedLine
6006        @param state: The current state
6007        @type state: CurrentState
6008        """
6009        while True:
6010            line = self.__get_next_line()
6011            if self.__level_is_finished(line, state.level + 1):
6012                break
6013            elif line.token == TOKEN_AGE:
6014                attr = Attribute()
6015                attr.set_type(AttributeType.FATHER_AGE)
6016                attr.set_value(line.data)
6017                state.event_ref.add_attribute(attr)
6018            elif line.token == TOKEN_WIFE:
6019                #wife event can be on same level, if so call it and finish
6020                self.__event_wife(line, state)
6021                break
6022
6023    def __event_wife(self, line, state):
6024        """
6025        @param line: The current line in GedLine format
6026        @type line: GedLine
6027        @param state: The current state
6028        @type state: CurrentState
6029        """
6030        while True:
6031            line = self.__get_next_line()
6032            if self.__level_is_finished(line, state.level + 1):
6033                break
6034            elif line.token == TOKEN_AGE:
6035                attr = Attribute()
6036                attr.set_type(AttributeType.MOTHER_AGE)
6037                attr.set_value(line.data)
6038                state.event_ref.add_attribute(attr)
6039            elif line.token == TOKEN_HUSB:
6040                #husband event can be on same level, if so call it and finish
6041                self.__event_husb(line, state)
6042                break
6043
6044    def __event_agnc(self, line, state):
6045        """
6046        @param line: The current line in GedLine format
6047        @type line: GedLine
6048        @param state: The current state
6049        @type state: CurrentState
6050        """
6051        attr = Attribute()
6052        attr.set_type(AttributeType.AGENCY)
6053        attr.set_value(line.data)
6054        state.event.add_attribute(attr)
6055
6056    def __event_time(self, line, state):
6057        """
6058        @param line: The current line in GedLine format
6059        @type line: GedLine
6060        @param state: The current state
6061        @type state: CurrentState
6062        """
6063        if hasattr(state, 'event'):
6064            #read in time as attribute of event
6065            attr = Attribute()
6066            attr.set_type(AttributeType.TIME)
6067            attr.set_value(line.data)
6068            state.event.add_attribute(attr)
6069
6070    def __event_witness(self, line, state):
6071        """
6072        Parse the witness of an event
6073
6074        @param line: The current line in GedLine format
6075        @type line: GedLine
6076        @param state: The current state
6077        @type state: CurrentState
6078        """
6079        if line.data and line.data[0] == "@":
6080            # n  _WITN @<XREF:INDI>@
6081            # +1 TYPE <TYPE_OF_RELATION>
6082            assert state.event.handle  # event handle is required to be set
6083            wit = self.__find_or_create_person(self.pid_map[line.data])
6084            event_ref = EventRef()
6085            event_ref.set_reference_handle(state.event.handle)
6086            while True:
6087                line = self.__get_next_line()
6088                if self.__level_is_finished(line, state.level + 1):
6089                    break
6090                elif line.token == TOKEN_TYPE:
6091                    if line.data == "WITNESS_OF_MARRIAGE":
6092                        role = EventRoleType(
6093                            EventRoleType.WITNESS)
6094                    else:
6095                        role = EventRoleType(
6096                            (EventRoleType.CUSTOM, line.data))
6097                    event_ref.set_role(role)
6098            wit.add_event_ref(event_ref)
6099            self.dbase.commit_person(wit, self.trans)
6100        else:
6101            # n _WITN <TEXTUAL_LIST_OF_NAMES>
6102            attr = Attribute()
6103            attr.set_type(AttributeType.WITNESS)
6104            attr.set_value(line.data)
6105            state.event.add_attribute(attr)
6106
6107    def __person_adopt_famc(self, line, state):
6108        """
6109        @param line: The current line in GedLine format
6110        @type line: GedLine
6111        @param state: The current state
6112        @type state: CurrentState
6113        """
6114        gid = self.fid_map[line.data]
6115        handle = self.__find_family_handle(gid)
6116        family = self.__find_or_create_family(gid)
6117
6118        sub_state = CurrentState(level=state.level + 1)
6119        sub_state.mrel = TYPE_BIRTH
6120        sub_state.frel = TYPE_BIRTH
6121
6122        self.__parse_level(sub_state, self.parse_person_adopt,
6123                           self.__undefined)
6124        state.msg += sub_state.msg
6125
6126        if (int(sub_state.mrel) == ChildRefType.BIRTH and
6127                int(sub_state.frel) == ChildRefType.BIRTH):
6128            sub_state.mrel = sub_state.frel = TYPE_ADOPT
6129
6130        state.person.add_parent_family_handle(handle)
6131
6132        reflist = [ref for ref in family.get_child_ref_list()
6133                   if ref.ref == state.person.handle]
6134        if reflist:
6135            ref = reflist[0]
6136            ref.set_father_relation(sub_state.frel)
6137            ref.set_mother_relation(sub_state.mrel)
6138        else:
6139            ref = ChildRef()
6140            ref.ref = state.person.handle
6141            ref.set_father_relation(sub_state.frel)
6142            ref.set_mother_relation(sub_state.mrel)
6143            family.add_child_ref(ref)
6144            self.dbase.commit_family(family, self.trans)
6145
6146    def __person_adopt_famc_adopt(self, line, state):
6147        """
6148        @param line: The current line in GedLine format
6149        @type line: GedLine
6150        @param state: The current state
6151        @type state: CurrentState
6152        """
6153        if line.data.strip() == "HUSB":
6154            state.frel = TYPE_ADOPT
6155        elif line.data.strip() == "WIFE":
6156            state.mrel = TYPE_ADOPT
6157        else:
6158            state.mrel = TYPE_ADOPT
6159            state.frel = TYPE_ADOPT
6160
6161    def __person_birth_famc(self, line, state):
6162        """
6163        @param line: The current line in GedLine format
6164        @type line: GedLine
6165        @param state: The current state
6166        @type state: CurrentState
6167        """
6168        handle = self.__find_family_handle(self.fid_map[line.data])
6169
6170        state.person.add_parent_family_handle(handle)
6171
6172        frel = mrel = ChildRefType.BIRTH
6173
6174        family, _new = self.dbase.find_family_from_handle(handle, self.trans)
6175        reflist = [ref for ref in family.get_child_ref_list()
6176                   if ref.ref == state.person.handle]
6177        if reflist:
6178            ref = reflist[0]
6179            ref.set_father_relation(frel)
6180            ref.set_mother_relation(mrel)
6181        else:
6182            ref = ChildRef()
6183            ref.ref = state.person.handle
6184            ref.set_father_relation(frel)
6185            ref.set_mother_relation(mrel)
6186            family.add_child_ref(ref)
6187            self.dbase.commit_family(family, self.trans)
6188
6189    def __address_date(self, line, state):
6190        """
6191        Parses the DATE line of an ADDR tag
6192
6193        @param line: The current line in GedLine format
6194        @type line: GedLine
6195        @param state: The current state
6196        @type state: CurrentState
6197        """
6198        state.addr.set_date_object(line.data)
6199
6200    def __address_adr1(self, line, state):
6201        """
6202        Parses the ADR1 line of an ADDR tag
6203
6204        @param line: The current line in GedLine format
6205        @type line: GedLine
6206        @param state: The current state
6207        @type state: CurrentState
6208        """
6209        # The ADDR may already have been parsed by the level above
6210        # assert state.addr.get_street() == ""
6211        if state.addr.get_street() != "":
6212            self.__add_msg(_("Warn: ADDR overwritten"), line, state)
6213        state.addr.set_street(line.data)
6214
6215    def __address_adr2(self, line, state):
6216        """
6217        Parses the ADR2 line of an ADDR tag
6218
6219        @param line: The current line in GedLine format
6220        @type line: GedLine
6221        @param state: The current state
6222        @type state: CurrentState
6223        """
6224        state.addr.set_locality(line.data)
6225
6226    def __address_city(self, line, state):
6227        """
6228        Parses the CITY line of an ADDR tag
6229
6230        @param line: The current line in GedLine format
6231        @type line: GedLine
6232        @param state: The current state
6233        @type state: CurrentState
6234        """
6235        state.addr.set_city(line.data)
6236
6237    def __address_state(self, line, state):
6238        """
6239        Parses the STAE line of an ADDR tag
6240
6241        @param line: The current line in GedLine format
6242        @type line: GedLine
6243        @param state: The current state
6244        @type state: CurrentState
6245        """
6246        state.addr.set_state(line.data)
6247
6248    def __address_post(self, line, state):
6249        """
6250        Parses the POST line of an ADDR tag
6251
6252        @param line: The current line in GedLine format
6253        @type line: GedLine
6254        @param state: The current state
6255        @type state: CurrentState
6256        """
6257        state.addr.set_postal_code(line.data)
6258
6259    def __address_country(self, line, state):
6260        """
6261        Parses the country line of an ADDR tag
6262
6263        @param line: The current line in GedLine format
6264        @type line: GedLine
6265        @param state: The current state
6266        @type state: CurrentState
6267        """
6268        state.addr.set_country(line.data)
6269
6270    def __address_sour(self, line, state):
6271        """
6272        Parses the SOUR line of an ADDR tag
6273
6274        @param line: The current line in GedLine format
6275        @type line: GedLine
6276        @param state: The current state
6277        @type state: CurrentState
6278        """
6279        state.addr.add_citation(self.handle_source(line, state.level, state))
6280
6281    def __address_note(self, line, state):
6282        """
6283        Parses the NOTE line of an ADDR tag
6284
6285        @param line: The current line in GedLine format
6286        @type line: GedLine
6287        @param state: The current state
6288        @type state: CurrentState
6289        """
6290        self.__parse_note(line, state.addr, state)
6291
6292    def __citation_page(self, line, state):
6293        """
6294        Parses the PAGE line of an SOUR instance tag
6295
6296        @param line: The current line in GedLine format
6297        @type line: GedLine
6298        @param state: The current state
6299        @type state: CurrentState
6300        """
6301        state.citation.set_page(line.data)
6302
6303    def __citation_date(self, line, state):
6304        """
6305        Parses the DATE line of an SOUR instance tag
6306
6307        @param line: The current line in GedLine format
6308        @type line: GedLine
6309        @param state: The current state
6310        @type state: CurrentState
6311        """
6312        state.citation.set_date_object(line.data)
6313
6314    def __citation_data(self, line, state):
6315        """
6316        Parses the DATA line of an SOUR instance tag
6317
6318        @param line: The current line in GedLine format
6319        @type line: GedLine
6320        @param state: The current state
6321        @type state: CurrentState
6322        """
6323        sub_state = CurrentState(level=state.level + 1)
6324        sub_state.citation = state.citation
6325
6326        self.__parse_level(sub_state, self.citation_data_tbl, self.__undefined)
6327        state.msg += sub_state.msg
6328
6329    def __citation_data_date(self, line, state):
6330        state.citation.set_date_object(line.data)
6331
6332    def __source_text(self, line, state):
6333        note = Note()
6334        note.set(line.data)
6335        gramps_id = self.nid_map[""]
6336        note.set_gramps_id(gramps_id)
6337        note.set_type(NoteType.SOURCE_TEXT)
6338        self.dbase.add_note(note, self.trans)
6339
6340        state.source.add_note(note.get_handle())
6341
6342    def __citation_data_text(self, line, state):
6343        note = Note()
6344        note.set(line.data)
6345        gramps_id = self.nid_map[""]
6346        note.set_gramps_id(gramps_id)
6347        note.set_type(NoteType.SOURCE_TEXT)
6348        self.dbase.add_note(note, self.trans)
6349
6350        state.citation.add_note(note.get_handle())
6351
6352    def __citation_link(self, line, state):
6353        """
6354        Not legal GEDCOM - added to support FTM, converts the _LINK tag to a
6355        note with styled text so link can be followed in reports etc.
6356        """
6357        note = Note()
6358        tags = StyledTextTag(StyledTextTagType.LINK,
6359                             line.data,
6360                             [(0, len(line.data))])
6361        note.set_styledtext(StyledText(line.data, [tags]))
6362        gramps_id = self.nid_map[""]
6363        note.set_gramps_id(gramps_id)
6364        note.set_type(NoteType.CITATION)
6365        self.dbase.add_note(note, self.trans)
6366        state.citation.add_note(note.get_handle())
6367
6368    def __citation__just(self, line, state):
6369        """
6370        Not legal GEDCOM - added to support FTM, converts the _JUST tag to a
6371        note.  This tag represents the Justification for a source.
6372        """
6373        note = Note()
6374        note.set(line.data)
6375        gramps_id = self.nid_map[""]
6376        note.set_gramps_id(gramps_id)
6377        note.set_type(_("Citation Justification"))
6378        self.dbase.add_note(note, self.trans)
6379        state.citation.add_note(note.get_handle())
6380
6381    def __citation_data_note(self, line, state):
6382        self.__parse_note(line, state.citation, state)
6383
6384    def __citation_obje(self, line, state):
6385        """
6386        Parses the OBJE line of an SOUR instance tag
6387
6388        @param line: The current line in GedLine format
6389        @type line: GedLine
6390        @param state: The current state
6391        @type state: CurrentState
6392        """
6393        self.__obje(line, state, state.citation)
6394
6395    def __citation_refn(self, line, state):
6396        """
6397        Parses the REFN line of an SOUR instance tag
6398
6399        @param line: The current line in GedLine format
6400        @type line: GedLine
6401        @param state: The current state
6402        @type state: CurrentState
6403        """
6404        self.__add_msg(_("REFN ignored"), line, state)
6405        self.__skip_subordinate_levels(state.level + 1, state)
6406
6407    def __citation_even(self, line, state):
6408        """
6409        Parses the EVEN line of an SOUR instance tag
6410
6411        @param line: The current line in GedLine format
6412        @type line: GedLine
6413        @param state: The current state
6414        @type state: CurrentState
6415        """
6416        sattr = SrcAttribute()
6417        sattr.set_type("EVEN")
6418        sattr.set_value(line.data)
6419        state.citation.add_attribute(sattr)
6420        sub_state = CurrentState(level=state.level + 1)
6421        sub_state.citation = state.citation
6422
6423        self.__parse_level(sub_state, self.citation_even_tbl, self.__undefined)
6424        state.msg += sub_state.msg
6425
6426    def __citation_even_role(self, line, state):
6427        """
6428        Parses the EVEN line of an SOUR instance tag
6429
6430        @param line: The current line in GedLine format
6431        @type line: GedLine
6432        @param state: The current state
6433        @type state: CurrentState
6434        """
6435        sattr = SrcAttribute()
6436        sattr.set_type("EVEN:ROLE")
6437        sattr.set_value(line.data)
6438        state.citation.add_attribute(sattr)
6439
6440    def __citation_quay(self, line, state):
6441        """
6442        Parses the QUAY line of an SOUR instance tag
6443
6444        @param line: The current line in GedLine format
6445        @type line: GedLine
6446        @param state: The current state
6447        @type state: CurrentState
6448        """
6449        try:
6450            val = int(line.data)
6451        except ValueError:
6452            return
6453        # If value is greater than 3, cap at 3
6454        val = min(val, 3)
6455        if val > 1:
6456            state.citation.set_confidence_level(val + 1)
6457        else:
6458            state.citation.set_confidence_level(val)
6459
6460    def __citation_note(self, line, state):
6461        """
6462        Parses the NOTE line of an SOUR instance tag
6463
6464        @param line: The current line in GedLine format
6465        @type line: GedLine
6466        @param state: The current state
6467        @type state: CurrentState
6468        """
6469        self.__parse_note(line, state.citation, state)
6470
6471    #----------------------------------------------------------------------
6472    #
6473    # SOUR parsing
6474    #
6475    #----------------------------------------------------------------------
6476
6477    def __parse_source(self, name, level):
6478        """
6479        n @<XREF:SOUR>@ SOUR {1:1}
6480          +1 DATA {0:1}
6481          +2 EVEN <EVENTS_RECORDED> {0:M}
6482          +3 DATE <DATE_PERIOD> {0:1}
6483          +3 PLAC <SOURCE_JURISDICTION_PLACE> {0:1}
6484          +2 AGNC <RESPONSIBLE_AGENCY> {0:1}
6485          +2 <<NOTE_STRUCTURE>> {0:M}
6486          +1 AUTH <SOURCE_ORIGINATOR> {0:1}
6487          +1 TITL <SOURCE_DESCRIPTIVE_TITLE> {0:1}
6488          +1 ABBR <SOURCE_FILED_BY_ENTRY> {0:1}
6489          +1 PUBL <SOURCE_PUBLICATION_FACTS> {0:1}
6490          +1 TEXT <TEXT_FROM_SOURCE> {0:1}
6491          +1 <<SOURCE_REPOSITORY_CITATION>> {0:1}
6492          +1 <<MULTIMEDIA_LINK>> {0:M}
6493          +1 <<NOTE_STRUCTURE>> {0:M}
6494          +1 REFN <USER_REFERENCE_NUMBER> {0:M}
6495          +2 TYPE <USER_REFERENCE_TYPE> {0:1}
6496          +1 RIN <AUTOMATED_RECORD_ID> {0:1}
6497          +1 <<CHANGE_DATE>> {0:1}
6498        """
6499
6500        state = CurrentState()
6501        state.source = self.__find_or_create_source(self.sid_map[name])
6502        # SOURce with the given gramps_id had no title
6503        state.source.set_title(_("No title - ID %s") %
6504                               state.source.get_gramps_id())
6505        state.level = level
6506
6507        self.__parse_level(state, self.source_func, self.__undefined)
6508        self.__check_msgs(_("SOUR (source) Gramps ID %s") %
6509                          state.source.get_gramps_id(),
6510                          state, state.source)
6511        self.dbase.commit_source(state.source, self.trans, state.source.change)
6512
6513    def __source_attr(self, line, state):
6514        """
6515        @param line: The current line in GedLine format
6516        @type line: GedLine
6517        @param state: The current state
6518        @type state: CurrentState
6519        """
6520        sattr = SrcAttribute()
6521        sattr.set_type(line.token_text)
6522        sattr.set_value(line.data)
6523        state.source.add_attribute(sattr)
6524        self.__skip_subordinate_levels(state.level + 1, state)
6525
6526    def __source_object(self, line, state):
6527        """
6528        @param line: The current line in GedLine format
6529        @type line: GedLine
6530        @param state: The current state
6531        @type state: CurrentState
6532        """
6533        self.__obje(line, state, state.source)
6534
6535    def __source_chan(self, line, state):
6536        """
6537        @param line: The current line in GedLine format
6538        @type line: GedLine
6539        @param state: The current state
6540        @type state: CurrentState
6541        """
6542        self.__parse_change(line, state.source, state.level + 1, state)
6543
6544    def __source_repo(self, line, state):
6545        """
6546        @param line: The current line in GedLine format
6547        @type line: GedLine
6548        @param state: The current state
6549        @type state: CurrentState
6550        """
6551        if line.data and line.data[0] == '@':
6552            # This deals with the standard GEDCOM
6553            # SOURCE_REPOSITORY_CITATION: =
6554            #   n  REPO @<XREF:REPO>@                {1:1}
6555            #     +1 <<NOTE_STRUCTURE>>              {0:M}
6556            #     +1 CALN <SOURCE_CALL_NUMBER>       {0:M}
6557            #        +2 MEDI <SOURCE_MEDIA_TYPE>     {0:1}
6558            gid = self.rid_map[line.data]
6559            repo = self.__find_or_create_repository(gid)
6560        elif line.data == '':
6561            # This deals with the non-standard GEDCOM format found in Family
6562            # Tree Maker for Windows, Broderbund Software, Banner Blue
6563            # Division:
6564            # SOURCE_REPOSITORY_CITATION: =
6565            #   n  REPO                              {1:1}
6566            #     +1 <<NOTE_STRUCTURE>>              {0:M}
6567            #     +1 CALN <SOURCE_CALL_NUMBER>       {0:M}
6568            #        +2 MEDI <SOURCE_MEDIA_TYPE>     {0:1}
6569            #
6570            # This format has no repository name. See http://west-
6571            # penwith.org.uk/misc/ftmged.htm which points out this is
6572            # incorrect
6573            gid = self.rid_map[""]
6574            repo = self.__find_or_create_repository(gid)
6575            self.dbase.commit_repository(repo, self.trans)
6576        else:
6577            # This deals with the non-standard GEDCOM
6578            # SOURCE_REPOSITORY_CITATION: =
6579            #   n  REPO <NAME_OF_REPOSITORY>         {1:1}
6580            #     +1 <<NOTE_STRUCTURE>>              {0:M}
6581            #     +1 CALN <SOURCE_CALL_NUMBER>       {0:M}
6582            #        +2 MEDI <SOURCE_MEDIA_TYPE>     {0:1}
6583            # This seems to be used by Heredis 8 PC. Heredis is notorious for
6584            # non-standard GEDCOM.
6585            gid = self.repo2id.get(line.data)
6586            if gid is None:
6587                gid = self.rid_map[""]
6588            repo = self.__find_or_create_repository(gid)
6589            self.repo2id[line.data] = repo.get_gramps_id()
6590            repo.set_name(line.data)
6591            self.dbase.commit_repository(repo, self.trans)
6592
6593        repo_ref = RepoRef()
6594        repo_ref.set_reference_handle(repo.handle)
6595
6596        sub_state = CurrentState()
6597        sub_state.repo_ref = repo_ref
6598        sub_state.level = state.level + 1
6599
6600        self.__parse_level(sub_state, self.repo_ref_tbl, self.__undefined)
6601        state.msg += sub_state.msg
6602
6603        state.source.add_repo_reference(repo_ref)
6604
6605    def __repo_ref_call(self, line, state):
6606        """
6607        @param line: The current line in GedLine format
6608        @type line: GedLine
6609        @param state: The current state
6610        @type state: CurrentState
6611        """
6612        state.repo_ref.set_call_number(line.data)
6613        #self.__skip_subordinate_levels(state.level + 1, state)
6614
6615    def __repo_ref_medi(self, line, state):
6616        name = line.data
6617        mtype = MEDIA_MAP.get(name.lower(),
6618                              (SourceMediaType.CUSTOM, name))
6619        state.repo_ref.set_media_type(mtype)
6620
6621    def __repo_ref_note(self, line, state):
6622        """
6623        @param line: The current line in GedLine format
6624        @type line: GedLine
6625        @param state: The current state
6626        @type state: CurrentState
6627        """
6628        self.__parse_note(line, state.repo_ref, state)
6629
6630    def __repo_chan(self, line, state):
6631        """
6632        @param line: The current line in GedLine format
6633        @type line: GedLine
6634        @param state: The current state
6635        @type state: CurrentState
6636        """
6637        self.__parse_change(line, state.repo, state.level + 1, state)
6638
6639    def __source_abbr(self, line, state):
6640        """
6641        @param line: The current line in GedLine format
6642        @type line: GedLine
6643        @param state: The current state
6644        @type state: CurrentState
6645        """
6646        state.source.set_abbreviation(line.data)
6647
6648    def __source_agnc(self, line, state):
6649        """
6650        @param line: The current line in GedLine format
6651        @type line: GedLine
6652        @param state: The current state
6653        @type state: CurrentState
6654        """
6655        attr = Attribute()
6656        attr.set_type(AttributeType.AGENCY)
6657        attr.set_value(line.data)
6658        state.source.add_attribute(attr)
6659
6660    def __source_note(self, line, state):
6661        """
6662        @param line: The current line in GedLine format
6663        @type line: GedLine
6664        @param state: The current state
6665        @type state: CurrentState
6666        """
6667        self.__parse_note(line, state.source, state)
6668
6669    def __source_auth(self, line, state):
6670        """
6671        @param line: The current line in GedLine format
6672        @type line: GedLine
6673        @param state: The current state
6674        @type state: CurrentState
6675        """
6676        state.source.set_author(line.data)
6677
6678    def __source_publ(self, line, state):
6679        """
6680        @param line: The current line in GedLine format
6681        @type line: GedLine
6682        @param state: The current state
6683        @type state: CurrentState
6684        """
6685        state.source.set_publication_info(line.data)
6686        self.__skip_subordinate_levels(state.level + 1, state)
6687
6688    def __source_title(self, line, state):
6689        """
6690        @param line: The current line in GedLine format
6691        @type line: GedLine
6692        @param state: The current state
6693        @type state: CurrentState
6694        """
6695        state.source.set_title(line.data.replace('\n', ' '))
6696
6697    def __source_taxt_peri(self, line, state):
6698        """
6699        @param line: The current line in GedLine format
6700        @type line: GedLine
6701        @param state: The current state
6702        @type state: CurrentState
6703        """
6704        if state.source.get_title() == "":
6705            state.source.set_title(line.data.replace('\n', ' '))
6706
6707    #----------------------------------------------------------------------
6708    #
6709    # OBJE parsing
6710    #
6711    #----------------------------------------------------------------------
6712
6713    def __parse_obje(self, line):
6714        """
6715        n  @XREF:OBJE@ OBJE {1:1}                   # v5.5 layout
6716          +1 FILE <MULTIMEDIA_FILE_REFN> {1:1}      # de-facto extension
6717          +1 FORM <MULTIMEDIA_FORMAT> {1:1}
6718          +1 TITL <DESCRIPTIVE_TITLE> {0:1}
6719          +1 <<NOTE_STRUCTURE>> {0:M} p.*
6720          +1 BLOB {1:1}                             # Deprecated, no support
6721            +2 CONT <ENCODED_MULTIMEDIA_LINE> {1:M}
6722          +1 OBJE @<XREF:OBJE>@ /* chain */ {0:1}   # Deprecated, no support
6723          +1 REFN <USER_REFERENCE_NUMBER> {0:M}
6724            +2 TYPE <USER_REFERENCE_TYPE> {0:1}
6725          +1 RIN <AUTOMATED_RECORD_ID> {0:1}
6726          +1 <<CHANGE_DATE>> {0:1}
6727
6728        n @XREF:OBJE@ OBJE {1:1}                    # v5.5.1 layout
6729          +1 FILE <MULTIMEDIA_FILE_REFN> {1:M}      # multi files, no support
6730            +2 FORM <MULTIMEDIA_FORMAT> {1:1}
6731              +3 TYPE <SOURCE_MEDIA_TYPE> {0:1}
6732            +2 TITL <DESCRIPTIVE_TITLE> {0:1}
6733            +2 DATE <mm/dd/yyy hh:mn:ss AM> {0:1}   # FTM extension
6734            +2 TEXT <COMMENT, by user or exif>      # FTM extension
6735          +1 REFN <USER_REFERENCE_NUMBER> {0:M}
6736            +2 TYPE <USER_REFERENCE_TYPE> {0:1}
6737          +1 RIN <AUTOMATED_RECORD_ID> {0:1}
6738          +1 <<NOTE_STRUCTURE>> {0:M}
6739          +1 <<SOURCE_CITATION>> {0:M}
6740          +1 <<CHANGE_DATE>> {0:1}
6741        """
6742        gid = line.token_text.strip()
6743        media = self.__find_or_create_media(self.oid_map[gid])
6744
6745        state = CurrentState()
6746        state.media = media
6747        state.level = 1
6748
6749        self.__parse_level(state, self.obje_func, self.__undefined)
6750
6751        if state.media.get_path() == "":
6752            self.__add_msg(_("Filename omitted"), line, state)
6753        # deal with mime types
6754        value = mimetypes.guess_type(state.media.get_path())
6755        if value and value[0]:  # found from filename
6756            state.media.set_mime_type(value[0])
6757        else:  # get from OBJE.FILE.FORM
6758            if '/' in state.form:  # already has expanded mime type
6759                state.media.set_mime_type(state.form)
6760            else:
6761                value = mimetypes.types_map.get('.' + state.form,
6762                                                _('unknown'))
6763                state.media.set_mime_type(value)
6764        # Add the default reference if no source has found
6765        self.__add_default_source(media)
6766
6767        # Add a default tag if provided
6768        self.__add_default_tag(media)
6769
6770        self.__check_msgs(_("OBJE (multi-media object) Gramps ID %s") %
6771                          media.get_gramps_id(), state, media)
6772        # commit the person to the database
6773        self.dbase.commit_media(media, self.trans, media.change)
6774
6775    def __obje_form(self, line, state):
6776        """
6777        @param line: The current line in GedLine format
6778        @type line: GedLine
6779        @param state: The current state
6780        @type state: CurrentState
6781        """
6782        state.form = line.data.lower().strip()
6783
6784    def __obje_file(self, line, state):
6785        """
6786        @param line: The current line in GedLine format
6787        @type line: GedLine
6788        @param state: The current state
6789        @type state: CurrentState
6790        """
6791        # The following checks for the odd "feature" of GEDCOM 5.5.1 that
6792        # allows multiple files to be attached to a single OBJE; not supported
6793        if state.media.get_path() != "":
6794            self.__add_msg(_("Multiple FILE in a single OBJE ignored"),
6795                           line, state)
6796            self.__skip_subordinate_levels(state.level + 1, state)
6797            return
6798        res = urlparse(line.data)
6799        if line.data != '' and (res.scheme == '' or
6800                                len(res.scheme) == 1 or res.scheme == 'file'):
6801            (file_ok, filename) = self.__find_file(line.data, self.dir_path)
6802            if state.form != "url":
6803                # Might not work if FORM doesn't precede FILE
6804                if not file_ok:
6805                    self.__add_msg(_("Could not import %s") % line.data, line,
6806                                   state)
6807            path = filename
6808        else:
6809            path = line.data
6810
6811        state.media.set_path(path)
6812        if not state.media.get_description():
6813            state.media.set_description(path.replace('\\', '/'))
6814
6815    def __obje_title(self, line, state):
6816        """
6817        @param line: The current line in GedLine format
6818        @type line: GedLine
6819        @param state: The current state
6820        @type state: CurrentState
6821        """
6822        state.media.set_description(line.data)
6823
6824# FTM non-standard TEXT in OBJE, treat as note.
6825    def __obje_text(self, line, state):
6826        """
6827        @param line: The current line in GedLine format
6828        @type line: GedLine
6829        @param state: The current state
6830        @type state: CurrentState
6831        """
6832        new_note = Note(line.data)
6833        new_note.set_gramps_id(self.nid_map[""])
6834        new_note.set_handle(create_id())
6835        new_note.set_type(NoteType.MEDIA)
6836        self.dbase.commit_note(new_note, self.trans, new_note.change)
6837        state.media.add_note(new_note.get_handle())
6838
6839# FTM non-standard DATE in OBJE, treat as Media Date.
6840    def __obje_date(self, line, state):
6841        """
6842        @param line: The current line in GedLine format
6843        @type line: GedLine
6844        @param state: The current state
6845        @type state: CurrentState
6846        """
6847        state.media.set_date_object(line.data)
6848
6849    def __obje_note(self, line, state):
6850        """
6851        @param line: The current line in GedLine format
6852        @type line: GedLine
6853        @param state: The current state
6854        @type state: CurrentState
6855        """
6856        self.__parse_note(line, state.media, state)
6857
6858    def __obje_sour(self, line, state):
6859        """
6860        @param line: The current line in GedLine format
6861        @type line: GedLine
6862        @param state: The current state
6863        @type state: CurrentState
6864        """
6865        state.media.add_citation(self.handle_source(line, state.level, state))
6866
6867    def __obje_refn(self, line, state):
6868        """
6869        @param line: The current line in GedLine format
6870        @type line: GedLine
6871        @param state: The current state
6872        @type state: CurrentState
6873        """
6874        self.__do_refn(line, state, state.media)
6875
6876    def __obje_type(self, line, state):
6877        """
6878        +1 FILE <MULTIMEDIA_FILE_REFN> {1:M}
6879          +2 FORM <MULTIMEDIA_FORMAT> {1:1}
6880            +3 TYPE <SOURCE_MEDIA_TYPE> {0:1}   # v5.5.1
6881
6882        Source_Media_type is one of (Photo, Audio, Book, etc.)
6883
6884        @param line: The current line in GedLine format
6885        @type line: GedLine
6886        @param state: The current state
6887        @type state: CurrentState
6888        """
6889        attr = Attribute()
6890        mtype = MEDIA_MAP.get(line.data.lower(),
6891                              (SourceMediaType.CUSTOM, line.data))
6892        attr.set_type(_('Media-Type'))
6893        attr.set_value(str(SourceMediaType(mtype)))
6894        state.media.attribute_list.append(attr)
6895
6896    def __obje_rin(self, line, state):
6897        """
6898        @param line: The current line in GedLine format
6899        @type line: GedLine
6900        @param state: The current state
6901        @type state: CurrentState
6902        """
6903        attr = Attribute()
6904        attr.set_type(line.token_text)  # Attribute: RIN
6905        attr.set_value(line.data)
6906        state.media.attribute_list.append(attr)
6907
6908    def __obje_chan(self, line, state):
6909        """
6910        @param line: The current line in GedLine format
6911        @type line: GedLine
6912        @param state: The current state
6913        @type state: CurrentState
6914        """
6915        self.__parse_change(line, state.media, state.level + 1, state)
6916
6917    def __person_attr_type(self, line, state):
6918        """
6919        @param line: The current line in GedLine format
6920        @type line: GedLine
6921        @param state: The current state
6922        @type state: CurrentState
6923        """
6924        if state.attr.get_type() == "":
6925            if line.data in GED_TO_GRAMPS_EVENT:
6926                name = GED_TO_GRAMPS_EVENT[line.data]
6927            else:
6928                name = line.data
6929            state.attr.set_type(name)
6930        else:
6931            self.__ignore(line, state)
6932
6933    def __person_attr_source(self, line, state):
6934        """
6935        @param line: The current line in GedLine format
6936        @type line: GedLine
6937        @param state: The current state
6938        @type state: CurrentState
6939        """
6940        state.attr.add_citation(self.handle_source(line, state.level, state))
6941
6942    def __person_attr_place(self, line, state):
6943        """
6944        @param line: The current line in GedLine format
6945        @type line: GedLine
6946        @param state: The current state
6947        @type state: CurrentState
6948        """
6949        val = line.data
6950        if state.attr.get_value() == "":
6951            state.attr.set_value(val)
6952            self.__skip_subordinate_levels(state.level + 1, state)
6953        else:
6954            self.__ignore(line, state)
6955
6956    def __person_attr_note(self, line, state):
6957        """
6958        @param line: The current line in GedLine format
6959        @type line: GedLine
6960        @param state: The current state
6961        @type state: CurrentState
6962        """
6963        self.__parse_note(line, state.attr, state)
6964
6965    #----------------------------------------------------------------------
6966    #
6967    # REPO parsing
6968    #
6969    #----------------------------------------------------------------------
6970
6971    def __parse_repo(self, line):
6972        """
6973        n @<XREF:REPO>@ REPO {1:1}
6974          +1 NAME <NAME_OF_REPOSITORY> {0:1} p.*
6975          +1 <<ADDRESS_STRUCTURE>> {0:1} p.*
6976          +1 <<NOTE_STRUCTURE>> {0:M} p.*
6977          +1 REFN <USER_REFERENCE_NUMBER> {0:M} p.*
6978          +1 RIN <AUTOMATED_RECORD_ID> {0:1} p.*
6979          +1 <<CHANGE_DATE>> {0:1} p.
6980        """
6981        repo = self.__find_or_create_repository(self.rid_map[line.token_text])
6982
6983        state = CurrentState()
6984        state.repo = repo
6985        state.level = 1
6986        self.__parse_level(state, self.repo_parse_tbl, self.__ignore)
6987
6988        self.__check_msgs(_("REPO (repository) Gramps ID %s") %
6989                          repo.get_gramps_id(), state, repo)
6990        self.dbase.commit_repository(repo, self.trans, repo.change)
6991
6992    def __repo_name(self, line, state):
6993        """
6994        @param line: The current line in GedLine format
6995        @type line: GedLine
6996        @param state: The current state
6997        @type state: CurrentState
6998        """
6999        state.repo.set_name(line.data)
7000
7001    def __repo_note(self, line, state):
7002        """
7003        @param line: The current line in GedLine format
7004        @type line: GedLine
7005        @param state: The current state
7006        @type state: CurrentState
7007        """
7008        self.__parse_note(line, state.repo, state)
7009
7010    def __repo_addr(self, line, state):
7011        """
7012        Parses the REPOsitory and HEADer COPR <ADDRESS_STRUCTURE>
7013
7014        n ADDR <ADDRESS_LINE> {0:1}
7015        +1 CONT <ADDRESS_LINE> {0:M}
7016        +1 ADR1 <ADDRESS_LINE1> {0:1}  (Street)
7017        +1 ADR2 <ADDRESS_LINE2> {0:1}  (Locality)
7018        +1 CITY <ADDRESS_CITY> {0:1}
7019        +1 STAE <ADDRESS_STATE> {0:1}
7020        +1 POST <ADDRESS_POSTAL_CODE> {0:1}
7021        +1 CTRY <ADDRESS_COUNTRY> {0:1}
7022        n PHON <PHONE_NUMBER> {0:3}
7023
7024        Some repositories do not try to break up the address,
7025        instead they put everything on a single line. Try to determine
7026        if this happened, and try to fix it.
7027        """
7028        free_form = line.data
7029
7030        sub_state = CurrentState(level=state.level + 1)
7031        sub_state.addr = Address()
7032
7033        self.__parse_level(sub_state, self.parse_addr_tbl, self.__ignore)
7034        state.msg += sub_state.msg
7035
7036        self.__merge_address(free_form, sub_state.addr, line, state)
7037        state.repo.add_address(sub_state.addr)
7038
7039    def __repo_phon(self, line, state):
7040        """
7041        @param line: The current line in GedLine format
7042        @type line: GedLine
7043        @param state: The current state
7044        @type state: CurrentState
7045        """
7046        address_list = state.repo.get_address_list()
7047        if address_list:
7048            if address_list[0].get_phone():
7049                self.__add_msg(_("Only one phone number supported"),
7050                               line, state)
7051            else:
7052                address_list[0].set_phone(line.data)
7053
7054    def __repo_fax(self, line, state):
7055        """
7056        @param line: The current line in GedLine format
7057        @type line: GedLine
7058        @param state: The current state
7059        @type state: CurrentState
7060        """
7061        url = Url()
7062        url.set_path(line.data)
7063        url.set_type(UrlType(_('FAX')))
7064        state.repo.add_url(url)
7065
7066    def __repo_www(self, line, state):
7067        """
7068        @param line: The current line in GedLine format
7069        @type line: GedLine
7070        @param state: The current state
7071        @type state: CurrentState
7072        """
7073        url = Url()
7074        url.set_path(line.data)
7075        url.set_type(UrlType(UrlType.WEB_HOME))
7076        state.repo.add_url(url)
7077
7078    def __repo_email(self, line, state):
7079        """
7080        @param line: The current line in GedLine format
7081        @type line: GedLine
7082        @param state: The current state
7083        @type state: CurrentState
7084        """
7085        url = Url()
7086        url.set_path(line.data)
7087        url.set_type(UrlType(UrlType.EMAIL))
7088        state.repo.add_url(url)
7089
7090    def __location_adr1(self, line, state):
7091        """
7092        @param line: The current line in GedLine format
7093        @type line: GedLine
7094        @param state: The current state
7095        @type state: CurrentState
7096        """
7097        if not state.location:
7098            state.location = Location()
7099        if state.location.get_street() != "":
7100            self.__add_msg(_("Warn: ADDR overwritten"), line, state)
7101        state.location.set_street(line.data)
7102
7103    def __location_adr2(self, line, state):
7104        """
7105        @param line: The current line in GedLine format
7106        @type line: GedLine
7107        @param state: The current state
7108        @type state: CurrentState
7109        """
7110        if not state.location:
7111            state.location = Location()
7112        state.location.set_locality(line.data)
7113
7114    def __location_city(self, line, state):
7115        """
7116        @param line: The current line in GedLine format
7117        @type line: GedLine
7118        @param state: The current state
7119        @type state: CurrentState
7120        """
7121        if not state.location:
7122            state.location = Location()
7123        state.location.set_city(line.data)
7124
7125    def __location_stae(self, line, state):
7126        """
7127        @param line: The current line in GedLine format
7128        @type line: GedLine
7129        @param state: The current state
7130        @type state: CurrentState
7131        """
7132        if not state.location:
7133            state.location = Location()
7134        state.location.set_state(line.data)
7135
7136    def __location_post(self, line, state):
7137        """
7138        @param line: The current line in GedLine format
7139        @type line: GedLine
7140        @param state: The current state
7141        @type state: CurrentState
7142        """
7143        if not state.location:
7144            state.location = Location()
7145        state.location.set_postal_code(line.data)
7146
7147    def __location_ctry(self, line, state):
7148        """
7149        @param line: The current line in GedLine format
7150        @type line: GedLine
7151        @param state: The current state
7152        @type state: CurrentState
7153        """
7154        if not state.location:
7155            state.location = Location()
7156        state.location.set_country(line.data)
7157
7158    def __location_phone(self, line, state):
7159        """
7160        @param line: The current line in GedLine format
7161        @type line: GedLine
7162        @param state: The current state
7163        @type state: CurrentState
7164        """
7165        if not state.location:
7166            state.location = Location()
7167        state.location.set_phone(line.data)
7168
7169    def __location_note(self, line, state):
7170        """
7171        @param line: The current line in GedLine format
7172        @type line: GedLine
7173        @param state: The current state
7174        @type state: CurrentState
7175        """
7176        if state.event:
7177            self.__parse_note(line, state.place, state)
7178        else:
7179            # This causes notes below SUBMitter to be ignored
7180            self.__not_recognized(line, state)
7181
7182    def __optional_note(self, line, state):
7183        """
7184        @param line: The current line in GedLine format
7185        @type line: GedLine
7186        @param state: The current state
7187        @type state: CurrentState
7188        """
7189        self.__parse_note(line, state.obj, state)
7190
7191    #----------------------------------------------------------------------
7192    #
7193    # HEAD parsing
7194    #
7195    #----------------------------------------------------------------------
7196
7197    def __parse_header(self):
7198        """
7199        Handling of the lines subordinate to the HEAD GEDCOM tag
7200
7201         n HEAD                                          {1:1}
7202           +1 SOUR <APPROVED_SYSTEM_ID>                  {1:1}
7203             +2 VERS <VERSION_NUMBER>                    {0:1}
7204             +2 NAME <NAME_OF_PRODUCT>                   {0:1}
7205             +2 CORP <NAME_OF_BUSINESS>                  {0:1}
7206               +3 <<ADDRESS_STRUCTURE>>                  {0:1}
7207             +2 DATA <NAME_OF_SOURCE_DATA>               {0:1}
7208               +3 DATE <PUBLICATION_DATE>                {0:1}
7209               +3 COPR <COPYRIGHT_SOURCE_DATA>           {0:1}
7210           +1 DEST <RECEIVING_SYSTEM_NAME>               {0:1*}
7211           +1 DATE <TRANSMISSION_DATE>                   {0:1}
7212             +2 TIME <TIME_VALUE>                        {0:1}
7213           +1 SUBM @<XREF:SUBM>@                         {1:1}
7214           +1 SUBN @<XREF:SUBN>@                         {0:1}
7215           +1 FILE <FILE_NAME>                           {0:1}
7216           +1 COPR <COPYRIGHT_GEDCOM_FILE>               {0:1}
7217           +1 GEDC                                       {1:1}
7218             +2 VERS <VERSION_NUMBER>                    {1:1}
7219             +2 FORM <GEDCOM_FORM>                       {1:1}
7220           +1 CHAR <CHARACTER_SET>                       {1:1}
7221             +2 VERS <VERSION_NUMBER>                    {0:1}
7222           +1 LANG <LANGUAGE_OF_TEXT>                    {0:1}
7223           +1 PLAC                                       {0:1}
7224             +2 FORM <PLACE_HIERARCHY>                   {1:1}
7225           +1 NOTE <GEDCOM_CONTENT_DESCRIPTION>          {0:1}
7226             +2 [CONT|CONC] <GEDCOM_CONTENT_DESCRIPTION> {0:M}
7227
7228          * NOTE: Submissions to the Family History Department for Ancestral
7229          File submission or for clearing temple ordinances must use a
7230          DESTination of ANSTFILE or TempleReady.
7231
7232        """
7233        state = CurrentState(level=1)
7234        self.__parse_level(state, self.head_parse_tbl, self.__undefined)
7235        self.__check_msgs(_("HEAD (header)"), state, None)
7236
7237    def __header_sour(self, line, state):
7238        """
7239        @param line: The current line in GedLine format
7240        @type line: GedLine
7241        @param state: The current state
7242        @type state: CurrentState
7243        """
7244        if line.data.strip() in ["FTW", "FTM"]:
7245            self.is_ftw = True
7246        # Some software (e.g. RootsMagic (http://files.rootsmagic.com/PAF-
7247        # Book/RootsMagic-for-PAF-Users-Printable.pdf) use the Addr fields for
7248        # 'Place Details (address, hospital, cemetary)'
7249        if line.data.strip().lower() in ['rootsmagic']:
7250            self.addr_is_detail = True
7251        # We will use the approved system ID as the name of the generating
7252        # software, in case we do not get the name in the proper place
7253        self.genby = line.data
7254        if self.use_def_src:
7255            sattr = SrcAttribute()
7256            sattr.set_type(_("Approved system identification"))
7257            sattr.set_value("%s" % self.genby)
7258            self.def_src.add_attribute(sattr)
7259        sub_state = CurrentState(level=state.level + 1)
7260        self.__parse_level(sub_state, self.header_sour_parse_tbl,
7261                           self.__undefined)
7262        state.msg += sub_state.msg
7263        # We can't produce the 'Generated by' statement till the end of the
7264        # SOUR level, because the name and version may come in any order
7265        if self.use_def_src:
7266            # feature request 2356: avoid genitive form
7267            sattr = SrcAttribute()
7268            sattr.set_type(_("Generated By"))
7269            sattr.set_value("%s %s" % (self.genby, self.genvers))
7270            self.def_src.add_attribute(sattr)
7271
7272    def __header_sour_name(self, line, state):
7273        """
7274        @param line: The current line in GedLine format
7275        @type line: GedLine
7276        @param state: The current state
7277        @type state: CurrentState
7278        """
7279        # This is where the name of the product that generated the GEDCOM file
7280        # should appear, and this will overwrite the approved system ID, if any
7281        self.genby = line.data
7282        if self.use_def_src:
7283            sattr = SrcAttribute()
7284            sattr.set_type(_("Name of software product"))
7285            sattr.set_value(self.genby)
7286            self.def_src.add_attribute(sattr)
7287
7288    def __header_sour_vers(self, line, state):
7289        """
7290        @param line: The current line in GedLine format
7291        @type line: GedLine
7292        @param state: The current state
7293        @type state: CurrentState
7294        """
7295        self.genvers = line.data
7296        if self.use_def_src:
7297            sattr = SrcAttribute()
7298            sattr.set_type(_("Version number of software product"))
7299            sattr.set_value(self.genvers)
7300            self.def_src.add_attribute(sattr)
7301
7302    def __header_sour_corp(self, line, state):
7303        """
7304        @param line: The current line in GedLine format
7305        @type line: GedLine
7306        @param state: The current state
7307        @type state: CurrentState
7308        """
7309        repo = Repository()
7310        sub_state = CurrentState(level=state.level + 1)
7311        sub_state.repo = repo
7312        self.__parse_level(sub_state, self.header_corp_addr, self.__undefined)
7313        state.msg += sub_state.msg
7314
7315        if self.use_def_src:
7316            repo.set_name(_("Business that produced the product: %s") %
7317                          line.data)
7318            rtype = RepositoryType()
7319            rtype.set((RepositoryType.CUSTOM, _('GEDCOM data')))
7320            repo.set_type(rtype)
7321            self.dbase.add_repository(repo, self.trans)
7322            repo_ref = RepoRef()
7323            repo_ref.set_reference_handle(repo.handle)
7324            mtype = SourceMediaType()
7325            mtype.set((SourceMediaType.UNKNOWN, ''))
7326            repo_ref.set_media_type(mtype)
7327            self.def_src.add_repo_reference(repo_ref)
7328
7329    def __header_sour_data(self, line, state):
7330        """
7331        @param line: The current line in GedLine format
7332        @type line: GedLine
7333        @param state: The current state
7334        @type state: CurrentState
7335        """
7336        if self.use_def_src:
7337            sattr = SrcAttribute()
7338            sattr.set_type(_("Name of source data"))
7339            sattr.set_value(line.data)
7340            self.def_src.add_attribute(sattr)
7341        sub_state = CurrentState(level=state.level + 1)
7342        self.__parse_level(sub_state, self.header_sour_data,
7343                           self.__undefined)
7344        state.msg += sub_state.msg
7345
7346    def __header_sour_copr(self, line, state):
7347        """
7348        @param line: The current line in GedLine format
7349        @type line: GedLine
7350        @param state: The current state
7351        @type state: CurrentState
7352        """
7353        if self.use_def_src:
7354            sattr = SrcAttribute()
7355            sattr.set_type(_("Copyright of source data"))
7356            sattr.set_value(line.data)
7357            self.def_src.add_attribute(sattr)
7358
7359    def __header_sour_date(self, line, state):
7360        """
7361        @param line: The current line in GedLine format
7362        @type line: GedLine
7363        @param state: The current state
7364        @type state: CurrentState
7365        """
7366        if self.use_def_src:
7367            # Because there is a DATE tag, line.data is automatically converted
7368            # to a Date object before getting to this point, so it has to be
7369            # converted back to a string
7370            text_date = str(line.data)
7371            sattr = SrcAttribute()
7372            sattr.set_type(_("Publication date of source data"))
7373            sattr.set_value(text_date)
7374            self.def_src.add_attribute(sattr)
7375
7376    def __header_file(self, line, state):
7377        """
7378        @param line: The current line in GedLine format
7379        @type line: GedLine
7380        @param state: The current state
7381        @type state: CurrentState
7382        """
7383        if self.use_def_src:
7384            filename = os.path.basename(line.data).split('\\')[-1]
7385            # feature request 2356: avoid genitive form
7386            self.def_src.set_title(_("Import from %s") % filename)
7387
7388    def __header_copr(self, line, state):
7389        """
7390        @param line: The current line in GedLine format
7391        @type line: GedLine
7392        @param state: The current state
7393        @type state: CurrentState
7394        """
7395        if self.use_def_src:
7396            self.def_src.set_publication_info(line.data)
7397
7398    def __header_subm(self, line, state):
7399        """
7400        @param line: The current line in GedLine format
7401        @type line: GedLine
7402        @param state: The current state
7403        @type state: CurrentState
7404
7405        +1 SUBM @<XREF:SUBM>@  {1:1}
7406        This should be simply be a cross-reference to the correct Submitter
7407        record. Note that there can be multiple Submitter records, so it is
7408        necessary to remember which one should be applied.
7409
7410        """
7411        self.subm = line.data[1:-1]
7412        sub_state = CurrentState(level=state.level + 1)
7413        self.__parse_level(sub_state, self.header_subm, self.__ignore)
7414        state.msg += sub_state.msg
7415
7416    def __header_subn(self, line, state):
7417        """
7418        @param line: The current line in GedLine format
7419        @type line: GedLine
7420        @param state: The current state
7421        @type state: CurrentState
7422        """
7423        if self.use_def_src:
7424            sattr = SrcAttribute()
7425            sattr.set_type(_('Submission record identifier'))
7426            sattr.set_value(line.token_text)
7427            self.def_src.add_attribute(sattr)
7428
7429    def __header_lang(self, line, state):
7430        """
7431        @param line: The current line in GedLine format
7432        @type line: GedLine
7433        @param state: The current state
7434        @type state: CurrentState
7435        """
7436        if self.use_def_src:
7437            sattr = SrcAttribute()
7438            sattr.set_type(_('Language of GEDCOM text'))
7439            sattr.set_value(line.data)
7440            self.def_src.add_attribute(sattr)
7441
7442    def __header_dest(self, line, state):
7443        """
7444        @param line: The current line in GedLine format
7445        @type line: GedLine
7446        @param state: The current state
7447        @type state: CurrentState
7448
7449        FIXME: This processing does not depend on DEST, so there seems to be
7450        no reason for it to be placed here. Perhaps it is supposed to be after
7451        all the SOUR levels have been processed, but self.genby was only
7452        assigned by the initial SOUR tag, so this could have been done there.
7453        Perhaps, as suggested by the text of the error message, it was
7454        supposed to test whenther the_DEST_ was LEGACY, in which case the
7455        coding is now wrong.
7456        """
7457        if self.genby.upper() == "LEGACY":
7458            fname = os.path.basename(self.filename)
7459            self.user.warn(
7460                _("Import of GEDCOM file %(filename)s with DEST=%(by)s, "
7461                  "could cause errors in the resulting database!") %
7462                {'filename': fname, 'by': self.genby},
7463                _("Look for nameless events."))
7464
7465    def __header_char(self, line, state):
7466        """
7467        @param line: The current line in GedLine format
7468        @type line: GedLine
7469        @param state: The current state
7470        @type state: CurrentState
7471        """
7472        #   +1 CHAR <CHARACTER_SET>                       {1:1}
7473        #     +2 VERS <VERSION_NUMBER>                    {0:1}
7474        encoding = line.data
7475        version = ""
7476        while True:
7477            line = self.__get_next_line()
7478            if self.__level_is_finished(line, state.level + 1):
7479                break
7480            elif line.token == TOKEN_VERS:
7481                version = line.data
7482
7483        if self.use_def_src:
7484            if version == "":
7485                sattr = SrcAttribute()
7486                sattr.set_type(_('Character set'))
7487                sattr.set_value(encoding)
7488                self.def_src.add_attribute(sattr)
7489            else:
7490                sattr = SrcAttribute()
7491                sattr.set_type(_('Character set and version'))
7492                sattr.set_value("%s %s" % (encoding, version))
7493                self.def_src.add_attribute(sattr)
7494
7495    def __header_gedc(self, line, state):
7496        """
7497        @param line: The current line in GedLine format
7498        @type line: GedLine
7499        @param state: The current state
7500        @type state: CurrentState
7501        """
7502        while True:
7503            line = self.__get_next_line()
7504            if self.__level_is_finished(line, state.level + 1):
7505                break
7506            elif line.token == TOKEN_VERS:
7507                if (not line.data) or line.data[0] != "5":
7508                    self.__add_msg(_("GEDCOM version not supported"),
7509                                   line, state)
7510                if self.use_def_src:
7511                    sattr = SrcAttribute()
7512                    sattr.set_type(_('GEDCOM version'))
7513                    sattr.set_value(line.data)
7514                    self.def_src.add_attribute(sattr)
7515            elif line.token == TOKEN_FORM:
7516                if line.data == "LINEAGE-LINKED":
7517                    pass
7518                elif line.data.upper() == "LINEAGE-LINKED":
7519                    # Allow Lineage-Linked etc. though it should be in
7520                    # uppercase  (Note: Gramps is not a validator! prc)
7521                    self.__add_msg(_("GEDCOM FORM should be in uppercase"),
7522                                   line, state)
7523                else:
7524                    self.__add_msg(_("GEDCOM FORM not supported"), line, state)
7525                if self.use_def_src:
7526                    sattr = SrcAttribute()
7527                    sattr.set_type(_('GEDCOM form'))
7528                    sattr.set_value(line.data)
7529                    self.def_src.add_attribute(sattr)
7530
7531    def __header_plac(self, line, state):
7532        """
7533        @param line: The current line in GedLine format
7534        @type line: GedLine
7535        @param state: The current state
7536        @type state: CurrentState
7537        """
7538        sub_state = CurrentState(level=state.level + 1)
7539        self.__parse_level(sub_state, self.place_form, self.__undefined)
7540        state.msg += sub_state.msg
7541
7542    def __place_form(self, line, state):
7543        """
7544        @param line: The current line in GedLine format
7545        @type line: GedLine
7546        @param state: The current state
7547        @type state: CurrentState
7548        """
7549        self.place_parser.parse_form(line)
7550
7551    def __header_date(self, line, state):
7552        """
7553        @param line: The current line in GedLine format
7554        @type line: GedLine
7555        @param state: The current state
7556        @type state: CurrentState
7557
7558        This processes the <TRANSMISSION_DATE>, i.e. the date when this
7559        [GEDCOM] transmission was created (as opposed to the date when the
7560        source data that was used to create the transmission was published or
7561        created
7562
7563        Because there is a DATE tag, line.data is automatically converted to a
7564        Date object before getting to this point, so it has to be converted
7565        back to a string
7566        """
7567        tx_date = str(line.data)
7568        tx_time = ""
7569        line = self.__get_next_line()
7570        if self.__level_is_finished(line, state.level):
7571            pass
7572        elif line.token == TOKEN_TIME:
7573            tx_time = str(line.data)
7574
7575        if self.use_def_src:
7576            if tx_time == "":
7577                sattr = SrcAttribute()
7578                sattr.set_type(_('Creation date of GEDCOM'))
7579                sattr.set_value(tx_date)
7580                self.def_src.add_attribute(sattr)
7581            else:
7582                sattr = SrcAttribute()
7583                sattr.set_type(_('Creation date and time of GEDCOM'))
7584                sattr.set_value("%s %s" % (tx_date, tx_time))
7585                self.def_src.add_attribute(sattr)
7586
7587    def __header_note(self, line, state):
7588        """
7589        @param line: The current line in GedLine format
7590        @type line: GedLine
7591        @param state: The current state
7592        @type state: CurrentState
7593        """
7594        if self.use_def_src:
7595            self.__parse_note(line, self.def_src, state)
7596
7597    def __header_subm_name(self, line, state):
7598        """
7599        @param line: The current line in GedLine format
7600        @type line: GedLine
7601        @param state: The current state
7602        @type state: CurrentState
7603        """
7604        if self.use_def_src:
7605            self.def_src.set_author(line.data)
7606
7607    def __parse_note(self, line, obj, state):
7608        if line.token == TOKEN_RNOTE:
7609            # reference to a named note defined elsewhere
7610            #NOTE_STRUCTURE: =
7611            #  n  NOTE @<XREF:NOTE>@  {1:1}
7612            #    +1 SOUR @<XREF:SOUR>@  {0:M}  # 5.5 only, not in 5.5.1
7613            handle = self.__find_note_handle(self.nid_map[line.data])
7614            obj.add_note(handle)
7615            self.note_type_map[handle] = OBJ_NOTETYPE.get(type(obj).__name__,
7616                                                          NoteType.GENERAL)
7617        else:
7618            # Embedded note
7619            #NOTE_STRUCTURE: =
7620            #  n  NOTE [<SUBMITTER_TEXT> | <NULL>]  {1:1}
7621            #    +1 [ CONC | CONT ] <SUBMITTER_TEXT>  {0:M}
7622            #    +1 SOUR @<XREF:SOUR>@  {0:M}
7623            if not line.data:
7624                self.__add_msg(_("Empty note ignored"), line, state)
7625                self.__skip_subordinate_levels(line.level + 1, state)
7626            else:
7627                new_note = Note(line.data)
7628                new_note.set_gramps_id(self.nid_map[""])
7629                new_note.set_handle(create_id())
7630
7631                sub_state = CurrentState(level=state.level + 1)
7632                sub_state.note = new_note
7633                self.__parse_level(sub_state, self.note_parse_tbl,
7634                                   self.__undefined)
7635                state.msg += sub_state.msg
7636
7637                # Add a default tag if provided
7638                self.__add_default_tag(new_note)
7639                # Set the type of the note
7640                new_note.set_type(OBJ_NOTETYPE.get(type(obj).__name__,
7641                                                   NoteType.GENERAL))
7642                self.dbase.commit_note(new_note, self.trans, new_note.change)
7643                obj.add_note(new_note.get_handle())
7644
7645    #----------------------------------------------------------------------
7646    #
7647    # NOTE parsing
7648    #
7649    #----------------------------------------------------------------------
7650
7651    def __parse_inline_note(self, line, level):
7652        """
7653        Handling of lines subordinate to the NOTE GEDCOM tag
7654
7655        n @<XREF:NOTE>@ NOTE <SUBMITTER_TEXT>  {1:1}
7656          +1 [ CONC | CONT] <SUBMITTER_TEXT>  {0:M}
7657          +1 <<SOURCE_CITATION>>  {0:M}
7658          +1 REFN <USER_REFERENCE_NUMBER>  {0:M}
7659            +2 TYPE <USER_REFERENCE_TYPE>  {0:1}
7660          +1 RIN <AUTOMATED_RECORD_ID>  {0:1}
7661          +1 <<CHANGE_DATE>>  {0:1}
7662        """
7663        state = CurrentState(level=1)
7664        if not line.data and \
7665                self.nid_map.clean(line.token_text) not in self.nid_map.map():
7666            self.__add_msg(_("Empty note ignored"), line)
7667            self.__skip_subordinate_levels(level, state)
7668        else:
7669            gid = self.nid_map[line.token_text]
7670            handle = self.__find_note_handle(gid)
7671            new_note = Note(line.data)
7672            new_note.set_handle(handle)
7673            new_note.set_gramps_id(gid)
7674            if handle in self.note_type_map:
7675                new_note.set_type(self.note_type_map[handle])
7676            sub_state = CurrentState(level=state.level)
7677            sub_state.note = new_note
7678            self.__parse_level(sub_state, self.note_parse_tbl,
7679                               self.__undefined)
7680            state.msg += sub_state.msg
7681
7682            self.dbase.commit_note(new_note, self.trans, new_note.change)
7683            self.__check_msgs(_("NOTE Gramps ID %s") %
7684                              new_note.get_gramps_id(), state, None)
7685
7686    def __note_chan(self, line, state):
7687        if state.note:
7688            self.__parse_change(line, state.note, state.level + 1, state)
7689
7690    def __parse_source_reference(self, citation, level, handle, state):
7691        """
7692        Read the data associated with a SOUR reference.
7693        """
7694        sub_state = CurrentState(level=level + 1)
7695        sub_state.citation = citation
7696        sub_state.handle = handle
7697        self.__parse_level(sub_state, self.citation_parse_tbl, self.__ignore)
7698        state.msg += sub_state.msg
7699
7700    def __parse_header_head(self):
7701        """
7702        Validate that this is a valid GEDCOM file.
7703        """
7704        line = self.__get_next_line()
7705        if line.token != TOKEN_HEAD:
7706            raise GedcomError("%s is not a GEDCOM file" % self.filename)
7707
7708    def __parse_submission(self, line, state):
7709        """
7710        @param line: The current line in GedLine format
7711        @type line: GedLine
7712        @param state: The current state
7713        @type state: CurrentState
7714
7715        Handling of lines subordinate to the level 0 SUMN (Submission) GEDCOM
7716        tag
7717
7718          n  @<XREF:SUBN>@ SUBN  {1:1]
7719            +1 SUBM @<XREF:SUBM>@ {0:1}
7720            +1 FAMF <NAME_OF_FAMILY_FILE>  {0:1}
7721            +1 TEMP <TEMPLE_CODE>  {0:1}
7722            +1 ANCE <GENERATIONS_OF_ANCESTORS>  {0:1}
7723            +1 DESC <GENERATIONS_OF_DESCENDANTS>  {0:1}
7724            +1 ORDI <ORDINANCE_PROCESS_FLAG>  {0:1}
7725            +1 RIN <AUTOMATED_RECORD_ID>  {0:1}
7726            +1 NOTE <NOTE_STRUCTURE> {0:m}
7727        """
7728        while True:
7729            line = self.__get_next_line()
7730            msg = ""
7731            if self.__level_is_finished(line, state.level):
7732                break
7733            elif line.token == TOKEN_SUBM:
7734                msg = _("Submission: Submitter")
7735            elif line.token == TOKEN_UNKNOWN and line.token_text == "FAMF":
7736                msg = _("Submission: Family file")
7737            elif line.token == TOKEN_TEMP:
7738                msg = _("Submission: Temple code")
7739            elif line.token == TOKEN_UNKNOWN and line.token_text == "ANCE":
7740                msg = _("Submission: Generations of ancestors")
7741            elif line.token == TOKEN_UNKNOWN and line.token_text == "DESC":
7742                msg = _("Submission: Generations of descendants")
7743            elif line.token == TOKEN_UNKNOWN and line.token_text == "ORDI":
7744                msg = _("Submission: Ordinance process flag")
7745            elif line.token == TOKEN_NOTE or line.token == TOKEN_RNOTE:
7746                self.__parse_note(line, self.def_src, state)
7747            else:
7748                self.__not_recognized(line, state)
7749                continue
7750
7751            if self.use_def_src and msg != "":
7752                sattr = SrcAttribute()
7753                sattr.set_type(msg)
7754                sattr.set_value(line.data)
7755                self.def_src.add_attribute(sattr)
7756                self.dbase.commit_source(self.def_src, self.trans)
7757
7758    def handle_source(self, line, level, state):
7759        """
7760        Handle the specified source, building a source reference to
7761        the object.
7762        """
7763        citation = Citation()
7764        if line.data and line.data[0] != "@":
7765            title = line.data
7766            handle = self.inline_srcs.get(title, create_id())
7767            src = Source()
7768            src.handle = handle
7769            src.gramps_id = self.sid_map[""]
7770            self.inline_srcs[title] = handle
7771        else:
7772            src = self.__find_or_create_source(self.sid_map[line.data])
7773            # We need to set the title to the cross reference identifier of the
7774            # SOURce record, just in case we never find the source record. If
7775            # we didn't find the source record, then the source object would
7776            # have got deleted by Chack and repair because the record is empty.
7777            # If we find the source record, the title is overwritten in
7778            # __source_title.
7779            if not src.title:
7780                src.set_title(line.data)
7781        self.dbase.commit_source(src, self.trans)
7782        self.__parse_source_reference(citation, level, src.handle, state)
7783        citation.set_reference_handle(src.handle)
7784        self.dbase.add_citation(citation, self.trans)
7785        return citation.handle
7786
7787    def __parse_change(self, line, obj, level, state):
7788        """
7789        CHANGE_DATE:=
7790
7791        >  n CHAN {1:1}
7792        >  +1 DATE <CHANGE_DATE> {1:1}
7793        >  +2 TIME <TIME_VALUE> {0:1}
7794        >  +1 <<NOTE_STRUCTURE>> {0:M}
7795
7796        The Note structure is ignored, since we have nothing
7797        corresponding in Gramps.
7798
7799        Based on the values calculated, attempt to convert to a valid
7800        change time using time.strptime. If this fails (and it shouldn't
7801        unless the value is meaningless and doesn't conform to the GEDCOM
7802        spec), the value is ignored.
7803        """
7804        tstr = None
7805        dstr = None
7806        dobj = None
7807        while True:
7808            line = self.__get_next_line()
7809            if self.__level_is_finished(line, level):
7810                break
7811            elif line.token == TOKEN_TIME:
7812                tstr = line.data
7813            elif line.token == TOKEN_DATE:
7814                #Lexer converted already to Date object
7815                dobj = line.data
7816            elif line.token == TOKEN_NOTE or line.token == TOKEN_RNOTE:
7817                self.__ignore(line, state)
7818            else:
7819                self.__not_recognized(line, state)
7820
7821        # Attempt to convert the values to a valid change time
7822        if dobj:
7823            dstr = "%s %s %s" % (dobj.get_day(), dobj.get_month(),
7824                                 dobj.get_year())
7825            try:
7826                if tstr:
7827                    try:
7828                        tstruct = time.strptime("%s %s" % (dstr, tstr),
7829                                                "%d %m %Y %H:%M:%S")
7830                    except ValueError:
7831                        #seconds is optional in GEDCOM
7832                        tstruct = time.strptime("%s %s" % (dstr, tstr),
7833                                                "%d %m %Y %H:%M")
7834                else:
7835                    tstruct = time.strptime(dstr, "%d %m %Y")
7836                val = time.mktime(tstruct)
7837                obj.change = val
7838            except (ValueError, OverflowError):
7839                # parse of time structure failed, so ignore. According to the
7840                # Python manual: "The functions in this [time] module do not
7841                # handle dates and times before the epoch or far in the future.
7842                # The cut-off point in the future is determined by the C
7843                # library; for Unix, it is typically in 2038." If the time is
7844                # too far in the future, this gives OverflowError.
7845                pass
7846
7847    def __do_refn(self, line, state, obj):
7848        """
7849        @param line: The current line in GedLine format
7850        @type line: GedLine
7851        @param state: The current state
7852        @type state: CurrentState
7853        @param obj: The object to attach the attribute
7854        @type obj: Gramps primary object
7855        """
7856        attr = Attribute()
7857        attr.set_type(line.token_text)          # Atrribute : REFN
7858        attr.set_value(line.data)
7859        # if there is a subsequent TYPE, we add it as a note to the attribute
7860        line = self.__chk_subordinate(state.level + 1, state, TOKEN_TYPE)
7861        if line:
7862            new_note = Note(line.data)
7863            new_note.set_gramps_id(self.nid_map[""])
7864            new_note.set_handle(create_id())
7865            new_note.set_type('REFN-TYPE')
7866            self.dbase.commit_note(new_note, self.trans, new_note.change)
7867            attr.add_note(new_note.get_handle())
7868        obj.attribute_list.append(attr)
7869
7870    def __build_event_pair(self, state, event_type, event_map, description):
7871        """
7872        n TYPE <EVENT_DESCRIPTOR> {0:1} p.*
7873        n DATE <DATE_VALUE> {0:1} p.*/*
7874        n <<PLACE_STRUCTURE>> {0:1} p.*
7875        n <<ADDRESS_STRUCTURE>> {0:1} p.*
7876        n AGE <AGE_AT_EVENT> {0:1} p.*
7877        n AGNC <RESPONSIBLE_AGENCY> {0:1} p.*
7878        n CAUS <CAUSE_OF_EVENT> {0:1} p.*
7879        n <<SOURCE_CITATION>> {0:M} p.*
7880        n <<MULTIMEDIA_LINK>> {0:M} p.*, *
7881        n <<NOTE_STRUCTURE>> {0:M} p.
7882        """
7883        event = Event()
7884        event_ref = EventRef()
7885        event.set_gramps_id(self.emapper.find_next())
7886        event.set_type(event_type)
7887
7888        if description and description != 'Y':
7889            event.set_description(description)
7890        self.dbase.add_event(event, self.trans)
7891
7892        sub_state = CurrentState()
7893        sub_state.level = state.level + 1
7894        sub_state.event_ref = event_ref
7895        sub_state.event = event
7896        sub_state.person = state.person
7897        sub_state.pf = self.place_parser
7898
7899        self.__parse_level(sub_state, event_map, self.__undefined)
7900        if(description == 'Y' and event.date.is_empty() and
7901           event.type == EventType.BIRTH and not event.place):
7902            event.set_description(_("No Date Information"))
7903        state.msg += sub_state.msg
7904
7905        self.__add_place(event, sub_state)
7906
7907        self.dbase.commit_event(event, self.trans)
7908
7909        event_ref.set_reference_handle(event.handle)
7910        return event_ref
7911
7912    def __build_family_event_pair(self, state, event_type, event_map,
7913                                  description):
7914        event = Event()
7915        event_ref = EventRef()
7916        event.set_gramps_id(self.emapper.find_next())
7917        event.set_type(event_type)
7918        if description and description != 'Y':
7919            event.set_description(description)
7920
7921        self.dbase.add_event(event, self.trans)
7922
7923        sub_state = CurrentState()
7924        sub_state.family = state.family
7925        sub_state.level = state.level + 1
7926        sub_state.event = event
7927        sub_state.event_ref = event_ref
7928        sub_state.pf = self.place_parser
7929
7930        self.__parse_level(sub_state, event_map, self.__undefined)
7931        state.msg += sub_state.msg
7932
7933        self.__add_place(event, sub_state)
7934
7935        self.dbase.commit_event(event, self.trans)
7936        event_ref.set_reference_handle(event.handle)
7937        return event_ref
7938
7939    def __do_photo(self, state):
7940        """
7941        Choose the primary photo from the list of media present for this
7942        person.  Supports FTM _PHOTO. and others _PRIM feature.
7943          0 INDI
7944          +1 _PHOTO @<XREF:OBJE>@ {1:1}
7945
7946          0 INDI
7947            +1 OBJE @<XREF:OBJE>@
7948              +2 _PRIM <Y/N>
7949
7950          0 INDI
7951            +1 OBJE
7952              +2 FILE primary_photo.jpg
7953              +2 _PRIM <Y/N>
7954
7955        For the _PHOTO varient, state.photo contains the XREF ('@M1@').
7956        For the _PRIM varients, state.photo contains the handle.
7957        Since Gramps currently uses the first media in the list as the
7958        primary, find the primary photo if already in the list, if present,
7959        move to beginning.  If not present, add at the beginning.
7960        This is run after all of the person processing is complete but before
7961        committing the person.
7962        """
7963        if state.photo.startswith('@'):
7964            gramps_id = self.oid_map[state.photo]
7965            handle = self.__find_media_handle(gramps_id)
7966        elif state.photo:
7967            handle = state.photo
7968        else:
7969            return
7970        for mref in state.person.media_list:
7971            if handle == mref.ref:
7972                state.person.media_list.remove(mref)
7973                state.person.media_list.insert(0, mref)
7974                return
7975        mref = MediaRef()
7976        mref.set_reference_handle(handle)
7977        state.person.media_list.insert(0, mref)
7978
7979    def __extract_temple(self, line):
7980        """ Determine the LDS Temple from the input line """
7981        def get_code(code):
7982            """ get the Temple code """
7983            if TEMPLES.is_valid_code(code):
7984                return code
7985            elif TEMPLES.is_valid_name(code):
7986                return TEMPLES.code(code)
7987
7988        code = get_code(line.data)
7989        if code:
7990            return code
7991
7992        # Not sure why we do this. Kind of ugly.
7993        code = get_code(line.data.split()[0])
7994        if code:
7995            return code
7996
7997        # Okay we have no clue which temple this is.
7998        # We should tell the user and store it anyway.
7999        self.__add_msg(_("Invalid temple code"), line, None)
8000        return line.data
8001
8002    def __add_default_source(self, obj):
8003        """
8004        Add the default source to the object.
8005        """
8006        if self.use_def_src and len(obj.get_citation_list()) == 0:
8007            citation = Citation()
8008            citation.set_reference_handle(self.def_src.handle)
8009            self.dbase.add_citation(citation, self.trans)
8010            obj.add_citation(citation.handle)
8011
8012    def __add_default_tag(self, obj):
8013        """
8014        Add the default tag to the object.
8015        """
8016        if self.default_tag:
8017            obj.add_tag(self.default_tag.handle)
8018
8019    def __subm_name(self, line, state):
8020        """
8021        @param line: The current line in GedLine format
8022        @type line: GedLine
8023        @param state: The current state
8024        @type state: CurrentState
8025        """
8026        state.res.set_name(line.data)
8027
8028    def __subm_addr(self, line, state):
8029        """
8030        @param line: The current line in GedLine format
8031        @type line: GedLine
8032        @param state: The current state
8033        @type state: CurrentState
8034        """
8035        free_form = line.data
8036
8037        sub_state = CurrentState(level=state.level + 1)
8038        sub_state.location = state.res
8039
8040        self.__parse_level(sub_state, self.parse_loc_tbl, self.__undefined)
8041        state.msg += sub_state.msg
8042
8043        self.__merge_address(free_form, state.res, line, state)
8044        # Researcher is a sub-type of LocationBase, so get_street and
8045        # set_street which are used in routines called from self.parse_loc_tbl
8046        # work fine.
8047        # Unfortunately, Researcher also has get_address and set_address, so we
8048        # need to copy the street into that.
8049        state.res.set_address(state.res.get_street())
8050
8051    def __subm_phon(self, line, state):
8052        """
8053        n PHON <PHONE_NUMBER> {0:3}
8054
8055        @param line: The current line in GedLine format
8056        @type line: GedLine
8057        @param state: The current state
8058        @type state: CurrentState
8059        """
8060        if state.res.get_phone():
8061            self.__add_msg(_("Only one phone number supported"), line, state)
8062        else:
8063            state.res.set_phone(line.data)
8064
8065    def __subm_email(self, line, state):
8066        """
8067        n EMAIL <ADDRESS_EMAIL> {0:3}
8068
8069        @param line: The current line in GedLine format
8070        @type line: GedLine
8071        @param state: The current state
8072        @type state: CurrentState
8073        """
8074        # only record the first multiple emails for researcher
8075        if not state.res.get_email():
8076            state.res.set_email(line.data)
8077        self.__repo_email(line, state)
8078
8079
8080#-------------------------------------------------------------------------
8081#
8082# GedcomStageOne
8083#
8084#-------------------------------------------------------------------------
class GedcomStageOne:
    """
    The GedcomStageOne parser scans the file quickly, looking for a few things.
     This includes:

    1. Character set encoding
    2. Number of people and families in the list
    3. Child to family references, since Ancestry.com creates GEDCOM files
       without the FAMC references.
    """
    __BAD_UTF16 = _("Your GEDCOM file is corrupted. "
                    "The file appears to be encoded using the UTF16 "
                    "character set, but is missing the BOM marker.")
    __EMPTY_GED = _("Your GEDCOM file is empty.")

    @staticmethod
    def __is_xref_value(value):
        """
        Return True if value is in the form of a XREF value. We assume that
        if we have a leading '@' character, then we are okay.
        An empty value yields False.
        """
        return bool(value) and value[0] == '@'

    def __init__(self, ifile):
        """
        @param ifile: the GEDCOM file, open for reading in binary mode
        """
        self.ifile = ifile
        # person XREF -> XREFs of the families that list the person as child
        self.famc = defaultdict(list)
        # person XREF -> XREFs of the families that list the person as spouse
        self.fams = defaultdict(list)
        self.enc = ""   # encoding name; from the BOM, else the first CHAR
        self.pcnt = 0   # number of INDI records seen
        self.lcnt = 0   # number of non-blank lines seen

    def __detect_file_decoder(self, input_file):
        """
        Detects the file encoding of the file by looking for a BOM
        (byte order marker) in the GEDCOM file. If we detect a UTF-16 or
        UTF-8-BOM encoded file, we choose appropriate decoders.  If no BOM
        is detected, we return in UTF-8 mode it is the more modern option;
        and anyway it doesn't really matter as we are only looking for GEDCOM
        keywords which are only 7-bit ASCII anyway.
        In any case, we Always return the file in text mode with transparent
        newline (CR, LF, or CRLF).

        @raise GedcomError: when the file is empty, or when it looks like
                            UTF-16 text that lacks the BOM
        """
        line = input_file.read(2)
        if line == b"\xef\xbb":
            # first two bytes of the UTF-8 BOM; skip its third byte
            input_file.read(1)
            self.enc = "utf_8_sig"
            return TextIOWrapper(input_file, encoding='utf_8_sig',
                                 errors='replace', newline=None)
        elif line == b"\xff\xfe" or line == b"\xfe\xff":
            self.enc = "UTF16"
            # rewind so that the utf_16 decoder sees the BOM
            input_file.seek(0)
            return TextIOWrapper(input_file, encoding='utf_16',
                                 errors='replace', newline=None)
        elif not line:
            raise GedcomError(self.__EMPTY_GED)
        elif line == b"\x30\x00" or line == b"\x00\x30":
            # a leading '0' stored as a two byte character without a BOM
            raise GedcomError(self.__BAD_UTF16)
        else:
            input_file.seek(0)
            return TextIOWrapper(input_file, encoding='utf-8',
                                 errors='replace', newline=None)

    def parse(self):
        """
        Parse the input file.

        Counts the non-blank lines and the INDI records, collects the
        spouse and child references found in the FAM records, and records
        the value of the first CHAR line when the encoding was not already
        determined from a BOM.
        """
        current_family_id = ""

        reader = self.__detect_file_decoder(self.ifile)

        for line in reader:
            # Scan for a few items, keep counts.  Also look for actual CHAR
            # Keyword to figure out actual encoding for non-unicode file types
            line = line.strip()
            if not line:
                continue
            self.lcnt += 1

            try:
                # The padding lets a two-token line unpack with an empty
                # value; split() already strips surrounding whitespace.
                data = line.split(None, 3) + ['']
                (level, key, value) = data[:3]
                level = int(level)
            except ValueError:
                # fewer than two tokens, or a level that is not a number
                continue

            if level == 0 and key[0] == '@':
                if value in ("FAM", "FAMILY"):
                    current_family_id = key[1:-1]
                elif value in ("INDI", "INDIVIDUAL"):
                    self.pcnt += 1
            elif key in ("HUSB", "HUSBAND", "WIFE") and \
                    self.__is_xref_value(value):
                self.fams[value[1:-1]].append(current_family_id)
            elif key in ("CHIL", "CHILD") and self.__is_xref_value(value):
                self.famc[value[1:-1]].append(current_family_id)
            elif key == 'CHAR' and not self.enc:
                self.enc = value
        LOG.debug("parse pcnt %d", self.pcnt)
        LOG.debug("parse famc %s", dict(self.famc))
        LOG.debug("parse fams %s", dict(self.fams))
        self.ifile = reader  # need this to keep python from autoclosing file

    def get_famc_map(self):
        """
        Return the Person to Child Family map
        """
        return self.famc

    def get_fams_map(self):
        """
        Return the Person to Family map (where the person is a spouse)
        """
        return self.fams

    def get_encoding(self):
        """
        Return the detected encoding, in upper case
        """
        return self.enc.upper()

    def set_encoding(self, enc):
        """
        Forces the encoding
        """
        assert isinstance(enc, str)
        self.enc = enc

    def get_person_count(self):
        """
        Return the number of INDI records found
        """
        return self.pcnt

    def get_line_count(self):
        """
        Return the number of non-blank lines in the file
        """
        return self.lcnt
8226
8227
8228#-------------------------------------------------------------------------
8229#
8230# make_gedcom_date
8231#
8232#-------------------------------------------------------------------------
def make_gedcom_date(subdate, calendar, mode, quality):
    """
    Convert a Gramps date structure into a GEDCOM compatible date.

    @param subdate: sequence whose first three items are day, month and year
    @param calendar: calendar identifier; selects the month names and the
                     prefix through CALENDAR_MAP
    @param mode: date modifier; a matching DATE_MODIFIER entry is prepended
    @param quality: date quality; a matching DATE_QUALITY entry is prepended
    @return: the GEDCOM date string
    """
    (day, mon, year) = subdate[0:3]
    (mmap, prefix) = CALENDAR_MAP.get(calendar, (MONTH, ""))
    if year < 0:
        year = -year
        bce = " B.C."
    else:
        bce = ""
    try:
        retval = __build_date_string(day, mon, year, bce, mmap)
    except IndexError:
        # The month has no name in the selected calendar: report it through
        # the module logger and fall back to the year alone.
        LOG.warning("Month index error - %d", mon)
        retval = "%d%s" % (year, bce)
    if calendar == Date.CAL_SWEDISH:
        # If Swedish calendar use ISO form for the date and append
        # (swedish) to indicate the calendar
        if year and not mon and not day:
            retval = "%i" % (year)
        else:
            retval = "%i-%02i-%02i" % (year, mon, day)
        retval = retval + " (swedish)"
        # Skip prefix @#DUNKNOWN@ as it seems
        # not used in all other genealogy applications.
        # Gramps can handle it on import, but not with (swedish) appended
        # to explain what calendar, the unknown refer to
        prefix = ""
    if prefix:
        retval = "%s %s" % (prefix, retval)
    if mode in DATE_MODIFIER:
        retval = "%s %s" % (DATE_MODIFIER[mode], retval)
    if quality in DATE_QUALITY:
        retval = "%s %s" % (DATE_QUALITY[quality], retval)
    return retval
8270
8271
def __build_date_string(day, mon, year, bce, mmap):
    """
    Build a date string from the supplied information.

    A zero day, month or year means that part is unknown. Without a month
    only the year is given; without a year the remaining parts are put in
    parentheses. mmap is indexed by the month number to get the month name,
    and bce is appended directly after the year.
    """
    if mon == 0:
        # without a month the day cannot be expressed either
        return '%d%s' % (year, bce)

    month_name = mmap[mon]
    if day == 0:
        day_month = month_name
    else:
        day_month = "%d %s" % (day, month_name)

    if year == 0:
        return '(%s)' % day_month
    return "%s %d%s" % (day_month, year, bce)
8290