1#
2# Gramps - a GTK+/GNOME based genealogy program
3#
4# Copyright (C) 2000-2006  Martin Hawlisch, Donald N. Allingham
5# Copyright (C) 2008       Brian G. Matherly
6# Copyright (C) 2013       Vassilii Khachaturov
7#
8# This program is free software; you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation; either version 2 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with this program; if not, write to the Free Software
20# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21#
22
23"Import from GeneWeb"
24
25#-------------------------------------------------------------------------
26#
27# standard python modules
28#
29#-------------------------------------------------------------------------
30import re
31import time
32
33#------------------------------------------------------------------------
34#
35# Set up logging
36#
37#------------------------------------------------------------------------
38import logging
39LOG = logging.getLogger(".ImportGeneWeb")
40
41#-------------------------------------------------------------------------
42#
43# Gramps modules
44#
45#-------------------------------------------------------------------------
46from gramps.gen.const import GRAMPS_LOCALE as glocale
47from gramps.gen.utils.libformatting import ImportInfo
48_ = glocale.translation.gettext
49ngettext = glocale.translation.ngettext # else "nearby" comments are ignored
50from gramps.gen.errors import GedcomError, GrampsImportError
51from gramps.gen.lib import (Attribute, AttributeType, ChildRef, Citation,
52        Date, DateError, Event, EventRef, EventRoleType, EventType,
53        Family, FamilyRelType, Name, NameType, Note, Person, PersonRef,
54        Place, Source, LdsOrd)
55from gramps.gen.db import DbTxn
56from html.entities import name2codepoint
57
# Matches a GeneWeb date token.  Groups, in order: optional prefix
# characters (any of "kmes~?<>"), first date (digits and "/"), optional
# calendar letter of the first date, optional ".." separator, optional second
# date and optional calendar letter of the second date.
# The calendar letter is a plain character class: the former "[J|H|F]" also
# accepted a literal "|", which is not a key of _cal_map.
_date_parse = re.compile(
    r'([kmes~?<>]+)?([0-9/]+)([JHF])?(\.\.)?([0-9/]+)?([JHF])?')
# Matches a token of the form "0(text)" and captures the text.
_text_parse = re.compile(r'0\((.*)\)')
61
# Date prefix character -> Gramps date modifier.
_mod_map = {
    '>': Date.MOD_AFTER,
    '<': Date.MOD_BEFORE,
    '~': Date.MOD_ABOUT,
}

# Calendar letter following a date -> Gramps calendar.
_cal_map = {
    'J': Date.CAL_JULIAN,
    'H': Date.CAL_HEBREW,
    'F': Date.CAL_FRENCH,
}
73
# GeneWeb personal event tag -> Gramps type.  Most values are EventType
# constants or custom EventType instances; a few tags map to LdsOrd
# constants instead.  Trailing comments are kept from the original table
# (presumably the corresponding GeneWeb event names).
pevents_map = {
    "#birt": EventType.BIRTH,  # Epers_Birth
    "#bapt": EventType.BAPTISM,  # Epers_Baptism
    "#deat": EventType.DEATH,  # Epers_Death
    "#buri": EventType.BURIAL,  # Epers_Burial
    "#crem": EventType.CREMATION,
    "#acco": EventType((EventType.CUSTOM, _('Accomplishment'))),
    "#acqu": EventType((EventType.CUSTOM, _('Acquisition'))),
    "#adhe": EventType((EventType.CUSTOM, _('Adhesion'))),
    "#awar": EventType((EventType.CUSTOM, _('Award'))),
    "#bapl": LdsOrd.BAPTISM,  # Epers_BaptismLDS
    "#barm": EventType.BAR_MITZVAH,  # Epers_BarMitzvah
    "#basm": EventType.BAS_MITZVAH,  # Epers_BatMitzvah
    "#bles": EventType.BLESS,  # Epers_Benediction
    "#cens": EventType.CENSUS,
    "#chgn": EventType((EventType.CUSTOM, _('Change Name'))),
    "#circ": EventType((EventType.CUSTOM, _('Circumcision'))),
    "#conf": EventType.CONFIRMATION,  # Epers_Confirmation
    "#conl": LdsOrd.CONFIRMATION,  # Epers_ConfirmationLDS
    "#degr": EventType.DEGREE,
    "#demm": EventType((EventType.CUSTOM, _('Military Demobilisation'))),
    "#dist": EventType((EventType.CUSTOM, _('Award'))),
    "#dotl": LdsOrd.ENDOWMENT,  # Epers_DotationLDS
    "#educ": EventType.EDUCATION,  # Epers_Education
    "#elec": EventType.ELECTED,  # Epers_Election
    "#emig": EventType.EMIGRATION,
    "#endl": EventType((EventType.CUSTOM, _('Dotation'))),
    "#exco": EventType((EventType.CUSTOM, _('Excommunication'))),
    "#fcom": EventType.FIRST_COMMUN,
    "#flkl": EventType((EventType.CUSTOM, _('LDS Family Link'))),
    "#fune": EventType((EventType.CUSTOM, _('Funeral'))),
    "#grad": EventType.GRADUATION,
    "#hosp": EventType((EventType.CUSTOM, _('Hospitalisation'))),
    "#illn": EventType((EventType.CUSTOM, _('Illness'))),
    "#immi": EventType.IMMIGRATION,
    "#lpas": EventType((EventType.CUSTOM, _('List Passenger'))),
    "#mdis": EventType((EventType.CUSTOM, _('Military Distinction'))),
    # NOTE(review): 'Militaty' looks like a typo for 'Military'; the string
    # is a translation message id, so it is left untouched here.
    "#mobm": EventType((EventType.CUSTOM, _('Militaty Mobilisation'))),
    "#mpro": EventType((EventType.CUSTOM, _('Military Promotion'))),
    "#mser": EventType.MILITARY_SERV,  # Epers_MilitaryService
    "#natu": EventType.NATURALIZATION,  # Epers_Naturalisation
    "#occu": EventType.OCCUPATION,  # Epers_Occupation
    "#ordn": EventType.ORDINATION,  # Epers_Ordination
    "#prop": EventType.PROPERTY,  # Epers_Property
    "#resi": EventType.RESIDENCE,  # Epers_Residence
    "#reti": EventType.RETIREMENT,
    "#slgc": EventType((EventType.CUSTOM, _('LDS Seal to child'))),  # Epers_ScellentChildLDS
    "#slgp": LdsOrd.SEAL_TO_PARENTS,  # Epers_ScellentParentLDS
    "#slgs": LdsOrd.SEAL_TO_SPOUSE,
    "#vteb": EventType((EventType.CUSTOM, _('Sold property'))),  # Epers_VenteBien
    "#will": EventType.WILL,  # Epers_Will
}
126
# GeneWeb family event tag -> Gramps event type.  Trailing comments are kept
# from the original table (presumably the corresponding GeneWeb event names).
fevents_map = {
    "#marr": EventType.MARRIAGE,  # Efam_Marriage
    "#nmar": EventType.NUM_MARRIAGES,
    "#nmen": EventType((EventType.CUSTOM, _('No mention'))),  # Efam_NoMention
    "#enga": EventType.ENGAGEMENT,  # Efam_Engage
    "#div": EventType.DIVORCE,
    "#sep": EventType((EventType.CUSTOM, _('Separated'))),  # Efam_Separated
    "#anul": EventType.ANNULMENT,  # Efam_Annulation
    "#marb": EventType.MARR_BANNS,  # Efam_MarriageBann
    "#marc": EventType.MARR_CONTR,  # Efam_MarriageContract
    "#marl": EventType.MARR_LIC,  # Efam_MarriageLicense
    "#resi": EventType.RESIDENCE,  # Efam_Residence
}
140
141#-------------------------------------------------------------------------
142#
143#
144#
145#-------------------------------------------------------------------------
def importData(database, filename, user):
    """
    Import the GeneWeb file ``filename`` into ``database``.

    An IOError raised while creating the parser or while parsing is reported
    through ``user.notify_error`` and makes the function return None.
    Otherwise an ImportInfo object is returned.  The file opened by the
    parser is closed before returning.

    :param database: database handed to GeneWebParser
    :param filename: path of the file to import
    :param user: object whose ``notify_error(title, details)`` is used to
                 report problems
    """
    try:
        parser = GeneWebParser(database, filename)
    except IOError as msg:
        user.notify_error(_("%s could not be opened\n") % filename, str(msg))
        return None

    try:
        parser.parse_geneweb_file()
    except IOError as msg:
        errmsg = _("%s could not be opened\n") % filename
        user.notify_error(errmsg, str(msg))
        return None
    finally:
        # The parser opens the file in its constructor and never closes it.
        # It only has an ``f`` attribute when a file name was given.
        infile = getattr(parser, "f", None)
        if infile is not None:
            infile.close()
    return ImportInfo({_("Results"): _("done")})
163
164#-------------------------------------------------------------------------
165# For a description of the file format see
166# http://cristal.inria.fr/~ddr/GeneWeb/en/gwformat.htm
167# https://github.com/geneanet/geneweb/issues/315
168#-------------------------------------------------------------------------
169class GeneWebParser:
170    def __init__(self, dbase, file):
171        self.db = dbase
172        if file: # Unit tests can create the parser w/o underlying file
173            self.f = open(file, "rUb")
174            self.filename = file
175            self.encoding = 'iso-8859-1'
176            self.gwplus = False
177
178    def get_next_line(self):
179        self.lineno += 1
180        line = self.f.readline()
181
182        try:
183            line = line.decode('utf-8')
184        except GrampsImportError as err:
185            self.errmsg(str(err))
186
187        if line:
188            try:
189                line = str(line.strip())
190            except UnicodeDecodeError:
191                line = line.decode(self.encoding).strip()
192        else:
193            line = None
194        return line
195
196    def parse_geneweb_file(self):
197        with DbTxn(_("GeneWeb import"), self.db, batch=True) as self.trans:
198            self.db.disable_signals()
199            t = time.time()
200            self.lineno = 0
201            self.index = 0
202            self.fam_count = 0
203            self.indi_count = 0
204
205            self.fkeys = []
206            self.ikeys = {}
207            self.pkeys = {}
208            self.skeys = {}
209
210            self.current_mode = None
211            self.current_family = None
212            self.current_husband_handle = None
213            self.current_child_birthplace_handle = None
214            self.current_child_source_handle = None
215            try:
216                while 1:
217                    line = self.get_next_line()
218                    if line is None:
219                        break
220                    if line == "":
221                        continue
222
223                    fields = line.split(" ")
224
225                    LOG.debug("LINE: %s" %line)
226
227                    if fields[0] == "gwplus":
228                        self.gwplus = True
229                        self.encoding = 'utf-8'
230                    elif fields[0] == "encoding:":
231                        self.encoding = fields[1]
232                    elif fields[0] == "fam":
233                        self.current_mode = "fam"
234                        self.read_family_line(line,fields)
235                    elif fields[0] == "rel":
236                        self.current_mode = "rel"
237                        self.read_relationship_person(line,fields)
238                    elif fields[0] == "src":
239                        self.read_source_line(line,fields)
240                    elif fields[0] in ("wit", "wit:"):
241                        self.read_witness_line(line,fields)
242                    elif fields[0] == "cbp":
243                        self.read_children_birthplace_line(line,fields)
244                    elif fields[0] == "csrc":
245                        self.read_children_source_line(line,fields)
246                    elif fields[0] == "beg" and self.current_mode == "fam":
247                        self.read_children_lines()
248                    elif fields[0] == "beg" and self.current_mode == "rel":
249                        self.read_relation_lines()
250                    elif fields[0] == "comm":
251                        self.read_family_comment(line,fields)
252                    elif fields[0] == "notes":
253                        self.read_person_notes_lines(line,fields)
254                    elif fields[0] == "fevt" and self.current_mode == "fam":
255                        #self.read_fevent_line(self.get_next_line())
256                        pass
257                    elif fields[0] == "pevt":
258                        #self.read_pevent_line(self.get_next_line(), fields)
259                        pass
260                    elif fields[0] == "notes-db":
261                        self.read_database_notes_lines(line,fields)
262                    elif fields[0] == "pages-ext" or "wizard-note":
263                        pass
264                    elif fields[0] == "end":
265                        self.current_mode = None
266                    else:
267                        LOG.warning("parse_geneweb_file(): Token >%s< unknown. line %d skipped: %s" %
268                                 (fields[0],self.lineno,line))
269            except GedcomError as err:
270                self.errmsg(str(err))
271
272            t = time.time() - t
273            # translators: leave all/any {...} untranslated
274            msg = ngettext('Import Complete: {number_of} second',
275                           'Import Complete: {number_of} seconds', t
276                          ).format(number_of=t)
277
278        self.db.enable_signals()
279        self.db.request_rebuild()
280
281        LOG.debug(msg)
282        LOG.debug("Families: %d" % len(self.fkeys))
283        LOG.debug("Individuals: %d" % len(self.ikeys))
284        return None
285
286    def read_family_line(self,line,fields):
287        self.current_husband_handle = None
288        self.current_child_birthplace_handle = None
289        self.current_child_source_handle = None
290        self.current_family = Family()
291        self.db.add_family(self.current_family,self.trans)
292        #self.db.commit_family(self.current_family,self.trans)
293        self.fkeys.append(self.current_family.get_handle())
294        idx = 1;
295
296        LOG.debug("\nHusband:")
297        (idx, husband) = self.parse_person(fields,idx,Person.MALE,None)
298        if husband:
299            self.current_husband_handle = husband.get_handle()
300            self.current_family.set_father_handle(husband.get_handle())
301            self.db.commit_family(self.current_family,self.trans)
302            husband.add_family_handle(self.current_family.get_handle())
303            self.db.commit_person(husband,self.trans)
304        LOG.debug("Marriage:")
305        idx = self.parse_marriage(fields,idx)
306        LOG.debug("Wife:")
307        (idx,wife) = self.parse_person(fields,idx,Person.FEMALE,None)
308        if wife:
309            self.current_family.set_mother_handle(wife.get_handle())
310            self.db.commit_family(self.current_family,self.trans)
311            wife.add_family_handle(self.current_family.get_handle())
312            self.db.commit_person(wife,self.trans)
313        return None
314
315    def read_relationship_person(self,line,fields):
316        LOG.debug(r"\Relationships:")
317        (idx,person) = self.parse_person(fields,1,Person.UNKNOWN,None)
318        if person:
319            self.current_relationship_person_handle = person.get_handle()
320
321    def read_relation_lines(self):
322        if not self.current_relationship_person_handle:
323            LOG.warning("Unknown person for relationship in line %d!" % self.lineno)
324            return None
325        rel_person = self.db.get_person_from_handle(self.current_relationship_person_handle)
326        while 1:
327            line = self.get_next_line()
328            if line is None or line == "end":
329                break
330            if line == "":
331                continue
332
333            # match relationship type and related person
334            line_re = re.compile("^- ([^:]+): (.*)$")
335            matches = line_re.match(line)
336            if matches:
337                #split related person into fields
338                fields = matches.groups()[1].split(" ")
339                if fields:
340                    (idx,asso_p) = self.parse_person(fields,0,Person.UNKNOWN,None)
341                    pref = PersonRef()
342                    pref.set_relation(matches.groups()[0])
343                    LOG.warning("TODO: Handle association types properly")
344                    pref.set_reference_handle(asso_p.get_handle())
345                    rel_person.add_person_ref(pref)
346                    self.db.commit_person(rel_person,self.trans)
347                else:
348                    LOG.warning("Invalid name of person in line %d" % self.lineno)
349            else:
350                LOG.warning("Invalid relationship in line %d" % self.lineno)
351                break
352        self.current_mode = None
353        return None
354
355    def read_source_line(self,line,fields):
356        if not self.current_family:
357            LOG.warning("Unknown family of child in line %d!" % self.lineno)
358            return None
359        source = self.get_or_create_source(self.decode(fields[1]))
360        self.current_family.add_citation(source.get_handle())
361        self.db.commit_family(self.current_family,self.trans)
362        return None
363
364    def read_witness_line(self,line,fields):
365        LOG.debug("Witness:")
366        if fields[1] == "m:":
367            (idx,wit_p) = self.parse_person(fields,2,Person.MALE,None)
368        elif fields[1] == "f:":
369            (idx,wit_p) = self.parse_person(fields,2,Person.FEMALE,None)
370        else:
371            (idx,wit_p) = self.parse_person(fields,1,None,None)
372        if wit_p:
373            mev = None
374            # search marriage event
375            for evr in self.current_family.get_event_ref_list():
376                ev = self.db.get_event_from_handle(evr.get_reference_handle())
377                if ev.get_type() == EventType.MARRIAGE:
378                    mev = ev # found.
379            if not mev: # No marriage event found create a new one
380                mev = self.create_event(EventType.MARRIAGE, None, None, None, None)
381                mar_ref = EventRef()
382                mar_ref.set_reference_handle(mev.get_handle())
383                self.current_family.add_event_ref(mar_ref)
384            wit_ref = EventRef()
385            wit_ref.set_role(EventRoleType(EventRoleType.WITNESS))
386            wit_ref.set_reference_handle(mev.get_handle())
387            wit_p.add_event_ref(wit_ref)
388            self.db.commit_person(wit_p,self.trans)
389        return None
390
391    def read_children_lines(self):
392        father_surname = "Dummy"
393        if not self.current_husband_handle:
394            LOG.warning("Unknown father for child in line %d!" % self.lineno)
395            return None
396        husb = self.db.get_person_from_handle(self.current_husband_handle)
397        father_surname = husb.get_primary_name().get_surname()
398        if not self.current_family:
399            LOG.warning("Unknown family of child in line %d!" % self.lineno)
400            return None
401        while 1:
402            line = self.get_next_line()
403            if line is None:
404                break
405            if line == "":
406                continue
407
408            fields = line.split(" ")
409            if fields[0] == "-":
410                LOG.debug("Child:")
411                child = None
412                if fields[1] == "h":
413                    (idx,child) = self.parse_person(fields,2,Person.MALE,father_surname)
414                elif fields[1] == "f":
415                    (idx,child) = self.parse_person(fields,2,Person.FEMALE,father_surname)
416                else:
417                    (idx,child) = self.parse_person(fields,1,Person.UNKNOWN,father_surname)
418
419                if child:
420                    childref = ChildRef()
421                    childref.set_reference_handle(child.get_handle())
422                    self.current_family.add_child_ref( childref)
423                    self.db.commit_family(self.current_family,self.trans)
424                    child.add_parent_family_handle( self.current_family.get_handle())
425                    if self.current_child_birthplace_handle:
426                        birth = None
427                        birth_ref = child.get_birth_ref()
428                        if birth_ref:
429                            birth = self.db.get_event_from_handle(birth_ref.ref)
430                        if not birth:
431                            birth = self.create_event(EventType.BIRTH)
432                            birth_ref = EventRef()
433                            birth_ref.set_reference_handle(birth.get_handle())
434                            child.set_birth_ref(birth_ref)
435                        birth.set_place_handle(self.current_child_birthplace_handle)
436                        self.db.commit_event(birth,self.trans)
437                    if self.current_child_source_handle:
438                        child.add_citation(self.current_child_source_handle)
439                    self.db.commit_person(child,self.trans)
440            else:
441                break
442        self.current_mode = None
443        return None
444
445
446    def read_children_birthplace_line(self,line,fields):
447        cbp = self.get_or_create_place(self.decode(fields[1]))
448        if cbp:
449            self.current_child_birthplace_handle = cbp.get_handle()
450        return None
451
452    def read_children_source_line(self,line,fields):
453        csrc = self.get_or_create_source(self.decode(fields[1]))
454        self.current_child_source_handle = csrc.handle
455        return None
456
457    def read_family_comment(self,line,fields):
458        if not self.current_family:
459            LOG.warning("Unknown family of child in line %d!" % self.lineno)
460            return None
461        n = Note()
462        n.set(line)
463        self.db.add_note(n,self.trans)
464        self.current_family.add_note(n.handle)
465        self.db.commit_family(self.current_family,self.trans)
466        return None
467
468    def _read_notes_lines(self, note_tag):
469        note_txt = ""
470        while True:
471            line = self.get_next_line()
472            if line is None:
473                break
474
475            fields = line.split(" ")
476            if fields[0] == "end" and fields[1] == note_tag:
477                break
478            elif fields[0] == "beg":
479                continue
480            else:
481                if note_txt:
482                    note_txt = note_txt + "\n" + line
483                else:
484                    note_txt = note_txt + line
485        if note_txt:
486            n = Note()
487            n.set(note_txt)
488            self.db.add_note(n,self.trans)
489            return n.handle
490        return None
491
492    def read_person_notes_lines(self,line,fields):
493        (idx,person) = self.parse_person(fields,1,None,None)
494        note_handle = self._read_notes_lines( fields[0])
495        if note_handle:
496            person.add_note(note_handle)
497            self.db.commit_person(person,self.trans)
498
499    def read_database_notes_lines(self,line,fields):
500        note_handle = self._read_notes_lines( fields[0])
501
502    def parse_marriage(self,fields,idx):
503        mariageDataRe = re.compile("^[+#-0-9].*$")
504
505        mar_date = None
506        mar_place = None
507        mar_source = None
508
509        sep_date = None
510        div_date = None
511
512        married = 1
513        engaged = 0
514
515        # skip to marriage date in case person contained unmatches tokens
516        #Alex: this failed when fields[idx] was an empty line. Fixed.
517        #while idx < len(fields) and not fields[idx][0] == "+":
518        while idx < len(fields) and not (fields[idx] and fields[idx][0] == "+"):
519            if fields[idx]:
520                LOG.warning(("parse_marriage(): Unknown field: " +
521                          "'%s' in line %d!") % (fields[idx], self.lineno))
522            idx += 1
523
524        while idx < len(fields) and mariageDataRe.match(fields[idx]):
525            field = fields[idx]
526            idx += 1
527            if field.startswith("+"):
528                field = field[1:]
529                mar_date = self.parse_date(self.decode(field))
530                LOG.debug(" Married at: %s" % field)
531            elif field.startswith("-"):
532                field = field[1:]
533                div_date = self.parse_date(self.decode(field))
534                LOG.debug(" Div at: %s" % field)
535            elif field == "#mp" and idx < len(fields):
536                mar_place = self.get_or_create_place(self.decode(fields[idx]))
537                LOG.debug(" Marriage place: %s" % fields[idx])
538                idx += 1
539            elif field == "#ms" and idx < len(fields):
540                mar_source = self.get_or_create_source(self.decode(fields[idx]))
541                LOG.debug(" Marriage source: %s" % fields[idx])
542                idx += 1
543            elif field == "#sep" and idx < len(fields):
544                sep_date = self.parse_date(self.decode(fields[idx]))
545                LOG.debug(" Seperated since: %s" % fields[idx])
546                idx += 1
547            elif field == "#nm":
548                LOG.debug(" Are not married.")
549                married = 0
550            elif field == "#noment":
551                LOG.debug(" Not mentioned.")
552            elif field == "#eng":
553                LOG.debug(" Are engaged.")
554                engaged = 1
555            else:
556                LOG.warning(("parse_marriage(): Unknown field " +
557                          "'%s'for mariage in line %d!") % (field, self.lineno))
558
559        if mar_date or mar_place or mar_source:
560            mar = self.create_event(
561                EventType.MARRIAGE, None, mar_date, mar_place, mar_source)
562            mar_ref = EventRef()
563            mar_ref.set_reference_handle(mar.get_handle())
564            mar_ref.set_role(EventRoleType.FAMILY)
565            self.current_family.add_event_ref(mar_ref)
566            self.current_family.set_relationship(
567                FamilyRelType(FamilyRelType.MARRIED))
568
569        if div_date:
570            div = self.create_event(
571                EventType.DIVORCE, None, div_date, None, None)
572            div_ref = EventRef()
573            div_ref.set_reference_handle(div.get_handle())
574            div_ref.set_role(EventRoleType.FAMILY)
575            self.current_family.add_event_ref(div_ref)
576
577        if sep_date or engaged:
578            sep = self.create_event(
579                EventType.ENGAGEMENT, None, sep_date, None, None)
580            sep_ref = EventRef()
581            sep_ref.set_reference_handle(sep.get_handle())
582            sep_ref.set_role(EventRoleType.FAMILY)
583            self.current_family.add_event_ref(sep_ref)
584
585        if not married:
586            self.current_family.set_relationship(
587                FamilyRelType(FamilyRelType.UNMARRIED))
588
589        self.db.commit_family(self.current_family,self.trans)
590        return idx
591
592    def parse_person(self,fields,idx,gender,father_surname):
593
594        if not father_surname:
595            if not idx < len(fields):
596                LOG.warning("Missing surname of person in line %d!" % self.lineno)
597                surname =""
598            else:
599                surname = self.decode(fields[idx])
600            idx += 1
601        else:
602            surname = father_surname
603
604        if not idx < len(fields):
605            LOG.warning("Missing firstname of person in line %d!" % self.lineno)
606            firstname = ""
607        else:
608            firstname = self.decode(fields[idx])
609        idx += 1
610        if idx < len(fields) and father_surname:
611            noSurnameRe = re.compile(r"^[({\[~><?0-9#].*$")
612            if not noSurnameRe.match(fields[idx]):
613                surname = self.decode(fields[idx])
614                idx += 1
615
616        LOG.debug("Person: %s %s" % (firstname, surname))
617        person = self.get_or_create_person(firstname,surname)
618        name = Name()
619        name.set_type( NameType(NameType.BIRTH))
620        name.set_first_name(firstname)
621        surname_obj = name.get_primary_surname()
622        surname_obj.set_surname(surname)
623        person.set_primary_name(name)
624        if person.get_gender() == Person.UNKNOWN and gender is not None:
625            person.set_gender(gender)
626        self.db.commit_person(person,self.trans)
627        personDataRe = re.compile(r"^[kmes0-9<>~#\[({!].*$")
628        dateRe = re.compile("^[kmes0-9~<>?]+.*$")
629
630        source = None
631        birth_parsed = False
632        birth_date = None
633        birth_place = None
634        birth_source = None
635
636        bapt_date = None
637        bapt_place = None
638        bapt_source = None
639
640        death_date = None
641        death_place = None
642        death_source = None
643        death_cause = None
644
645        crem_date = None
646        bur_date = None
647        bur_place = None
648        bur_source = None
649
650        public_name = None
651        firstname_aliases = []
652        nick_names = []
653        name_aliases = []
654        surname_aliases = []
655
656        while idx < len(fields) and personDataRe.match(fields[idx]):
657            field = fields[idx]
658            idx += 1
659            if field.startswith('('):
660                LOG.debug("Public Name: %s" % field)
661                public_name = self.decode(field[1:-1])
662            elif field.startswith('{'):
663                LOG.debug("Firstsname Alias: %s" % field)
664                firstname_aliases.append(self.decode(field[1:-1]))
665            elif field.startswith('['):
666                LOG.debug("Title: %s" % field)
667                titleparts = self.decode(field[1:-1]).split(":")
668                tname = ttitle = tplace = tstart = tend = tnth = None
669                try:
670                    tname = titleparts[0]
671                    ttitle = titleparts[1]
672                    if titleparts[2]:
673                        tplace = self.get_or_create_place(titleparts[2])
674                    tstart = self.parse_date(titleparts[3])
675                    tend = self.parse_date(titleparts[4])
676                    tnth = titleparts[5]
677                except IndexError:  # not all parts are written all the time
678                    pass
679                if tnth:    # Append title numer to title
680                    # TODO for Arabic, should the next comma be translated?
681                    ttitle += ", " + tnth
682                title = self.create_event(
683                           EventType.NOB_TITLE, ttitle, tstart, tplace)
684                # TODO: Geneweb has a start date and an end date, and therefore
685                # supports stuff like: FROM about 1955 TO between 1998 and 1999
686                # gramps only supports one single date or range.
687                if tname and tname != "*":
688                    n = Note()
689                    n.set(tname)
690                    self.db.add_note(n,self.trans)
691                    title.add_note( n.handle)
692                title_ref = EventRef()
693                title_ref.set_reference_handle(title.get_handle())
694                person.add_event_ref(title_ref)
695            elif field == '#nick' and idx < len(fields):
696                LOG.debug("Nick Name: %s" % fields[idx])
697                nick_names.append(self.decode(fields[idx]))
698                idx += 1
699            elif field == '#occu' and idx < len(fields):
700                LOG.debug("Occupation: %s" % fields[idx])
701                occu = self.create_event(
702                        EventType.OCCUPATION, self.decode(fields[idx]))
703                occu_ref = EventRef()
704                occu_ref.set_reference_handle(occu.get_handle())
705                person.add_event_ref(occu_ref)
706                idx += 1
707            elif field == '#alias' and idx < len(fields):
708                LOG.debug("Name Alias: %s" % fields[idx])
709                name_aliases.append(self.decode(fields[idx]))
710                idx += 1
711            elif field == '#salias' and idx < len(fields):
712                LOG.debug("Surname Alias: %s" % fields[idx])
713                surname_aliases.append(self.decode(fields[idx]))
714                idx += 1
715            elif field == '#image' and idx < len(fields):
716                LOG.debug("Image: %s" % fields[idx])
717                idx += 1
718            elif field == '#src' and idx < len(fields):
719                LOG.debug("Source: %s" % fields[idx])
720                source = self.get_or_create_source(self.decode(fields[idx]))
721                idx += 1
722            elif field == '#bs' and idx < len(fields):
723                LOG.debug("Birth Source: %s" % fields[idx])
724                birth_source = self.get_or_create_source(self.decode(fields[idx]))
725                idx += 1
726            elif field[0] == '!':
727                LOG.debug("Baptize at: %s" % field[1:])
728                bapt_date = self.parse_date(self.decode(field[1:]))
729            elif field == '#bp' and idx < len(fields):
730                LOG.debug("Birth Place: %s" % fields[idx])
731                birth_place = self.get_or_create_place(self.decode(fields[idx]))
732                idx += 1
733            elif field == '#pp' and idx < len(fields):
734                LOG.debug("Baptize Place: %s" % fields[idx])
735                bapt_place = self.get_or_create_place(self.decode(fields[idx]))
736                idx += 1
737            elif field == '#ps' and idx < len(fields):
738                LOG.debug("Baptize Source: %s" % fields[idx])
739                bapt_source = self.get_or_create_source(self.decode(fields[idx]))
740                idx += 1
741            elif field == '#dp' and idx < len(fields):
742                LOG.debug("Death Place: %s" % fields[idx])
743                death_place = self.get_or_create_place(self.decode(fields[idx]))
744                idx += 1
745            elif field == '#ds' and idx < len(fields):
746                LOG.debug("Death Source: %s" % fields[idx])
747                death_source = self.get_or_create_source(self.decode(fields[idx]))
748                idx += 1
749            elif field == '#buri' and idx < len(fields):
750                if fields[idx][0]!='#': # bug in GeneWeb: empty #buri fields
751                    LOG.debug("Burial Date: %s" % fields[idx])
752                    bur_date = self.parse_date(self.decode(fields[idx]))
753                    idx += 1
754            elif field == '#crem' and idx < len(fields):
755                LOG.debug("Cremention Date: %s" % fields[idx])
756                crem_date = self.parse_date(self.decode(fields[idx]))
757                idx += 1
758            elif field == '#rp' and idx < len(fields):
759                LOG.debug("Burial Place: %s" % fields[idx])
760                bur_place = self.get_or_create_place(self.decode(fields[idx]))
761                idx += 1
762            elif field == '#rs' and idx < len(fields):
763                LOG.debug("Burial Source: %s" % fields[idx])
764                bur_source = self.get_or_create_source(self.decode(fields[idx]))
765                idx += 1
766            elif field == '#apubl':
767                LOG.debug("This is a public record")
768            elif field == '#apriv':
769                LOG.debug("This is a private record")
770                person.set_privacy(True)
771            elif field == '#h':
772                LOG.debug("This is a restricted record")
773                #TODO: Gramps does currently not feature this level
774                person.set_privacy(True)
775            elif dateRe.match(field):
776                if not birth_parsed:
777                    LOG.debug("Birth Date: %s" % field)
778                    birth_date = self.parse_date(self.decode(field))
779                    birth_parsed = True
780                else:
781                    LOG.debug("Death Date: %s" % field)
782                    death_date = self.parse_date(self.decode(field))
783                    if field == "mj":
784                        death_cause = "Died joung"
785                    elif field.startswith("k"):
786                        death_cause = "Killed"
787                    elif field.startswith("m"):
788                        death_cause = "Murdered"
789                    elif field.startswith("e"):
790                        death_cause = "Executed"
791                    elif field.startswith("d"):
792                        death_cause = "Disappeared"
793                    #TODO: Set special death types more properly
794            else:
795                LOG.warning(("parse_person(): Unknown field " +
796                          "'%s' for person in line %d!") % (field, self.lineno))
797
798        if public_name:
799            name = person.get_primary_name()
800            name.set_type(NameType(NameType.BIRTH))
801            person.add_alternate_name(name)
802            name = Name()
803            name.set_type(NameType(NameType.AKA))
804            name.set_first_name(public_name)
805            surname_obj = name.get_primary_surname()
806            surname_obj.set_surname(surname)
807            person.set_primary_name(name)
808
809        for aka in nick_names:
810            name = Attribute()
811            name.set_type(AttributeType(AttributeType.NICKNAME))
812            name.set_value(aka)
813            person.add_attribute(name)
814
815        for aka in firstname_aliases:
816            name = Name()
817            name.set_type(NameType(NameType.AKA))
818            name.set_first_name(aka)
819            surname_obj = name.get_primary_surname()
820            surname_obj.set_surname(surname)
821            person.add_alternate_name(name)
822
823        for aka in name_aliases:
824            name = Name()
825            name.set_type(NameType(NameType.AKA))
826            name.set_first_name(aka)
827            surname_obj = name.get_primary_surname()
828            surname_obj.set_surname(surname)
829            person.add_alternate_name(name)
830
831        for aka in surname_aliases:
832            name = Name()
833            name.set_type(NameType(NameType.AKA))
834            if public_name:
835                name.set_first_name(public_name)
836            else:
837                name.set_first_name(firstname)
838            surname_obj = name.get_primary_surname()
839            surname_obj.set_surname(aka)
840            person.add_alternate_name(name)
841
842        if source:
843            person.add_citation(source.get_handle())
844
845        if birth_date or birth_place or birth_source:
846            birth = self.create_event(EventType.BIRTH, None, birth_date, birth_place, birth_source)
847            birth_ref = EventRef()
848            birth_ref.set_reference_handle( birth.get_handle())
849            person.set_birth_ref( birth_ref)
850
851        if bapt_date or bapt_place or bapt_source:
852            babt = self.create_event(EventType.BAPTISM, None, bapt_date, bapt_place, bapt_source)
853            babt_ref = EventRef()
854            babt_ref.set_reference_handle( babt.get_handle())
855            person.add_event_ref( babt_ref)
856
857        if death_date or death_place or death_source or death_cause:
858            death = self.create_event(EventType.DEATH, None, death_date, death_place, death_source)
859            if death_cause:
860                death.set_description(death_cause)
861                self.db.commit_event(death,self.trans)
862            death_ref = EventRef()
863            death_ref.set_reference_handle( death.get_handle())
864            person.set_death_ref( death_ref)
865
866        if bur_date:
867            bur = self.create_event(EventType.BURIAL, None, bur_date, bur_place, bur_source)
868            bur_ref = EventRef()
869            bur_ref.set_reference_handle( bur.get_handle())
870            person.add_event_ref( bur_ref)
871
872        if crem_date:
873            crem = self.create_event(EventType.CREMATION, None, crem_date, bur_place, bur_source)
874            crem_ref = EventRef()
875            crem_ref.set_reference_handle( crem.get_handle())
876            person.add_event_ref(crem_ref)
877
878        self.db.commit_person(person,self.trans)
879
880        return (idx,person)
881
882    def parse_date(self,field):
883        if field == "0":
884            return None
885        date = Date()
886        matches = _text_parse.match(field)
887        if matches:
888            groups = matches.groups()
889            date.set_as_text(groups[0])
890            date.set_modifier(Date.MOD_TEXTONLY)
891            return date
892
893        matches = _date_parse.match(field)
894        if matches:
895            groups = matches.groups()
896            mod = _mod_map.get(groups[0],Date.MOD_NONE)
897            if groups[3] == "..":
898                mod = Date.MOD_SPAN
899                cal2 = _cal_map.get(groups[5],Date.CAL_GREGORIAN)
900                sub2 = self.sub_date(groups[4])
901            else:
902                sub2 = (0,0,0)
903            cal1 = _cal_map.get(groups[2],Date.CAL_GREGORIAN)
904            sub1 = self.sub_date(groups[1])
905            try:
906                date.set(Date.QUAL_NONE,mod, cal1,
907                         (sub1[0],sub1[1],sub1[2],0,sub2[0],sub2[1],sub2[2],0))
908            except DateError as e:
909                # TRANSLATORS: leave the {date} and {gw_snippet} untranslated
910                # in the format string, but you may re-order them if needed.
911                LOG.warning(_(
912                    "Invalid date {date} in {gw_snippet}, "
913                    "preserving date as text."
914                    ).format(date=e.date.__dict__, gw_snippet=field))
915                date.set(modifier=Date.MOD_TEXTONLY, text=field)
916            return date
917        else:
918            return None
919
920    def sub_date(self,data):
921        vals = data.split('/')
922        if len(vals) == 1:
923            return (0,0,int(vals[0]))
924        elif len(vals) == 2:
925            return (0,int(vals[0]),int(vals[1]))
926        else:
927            return (int(vals[0]),int(vals[1]),int(vals[2]))
928
929    def create_event(self,type,desc=None,date=None,place=None,source=None):
930        event = Event()
931        if type:
932            event.set_type(EventType(type))
933        if desc:
934            event.set_description(desc)
935        if date:
936            event.set_date_object(date)
937        if place:
938            event.set_place_handle(place.get_handle())
939        if source:
940            event.add_citation(source.get_handle())
941        self.db.add_event(event,self.trans)
942        self.db.commit_event(event,self.trans)
943        return event
944
945    def get_or_create_person(self,firstname,lastname):
946        person = None
947        mykey = firstname+lastname
948        if mykey in self.ikeys and firstname != "?" and lastname != "?":
949            person = self.db.get_person_from_handle(self.ikeys[mykey])
950        else:
951            person = Person()
952            self.db.add_person(person,self.trans)
953            self.db.commit_person(person,self.trans)
954            self.ikeys[mykey] = person.get_handle()
955        return person
956
957    def get_or_create_place(self,place_name):
958        place = None
959        if place_name in self.pkeys:
960            place = self.db.get_place_from_handle(self.pkeys[place_name])
961        else:
962            place = Place()
963            place.set_title(place_name)
964            self.db.add_place(place,self.trans)
965            self.db.commit_place(place,self.trans)
966            self.pkeys[place_name] = place.get_handle()
967        return place
968
969    def get_or_create_source(self,source_name):
970        source = None
971        if source_name in self.skeys:
972            source = self.db.get_source_from_handle(self.skeys[source_name])
973        else:
974            source = Source()
975            source.set_title(source_name)
976            self.db.add_source(source,self.trans)
977            self.db.commit_source(source,self.trans)
978            self.skeys[source_name] = source.get_handle()
979        citation = Citation()
980        citation.set_reference_handle(source.get_handle())
981        self.db.add_citation(citation, self.trans)
982        self.db.commit_citation(citation, self.trans)
983        return citation
984
    def read_fevent_line(self, event):
        """
        Process a family-event line for ``self.current_family`` and then
        consume the lines that follow it.

        :param event: text of the current line; its first five characters
                      are looked up in ``fevents_map`` to get the event type.

        Returns without doing anything when the prefix is not in
        ``fevents_map``, and returns as soon as the family already
        references an event of the same type.  In both cases the following
        lines are not read here and ``self.current_mode`` is left as is.
        """

        if fevents_map.get(event[0:5]) == None:
            return #need to fix custom event types not in the map

        # fev is only used as a "found" flag inside the loop below.
        fev = None
        # get events for the current family
        # NOTE(review): when the family has no event references the loop
        # body never runs, so no event is created for ``event`` itself.
        for evr in self.current_family.get_event_ref_list():
            ev = self.db.get_event_from_handle(evr.get_reference_handle())
            if ev.get_type() == fevents_map.get(event[0:5]):
                fev = ev # found. Need to also check EventRef role
                return
            if not fev: # No event found create a new one
                # Only references with the FAMILY role trigger a creation.
                if evr.get_role() != EventRoleType(EventRoleType.FAMILY):
                    continue
                else:
                    LOG.info((ev.get_type(), self.current_family.handle))
                    # NOTE(review): this runs once per non-matching FAMILY
                    # reference, and new_gwplus_fevent() adds a reference
                    # to self.current_family.  If get_event_ref_list()
                    # returns the live list, this loop will also visit the
                    # reference just added -- confirm the intended logic.
                    self.new_gwplus_fevent(event)
        # Read on: every further family-event line creates a new event,
        # "wit:" lines are skipped, anything else (including a falsy line)
        # ends the block.
        while True:
            line = self.get_next_line()
            if line and line[0:5] in fevents_map:
                self.new_gwplus_fevent(line)
            elif line and line[0:4] == "wit:":
                continue
            else:
                # NOTE(review): the line that ends the block has already
                # been fetched here; verify how the caller handles it.
                self.current_mode = None
                #self.db.commit_family(self.current_family,self.trans)
                break
1013
    def read_pevent_line(self, event, fields):
        """
        Process a person-event line and then consume the event and note
        lines that follow it.

        :param event: text of the current line; its first five characters
                      are looked up in ``pevents_map`` to get the event type.
        :param fields: sequence where ``fields[1]`` is used as the last
                       name and ``fields[2]`` as the first name.

        Sets ``self.person`` and ``self.current_person`` first, then
        returns early when the prefix is not in ``pevents_map``.
        """

        # Same key layout as get_or_create_person(): firstname + lastname.
        name = fields[2] + fields[1]

        try:
            self.person = self.ikeys[name]
        # check key on {ikey}
        # NOTE(review): the bare except swallows every exception, not only
        # a missing key; on failure self.person becomes a placeholder string.
        except:
            self.person = "(TO_CHECK: %s)" % fields[1:]
            #GrampsImportError()

        lastname = fields[1]
        firstname = fields[2]
        self.current_person = self.get_or_create_person(firstname, lastname)

        #name = Name()
        #name.set_type(NameType(NameType.BIRTH))
        #name.set_first_name(firstname)
        #surname_obj = name.get_primary_surname()
        #surname_obj.set_surname(surname)
        #self.current_person.set_primary_name(name)

        if pevents_map.get(event[0:5]) == None:
            return #need to fix custom event types not in the map

        self.current_event = None
        # get events for the current person
        # NOTE(review): when the person has no event references the loop
        # body never runs, so no event is created for ``event`` itself.
        for evr in self.current_person.get_event_ref_list():
            ev = self.db.get_event_from_handle(evr.get_reference_handle())
            if ev.get_type() == pevents_map.get(event[0:5]):
                self.current_event = ev # found. Need to also check EventRef role
            if not self.current_event: # No event found create a new one
                # NOTE(review): new_gwplus_pevent() adds a reference to
                # self.current_person; if get_event_ref_list() returns the
                # live list, this loop will also visit the reference just
                # added -- confirm the intended logic.
                self.current_event = self.new_gwplus_pevent(event)
        # Read on: further person-event lines create new events, "note"
        # lines become Note objects, anything else (including a falsy line)
        # ends the block.
        while True:
            line = self.get_next_line()
            if line and line[0:5] in pevents_map:
                self.current_mode = "person_event"
                self.current_event = self.new_gwplus_pevent(line)
            elif line and line[0:4] == "note":
                n = Note()
                # The note text is everything after the first five characters.
                n.set(line[5:])
                self.db.add_note(n, self.trans)
                if self.current_event:
                    self.current_event.add_note(n.handle)
                    self.db.commit_event(self.current_event, self.trans)
                else:
                    # NOTE(review): the note was added to the database but
                    # is attached to nothing; it is only printed to stdout.
                    print('note', n.handle)
            else:
                # NOTE(review): the line that ends the block has already
                # been fetched here; verify how the caller handles it.
                self.current_mode = None
                #self.db.commit_person(self.current_person,self.trans)
                break
1065
1066    def new_gwplus_fevent(self, line):
1067
1068        source = place = note = type = None
1069        date = self.parse_date(self.decode(line[6:]))
1070
1071        idx = 0
1072        LOG.info((line, fevents_map.get(line[0:5])))
1073        type = fevents_map.get(line[0:5])
1074        data = line.split()
1075        date = self.parse_date(self.decode(line[6:]))
1076        for part in data:
1077            idx += 1
1078            if part == "#p":
1079                place = self.get_or_create_place(self.decode(data[idx]))
1080            if part == "#s":
1081                source = self.get_or_create_source(self.decode(data[idx]))
1082        self.current_event = self.create_event(type, None, None, None, None)
1083        print('new event', self.current_event.handle)
1084        if date:
1085            print(date)
1086            self.current_event.set_date_object(date)
1087        if place:
1088            print('place', place.handle)
1089            self.current_event.set_place_handle(place.get_handle())
1090        if source:
1091            print('source', source.handle)
1092            self.current_event.add_citation(source.get_handle())
1093        self.db.commit_event(self.current_event, self.trans)
1094        nev_ref = EventRef()
1095        nev_ref.set_reference_handle(self.current_event.get_handle())
1096        self.current_family.add_event_ref(nev_ref)
1097        self.db.commit_family(self.current_family, self.trans)
1098        return self.current_event
1099
1100    def new_gwplus_pevent(self, line):
1101
1102        source = place = note = type = None
1103        date = self.parse_date(self.decode(line[6:]))
1104
1105        idx = 0
1106        LOG.info((self.person, line, pevents_map.get(line[0:5])))
1107        type = pevents_map.get(line[0:5])
1108        data = line.split()
1109        date = self.parse_date(self.decode(line[6:]))
1110        for part in data:
1111            idx += 1
1112            if part == "#p":
1113                place = self.get_or_create_place(self.decode(data[idx]))
1114            if part == "#s":
1115                source = self.get_or_create_source(self.decode(data[idx]))
1116        self.current_event = self.create_event(type, None, None, None, None)
1117        print('new event', self.current_event.handle)
1118        if date:
1119            print(date)
1120            self.current_event.set_date_object(date)
1121        if place:
1122            print('place', place.handle)
1123            self.current_event.set_place_handle(place.get_handle())
1124        if source:
1125            print('source', source.handle)
1126            self.current_event.add_citation(source.get_handle())
1127        self.db.commit_event(self.current_event, self.trans)
1128        nev_ref = EventRef()
1129        nev_ref.set_reference_handle(self.current_event.get_handle())
1130        self.current_person.add_event_ref(nev_ref)
1131        self.db.commit_person(self.current_person, self.trans)
1132        return self.current_event
1133
1134    def decode(self,s):
1135        s = s.replace('_',' ')
1136        charref_re = re.compile('(&#)(x?)([0-9a-zA-Z]+)(;)')
1137        for match in charref_re.finditer(s):
1138            try:
1139                if match.group(2):  # HEX
1140                    nchar = chr(int(match.group(3),16))
1141                else:   # Decimal
1142                    nchar = chr(int(match.group(3)))
1143                s = s.replace(match.group(0), nchar)
1144            except UnicodeDecodeError:
1145                pass
1146
1147        # replace named entities
1148        entref_re = re.compile('(&)([a-zA-Z]+)(;)')
1149        for match in entref_re.finditer(s):
1150            try:
1151                if match.group(2) in name2codepoint:
1152                    nchar = chr(name2codepoint[match.group(2)])
1153                s = s.replace(match.group(0), nchar)
1154            except UnicodeDecodeError:
1155                pass
1156
1157        return(s)
1158
    def debug(self, txt):
        """Forward ``txt`` to the module-level logger at DEBUG level."""
        LOG.debug(txt)
1161