1#
2# Gramps - a GTK+/GNOME based genealogy program
3#
4# Copyright (C) 2000-2006  Donald N. Allingham
5# Copyright (C) 2008       Brian G. Matherly
6# Copyright (C) 2010       Jakim Friant
7#
8# This program is free software; you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation; either version 2 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with this program; if not, write to the Free Software
20# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21#
22
23"""Tools/Database Processing/Extract Information from Names"""
24
25#-------------------------------------------------------------------------
26#
27# python modules
28#
29#-------------------------------------------------------------------------
30import re
31
32#-------------------------------------------------------------------------
33#
34# gnome/gtk
35#
36#-------------------------------------------------------------------------
37from gi.repository import Gtk
38from gi.repository import GObject
39
40#-------------------------------------------------------------------------
41#
42# gramps modules
43#
44#-------------------------------------------------------------------------
45from gramps.gen.const import URL_MANUAL_PAGE
46from gramps.gui.utils import ProgressMeter
47from gramps.gui.plug import tool
48from gramps.gui.dialog import OkDialog
49from gramps.gui.managedwindow import ManagedWindow
50from gramps.gui.display import display_help
51from gramps.gui.glade import Glade
52from gramps.gen.lib import NameOriginType, Surname
53from gramps.gen.db import DbTxn
54from gramps.gen.const import GRAMPS_LOCALE as glocale
55_ = glocale.translation.sgettext
56
57#-------------------------------------------------------------------------
58#
59# Constants
60#
61#-------------------------------------------------------------------------
# Manual page opened by the Help button: the manual base URL with the
# "_-_Tools" suffix appended.
WIKI_HELP_PAGE = '%s_-_Tools' % URL_MANUAL_PAGE
# Section of that page describing this tool. The string is passed through the
# translation function, so the section name can differ per locale.
WIKI_HELP_SEC = _('manual|Extract_Information_from_Names')
64
65#-------------------------------------------------------------------------
66#
67# constants
68#
69#-------------------------------------------------------------------------
70
71# List of possible surname prefixes. Notice that you must run the tool
72# multiple times for prefixes such as "van der".
# Default surname prefixes offered in the settings dialog.  Every item must
# be followed by a comma: adjacent string literals are silently concatenated
# by Python (a missing comma used to turn "el", "et" into "elet").
PREFIX_LIST = [
    "de", "van", "von", "di", "le", "du", "dela", "della",
    "des", "vande", "ten", "da", "af", "den", "das", "dello",
    "del", "en", "ein", "el", "et", "les", "lo", "los", "un",
    "um", "una", "uno", "der", "ter", "te", "die"]

# Default connector words that separate two distinct surnames.
CONNECTOR_LIST = ['e', 'y', ]
# Default connector words that are kept inside one single surname.
CONNECTOR_LIST_NONSPLIT = ['de', 'van']

# "Title. rest": a word of at least two letters ending in a dot, followed by
# whitespace and the remainder of the given name.
_title_re = re.compile(r"^ ([A-Za-z][A-Za-z]+\.) \s+ (.+) $", re.VERBOSE)
# 'Name (Nick)' or 'Name "Nick"': group 1 is the given name, group 2 the
# nickname found between parentheses or double quotes.
_nick_re = re.compile(r"(.+) \s* [(\"] (.+) [)\"]", re.VERBOSE)
84
85
86#-------------------------------------------------------------------------
87#
88# Search each name in the database, and compare the firstname against the
89# form of "Name (Nickname)".  If it matches, change the first name entry
90# to "Name" and add "Nickname" into the nickname field.  Also, search for
91# surname prefixes. If found, change the name entry and put the prefix in
92# the name prefix field.
93#
94#-------------------------------------------------------------------------
95
96
97class PatchNames(tool.BatchTool, ManagedWindow):
98    titleid = 1
99    nickid = 2
100    pref1id = 3
101    compid = 4
102
103    def __init__(self, dbstate, user, options_class, name, callback=None):
104        uistate = user.uistate
105        self.label = _('Name and title extraction tool')
106        ManagedWindow.__init__(self, uistate, [], self.__class__)
107        self.set_window(Gtk.Window(), Gtk.Label(), '')
108
109        tool.BatchTool.__init__(self, dbstate, user, options_class, name)
110        if self.fail:
111            self.close()
112            return
113
114        winprefix = Gtk.Dialog(
115            title=_("Default prefix and connector settings"),
116            transient_for=self.uistate.window, modal=True,
117            destroy_with_parent=True)
118        winprefix.add_button(_('_OK'), Gtk.ResponseType.ACCEPT)
119        winprefix.vbox.set_spacing(5)
120        hboxpref = Gtk.Box()
121        label = Gtk.Label(label=_('Prefixes to search for:'))
122        hboxpref.pack_start(label, False, False, 5)
123        self.prefixbox = Gtk.Entry()
124        self.prefixbox.set_text(', '.join(PREFIX_LIST))
125        hboxpref.pack_start(self.prefixbox, True, True, 0)
126        winprefix.vbox.pack_start(hboxpref, True, True, 0)
127        hboxcon = Gtk.Box()
128        label = Gtk.Label(label=_('Connectors splitting surnames:'))
129        hboxcon.pack_start(label, False, False, 5)
130        self.conbox = Gtk.Entry()
131        self.conbox.set_text(', '.join(CONNECTOR_LIST))
132        hboxcon.pack_start(self.conbox, True, True, 0)
133        winprefix.vbox.pack_start(hboxcon, True, True, 0)
134        hboxconns = Gtk.Box()
135        label = Gtk.Label(label=_('Connectors not splitting surnames:'))
136        hboxconns.pack_start(label, False, False, 5)
137        self.connsbox = Gtk.Entry()
138        self.connsbox.set_text(', '.join(CONNECTOR_LIST_NONSPLIT))
139        hboxconns.pack_start(self.connsbox, True, True, 0)
140        winprefix.vbox.pack_start(hboxconns, True, True, 0)
141        winprefix.resize(700, 100)
142        winprefix.show_all()
143
144        response = winprefix.run()
145        self.prefix_list = self.prefixbox.get_text().split(',')
146        self.prefix_list = list(map(strip, self.prefix_list))
147        self.prefixbox = None
148        self.connector_list = self.conbox.get_text().split(',')
149        self.connector_list = list(map(strip, self.connector_list))
150        self.conbox = None
151        self.connector_list_nonsplit = self.connsbox.get_text().split(',')
152        self.connector_list_nonsplit = list(
153            map(strip, self.connector_list_nonsplit))
154        self.connsbox = None
155
156        # Find a prefix in the first_name
157        self._fn_prefix_re = re.compile(
158            r"(\S+)\s+(%s)\s*$" % '|'.join(self.prefix_list), re.IGNORECASE)
159
160        # Find a prefix in the surname
161        self._sn_prefix_re = re.compile(
162            r"^\s*(%s)\s+(.+)" % '|'.join(self.prefix_list), re.IGNORECASE)
163        # Find a connector in the surname
164        self._sn_con_re = re.compile(
165            r"^\s*(.+)\s+(%s)\s+(.+)" % '|'.join(self.connector_list),
166            re.IGNORECASE)
167        winprefix.destroy()
168
169        self.cb = callback
170        self.handle_to_action = {}
171
172        self.progress = ProgressMeter(
173            _('Extracting Information from Names'), '',
174            parent=self.uistate.window)
175        self.progress.set_pass(_('Analyzing names'),
176                               self.db.get_number_of_people())
177
178        for person in self.db.iter_people():
179            key = person.handle
180            name = person.get_primary_name()
181            first = name.get_first_name()
182            sname = name.get_surname()
183
184            old_prefix = []
185            old_surn = []
186            old_con = []
187            old_prim = []
188            old_orig = []
189            for surn in name.get_surname_list():
190                old_prefix.append(surn.get_prefix())
191                old_surn.append(surn.get_surname())
192                old_con.append(surn.get_connector())
193                old_prim.append(surn.get_primary())
194                old_orig.append(surn.get_origintype())
195
196            if name.get_title():
197                old_title = [name.get_title()]
198            else:
199                old_title = []
200            new_title = []
201
202            match = _title_re.match(first)
203            while match:
204                groups = match.groups()
205                first = groups[1]
206                new_title.append(groups[0])
207                match = _title_re.match(first)
208            matchnick = _nick_re.match(first)
209
210            if new_title:
211                titleval = (" ".join(old_title + new_title), first)
212                if key in self.handle_to_action:
213                    self.handle_to_action[key][self.titleid] = titleval
214                else:
215                    self.handle_to_action[key] = {self.titleid: titleval}
216            elif matchnick:
217                # we check for nick, which changes given name like title
218                groups = matchnick.groups()
219                nickval = (groups[0], groups[1])
220                if key in self.handle_to_action:
221                    self.handle_to_action[key][self.nickid] = nickval
222                else:
223                    self.handle_to_action[key] = {self.nickid: nickval}
224            else:
225                # Try to find the name prefix in the given name, also this
226                # changes given name
227                match = self._fn_prefix_re.match(first)
228                if match:
229                    groups = match.groups()
230                    if old_prefix[0]:
231                        # Put the found prefix before the old prefix
232                        new_prefix = " ".join([groups[1], old_prefix[0]])
233                    else:
234                        new_prefix = groups[1]
235                    pref1val = (groups[0], new_prefix, groups[1])
236                    if key in self.handle_to_action:
237                        self.handle_to_action[key][self.pref1id] = pref1val
238                    else:
239                        self.handle_to_action[key] = {self.pref1id: pref1val}
240
241            #check for Gedcom import of compound surnames
242            if len(old_surn) == 1 and old_con[0] == '':
243                prefixes = old_prefix[0].split(',')
244                surnames = old_surn[0].split(',')
245                if len(prefixes) > 1 and len(prefixes) == len(surnames):
246                    #assume a list of prefix and a list of surnames
247                    prefixes = list(map(strip, prefixes))
248                    surnames = list(map(strip, surnames))
249                    primaries = [False] * len(prefixes)
250                    primaries[0] = True
251                    origs = []
252                    for ind in range(len(prefixes)):
253                        origs.append(NameOriginType())
254                    origs[0] = old_orig[0]
255                    compoundval = (surnames, prefixes, [''] * len(prefixes),
256                                   primaries, origs)
257                    if key in self.handle_to_action:
258                        self.handle_to_action[key][self.compid] = compoundval
259                    else:
260                        self.handle_to_action[key] = {self.compid: compoundval}
261                    #we cannot check compound surnames, so continue the loop
262                    continue
263
264            # Next, try to split surname in compounds: prefix surname connector
265            found = False
266            new_prefix_list = []
267            new_surname_list = []
268            new_connector_list = []
269            new_prim_list = []
270            new_orig_list = []
271            ind = 0
272            cont = True
273            for pref, surn, con, prim, orig in zip(
274                    old_prefix, old_surn, old_con, old_prim, old_orig):
275                surnval = surn.split()
276                if surnval == []:
277                    new_prefix_list.append(pref)
278                    new_surname_list.append('')
279                    new_connector_list.append(con)
280                    new_prim_list.append(prim)
281                    new_orig_list.append(orig)
282                    cont = False
283                    continue
284                val = surnval.pop(0)
285                while cont:
286                    new_prefix_list.append(pref)
287                    new_surname_list.append('')
288                    new_connector_list.append(con)
289                    new_prim_list.append(prim)
290                    new_orig_list.append(orig)
291
292                    while cont and (val.lower() in self.prefix_list):
293                        found = True
294                        if new_prefix_list[-1]:
295                            new_prefix_list[-1] += ' ' + val
296                        else:
297                            new_prefix_list[-1] = val
298                        try:
299                            val = surnval.pop(0)
300                        except IndexError:
301                            val = ''
302                            cont = False
303                    #after prefix we have a surname
304                    if cont:
305                        new_surname_list[-1] = val
306                        try:
307                            val = surnval.pop(0)
308                        except IndexError:
309                            val = ''
310                            cont = False
311                    #if value after surname indicates continue, then continue
312                    while cont and (
313                            val.lower() in self.connector_list_nonsplit):
314                        #add this val to the current surname
315                        new_surname_list[-1] += ' ' + val
316                        try:
317                            val = surnval.pop(0)
318                        except IndexError:
319                            val = ''
320                            cont = False
321                    # if previous is non-splitting connector, then add new val
322                    # to current surname
323                    if cont and (new_surname_list[-1].split()[-1].lower()
324                                 in self.connector_list_nonsplit):
325                        new_surname_list[-1] += ' ' + val
326                        try:
327                            val = surnval.pop(0)
328                        except IndexError:
329                            val = ''
330                            cont = False
331                    #if next is a connector, add it to the surname
332                    if cont and val.lower() in self.connector_list:
333                        found = True
334                        if new_connector_list[-1]:
335                            new_connector_list[-1] = ' ' + val
336                        else:
337                            new_connector_list[-1] = val
338                        try:
339                            val = surnval.pop(0)
340                        except IndexError:
341                            val = ''
342                            cont = False
343                    # initialize for a next surname in case there are still
344                    # val
345                    if cont:
346                        found = True  # we split surname
347                        pref = ''
348                        con = ''
349                        prim = False
350                        orig = NameOriginType()
351                ind += 1
352            if found:
353                compoundval = (new_surname_list, new_prefix_list,
354                               new_connector_list, new_prim_list,
355                               new_orig_list)
356                if key in self.handle_to_action:
357                    self.handle_to_action[key][self.compid] = compoundval
358                else:
359                    self.handle_to_action[key] = {self.compid: compoundval}
360
361            self.progress.step()
362
363        if self.handle_to_action:
364            self.display()
365        else:
366            self.progress.close()
367            self.close()
368            OkDialog(_('No modifications made'),
369                     _("No titles, nicknames or prefixes were found"),
370                     parent=self.uistate.window)
371
372    def build_menu_names(self, obj):
373        return (self.label, None)
374
375    def toggled(self, cell, path_string):
376        path = tuple(map(int, path_string.split(':')))
377        row = self.model[path]
378        row[0] = not row[0]
379        self.model.row_changed(path, row.iter)
380
381    def display(self):
382
383        self.top = Glade()
384        window = self.top.toplevel
385        self.top.connect_signals({
386            "destroy_passed_object": self.close,
387            "on_ok_clicked": self.on_ok_clicked,
388            "on_help_clicked": self.on_help_clicked,
389            "on_delete_event": self.close})
390
391        self.list = self.top.get_object("list")
392        self.set_window(window, self.top.get_object('title'), self.label)
393        self.setup_configs("interface.patchnames", 680, 400)
394
395        self.model = Gtk.ListStore(GObject.TYPE_BOOLEAN, GObject.TYPE_STRING,
396                                   GObject.TYPE_STRING, GObject.TYPE_STRING,
397                                   GObject.TYPE_STRING)
398
399        r = Gtk.CellRendererToggle()
400        r.connect('toggled', self.toggled)
401        c = Gtk.TreeViewColumn(_('Select'), r, active=0)
402        self.list.append_column(c)
403
404        c = Gtk.TreeViewColumn(_('ID'), Gtk.CellRendererText(), text=1)
405        self.list.append_column(c)
406
407        c = Gtk.TreeViewColumn(_('Type'), Gtk.CellRendererText(), text=2)
408        self.list.append_column(c)
409
410        c = Gtk.TreeViewColumn(_('Value'), Gtk.CellRendererText(), text=3)
411        self.list.append_column(c)
412
413        c = Gtk.TreeViewColumn(_('Current Name'), Gtk.CellRendererText(),
414                               text=4)
415        self.list.append_column(c)
416
417        self.list.set_model(self.model)
418
419        self.nick_hash = {}
420        self.title_hash = {}
421        self.prefix1_hash = {}
422        self.compound_hash = {}
423
424        self.progress.set_pass(_('Building display'),
425                               len(list(self.handle_to_action.keys())))
426
427        for key, data in self.handle_to_action.items():
428            p = self.db.get_person_from_handle(key)
429            gid = p.get_gramps_id()
430            if self.nickid in data:
431                given, nick = data[self.nickid]
432                handle = self.model.append()
433                self.model.set_value(handle, 0, 1)
434                self.model.set_value(handle, 1, gid)
435                self.model.set_value(handle, 2, _('Nickname'))
436                self.model.set_value(handle, 3, nick)
437                self.model.set_value(handle, 4,
438                                     p.get_primary_name().get_name())
439                self.nick_hash[key] = handle
440
441            if self.titleid in data:
442                title, given = data[self.titleid]
443                handle = self.model.append()
444                self.model.set_value(handle, 0, 1)
445                self.model.set_value(handle, 1, gid)
446                self.model.set_value(handle, 2, _('Person|Title'))
447                self.model.set_value(handle, 3, title)
448                self.model.set_value(
449                    handle, 4, p.get_primary_name().get_name())
450                self.title_hash[key] = handle
451
452            if self.pref1id in data:
453                given, prefixtotal, new_prefix = data[self.pref1id]
454                handle = self.model.append()
455                self.model.set_value(handle, 0, 1)
456                self.model.set_value(handle, 1, gid)
457                self.model.set_value(handle, 2, _('Prefix in given name'))
458                self.model.set_value(handle, 3, prefixtotal)
459                self.model.set_value(
460                    handle, 4, p.get_primary_name().get_name())
461                self.prefix1_hash[key] = handle
462
463            if self.compid in data:
464                surn_list, pref_list, con_list, prims, origs =\
465                    data[self.compid]
466                handle = self.model.append()
467                self.model.set_value(handle, 0, 1)
468                self.model.set_value(handle, 1, gid)
469                self.model.set_value(handle, 2, _('Compound surname'))
470                newval = ''
471                for sur, pre, con in zip(surn_list, pref_list, con_list):
472                    if newval:
473                        newval += '-['
474                    else:
475                        newval = '['
476                    newval += pre + ',' + sur
477                    if con:
478                        newval += ',' + con + ']'
479                    else:
480                        newval += ']'
481                self.model.set_value(handle, 3, newval)
482                self.model.set_value(handle, 4,
483                                     p.get_primary_name().get_name())
484                self.compound_hash[key] = handle
485
486            self.progress.step()
487
488        self.progress.close()
489        self.show()
490
491    def on_help_clicked(self, obj):
492        """Display the relevant portion of Gramps manual"""
493        display_help(webpage=WIKI_HELP_PAGE, section=WIKI_HELP_SEC)
494
495    def on_ok_clicked(self, obj):
496        with DbTxn(_("Extract information from names"), self.db, batch=True
497                   ) as trans:
498            self.db.disable_signals()
499
500            for key, data in self.handle_to_action.items():
501                p = self.db.get_person_from_handle(key)
502                if self.nickid in data:
503                    modelhandle = self.nick_hash[key]
504                    val = self.model.get_value(modelhandle, 0)
505                    if val:
506                        given, nick = data[self.nickid]
507                        name = p.get_primary_name()
508                        name.set_first_name(given.strip())
509                        name.set_nick_name(nick.strip())
510
511                if self.titleid in data:
512                    modelhandle = self.title_hash[key]
513                    val = self.model.get_value(modelhandle, 0)
514                    if val:
515                        title, given = data[self.titleid]
516                        name = p.get_primary_name()
517                        name.set_first_name(given.strip())
518                        name.set_title(title.strip())
519
520                if self.pref1id in data:
521                    modelhandle = self.prefix1_hash[key]
522                    val = self.model.get_value(modelhandle, 0)
523                    if val:
524                        given, prefixtotal, prefix = data[self.pref1id]
525                        name = p.get_primary_name()
526                        name.set_first_name(given.strip())
527                        oldpref = name.get_surname_list()[0].get_prefix().strip()
528                        if oldpref == '' or oldpref == prefix.strip():
529                            name.get_surname_list()[0].set_prefix(prefix)
530                        else:
531                            name.get_surname_list()[0].set_prefix(
532                                '%s %s' % (prefix, oldpref))
533
534                if self.compid in data:
535                    modelhandle = self.compound_hash[key]
536                    val = self.model.get_value(modelhandle, 0)
537                    if val:
538                        surns, prefs, cons, prims, origs = data[self.compid]
539                        name = p.get_primary_name()
540                        new_surn_list = []
541                        for surn, pref, con, prim, orig in zip(
542                                surns, prefs, cons, prims, origs):
543                            new_surn_list.append(Surname())
544                            new_surn_list[-1].set_surname(surn.strip())
545                            new_surn_list[-1].set_prefix(pref.strip())
546                            new_surn_list[-1].set_connector(con.strip())
547                            new_surn_list[-1].set_primary(prim)
548                            new_surn_list[-1].set_origintype(orig)
549                        name.set_surname_list(new_surn_list)
550
551                self.db.commit_person(p, trans)
552
553        self.db.enable_signals()
554        self.db.request_rebuild()
555        self.close()
556        self.cb()
557
558
class PatchNamesOptions(tool.ToolOptions):
    """
    Option handling for the name extraction tool.

    No options are added; everything is delegated to the base class.
    """

    def __init__(self, name, person_id=None):
        """Initialise the base class with *name* and *person_id*."""
        super().__init__(name, person_id)
566
567
def strip(arg):
    """Return *arg* without leading and trailing whitespace."""
    trimmed = arg.strip()
    return trimmed
570