1# 2# Gramps - a GTK+/GNOME based genealogy program 3# 4# Copyright (C) 2000-2006 Donald N. Allingham 5# Copyright (C) 2008 Brian G. Matherly 6# Copyright (C) 2010 Jakim Friant 7# 8# This program is free software; you can redistribute it and/or modify 9# it under the terms of the GNU General Public License as published by 10# the Free Software Foundation; either version 2 of the License, or 11# (at your option) any later version. 12# 13# This program is distributed in the hope that it will be useful, 14# but WITHOUT ANY WARRANTY; without even the implied warranty of 15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16# GNU General Public License for more details. 17# 18# You should have received a copy of the GNU General Public License 19# along with this program; if not, write to the Free Software 20# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 21# 22 23"""Tools/Database Processing/Extract Information from Names""" 24 25#------------------------------------------------------------------------- 26# 27# python modules 28# 29#------------------------------------------------------------------------- 30import re 31 32#------------------------------------------------------------------------- 33# 34# gnome/gtk 35# 36#------------------------------------------------------------------------- 37from gi.repository import Gtk 38from gi.repository import GObject 39 40#------------------------------------------------------------------------- 41# 42# gramps modules 43# 44#------------------------------------------------------------------------- 45from gramps.gen.const import URL_MANUAL_PAGE 46from gramps.gui.utils import ProgressMeter 47from gramps.gui.plug import tool 48from gramps.gui.dialog import OkDialog 49from gramps.gui.managedwindow import ManagedWindow 50from gramps.gui.display import display_help 51from gramps.gui.glade import Glade 52from gramps.gen.lib import NameOriginType, Surname 53from gramps.gen.db import DbTxn 
from gramps.gen.const import GRAMPS_LOCALE as glocale
_ = glocale.translation.sgettext

#-------------------------------------------------------------------------
#
# Constants
#
#-------------------------------------------------------------------------
WIKI_HELP_PAGE = '%s_-_Tools' % URL_MANUAL_PAGE
WIKI_HELP_SEC = _('manual|Extract_Information_from_Names')

#-------------------------------------------------------------------------
#
# constants
#
#-------------------------------------------------------------------------

# List of possible surname prefixes. Notice that you must run the tool
# multiple times for prefixes such as "van der".
# NOTE: every entry must be followed by a comma; two adjacent string
# literals are silently concatenated by Python ("el" "et" == "elet").
PREFIX_LIST = [
    "de", "van", "von", "di", "le", "du", "dela", "della",
    "des", "vande", "ten", "da", "af", "den", "das", "dello",
    "del", "en", "ein", "el", "et", "les", "lo", "los", "un",
    "um", "una", "uno", "der", "ter", "te", "die"]

# Default words that separate two surnames ("Garcia y Lopez").
CONNECTOR_LIST = ['e', 'y', ]
# Default words that are kept inside a single surname instead of splitting it.
CONNECTOR_LIST_NONSPLIT = ['de', 'van']

# Both patterns are compiled with re.VERBOSE, so the blanks inside them are
# layout only and do not match anything.
# A leading word of two or more letters ending in a period, then whitespace,
# then the remainder of the given name: group 1 = title, group 2 = the rest.
_title_re = re.compile(r"^ ([A-Za-z][A-Za-z]+\.) \s+ (.+) $", re.VERBOSE)
# A name followed by a second part wrapped in parentheses or double quotes:
# group 1 = given name, group 2 = nickname.
_nick_re = re.compile(r"(.+) \s* [(\"] (.+) [)\"]", re.VERBOSE)


#-------------------------------------------------------------------------
#
# Search each name in the database, and compare the firstname against the
# form of "Name (Nickname)". If it matches, change the first name entry
# to "Name" and add "Nickname" into the nickname field. Also, search for
# surname prefixes. If found, change the name entry and put the prefix in
# the name prefix field.
#
#-------------------------------------------------------------------------


class PatchNames(tool.BatchTool, ManagedWindow):
    """Batch tool that proposes clean-ups for the primary name of each person.

    While the tool is constructed every person in the database is analysed
    and the proposed changes are collected in ``self.handle_to_action``, a
    dict mapping a person handle to ``{action id: value}``.  The action ids
    are the class attributes below and the values are tuples:

    * ``titleid``: ``(title, given name)``
    * ``nickid``:  ``(given name, nickname)``
    * ``pref1id``: ``(given name, complete new prefix, prefix that was found)``
    * ``compid``:  ``(surnames, prefixes, connectors, primaries, origins)``,
      five parallel lists with one entry per resulting surname

    The proposals are listed in a window (:meth:`display`) and the ticked
    ones are written to the database by :meth:`on_ok_clicked`.
    """
    titleid = 1
    nickid = 2
    pref1id = 3
    compid = 4

    def __init__(self, dbstate, user, options_class, name, callback=None):
        """Ask for the word lists, analyse all people and show the result.

        :param dbstate: passed on to ``tool.BatchTool.__init__``
        :param user: object whose ``uistate`` attribute is handed to
                     ``ManagedWindow.__init__``
        :param options_class: passed on to ``tool.BatchTool.__init__``
        :param name: passed on to ``tool.BatchTool.__init__``
        :param callback: optional callable without arguments that is invoked
                         after the selected changes have been committed
        """
        uistate = user.uistate
        self.label = _('Name and title extraction tool')
        ManagedWindow.__init__(self, uistate, [], self.__class__)
        self.set_window(Gtk.Window(), Gtk.Label(), '')

        tool.BatchTool.__init__(self, dbstate, user, options_class, name)
        if self.fail:
            self.close()
            return

        self._ask_word_lists()
        self._compile_patterns()

        self.cb = callback
        self.handle_to_action = {}

        self.progress = ProgressMeter(
            _('Extracting Information from Names'), '',
            parent=self.uistate.window)
        self.progress.set_pass(_('Analyzing names'),
                               self.db.get_number_of_people())

        for person in self.db.iter_people():
            self._analyze_person(person)
            # Step exactly once per person, whatever path the analysis took.
            self.progress.step()

        if self.handle_to_action:
            self.display()
        else:
            self.progress.close()
            self.close()
            OkDialog(_('No modifications made'),
                     _("No titles, nicknames or prefixes were found"),
                     parent=self.uistate.window)

    @staticmethod
    def _parse_word_list(text):
        """Turn a comma separated string into a list of clean words.

        Surrounding blanks are removed, empty entries (for instance from a
        trailing comma) are dropped and the words are lower-cased, because
        the surname tokens are lower-cased before they are looked up in
        these lists.
        """
        words = (word.strip().lower() for word in text.split(','))
        return [word for word in words if word]

    @staticmethod
    def _alternation(words):
        """Return a regex alternation that matches the given words literally.

        The words are typed in by the user, so they are escaped.  An empty
        list gives ``(?!)``, a pattern that never matches.
        """
        if not words:
            return r"(?!)"
        return '|'.join(re.escape(word) for word in words)

    @staticmethod
    def _add_entry_row(dialog, caption, words):
        """Add a 'label + text entry' row to ``dialog`` and return the entry."""
        hbox = Gtk.Box()
        hbox.pack_start(Gtk.Label(label=caption), False, False, 5)
        entry = Gtk.Entry()
        entry.set_text(', '.join(words))
        hbox.pack_start(entry, True, True, 0)
        dialog.vbox.pack_start(hbox, True, True, 0)
        return entry

    def _ask_word_lists(self):
        """Show the modal settings dialog and store the three word lists.

        Sets ``self.prefix_list``, ``self.connector_list`` and
        ``self.connector_list_nonsplit``.  The entered text is used no
        matter how the dialog was closed; the response is not inspected.
        """
        winprefix = Gtk.Dialog(
            title=_("Default prefix and connector settings"),
            transient_for=self.uistate.window, modal=True,
            destroy_with_parent=True)
        winprefix.add_button(_('_OK'), Gtk.ResponseType.ACCEPT)
        winprefix.vbox.set_spacing(5)
        self.prefixbox = self._add_entry_row(
            winprefix, _('Prefixes to search for:'), PREFIX_LIST)
        self.conbox = self._add_entry_row(
            winprefix, _('Connectors splitting surnames:'), CONNECTOR_LIST)
        self.connsbox = self._add_entry_row(
            winprefix, _('Connectors not splitting surnames:'),
            CONNECTOR_LIST_NONSPLIT)
        winprefix.resize(700, 100)
        winprefix.show_all()

        winprefix.run()
        self.prefix_list = self._parse_word_list(self.prefixbox.get_text())
        self.connector_list = self._parse_word_list(self.conbox.get_text())
        self.connector_list_nonsplit = self._parse_word_list(
            self.connsbox.get_text())
        # The entry widgets are not needed any more; drop the references
        # before the dialog is destroyed.
        self.prefixbox = None
        self.conbox = None
        self.connsbox = None
        winprefix.destroy()

    def _compile_patterns(self):
        """Compile the regular expressions built from the word lists."""
        prefixes = self._alternation(self.prefix_list)
        connectors = self._alternation(self.connector_list)

        # Find a prefix in the first_name
        self._fn_prefix_re = re.compile(
            r"(\S+)\s+(%s)\s*$" % prefixes, re.IGNORECASE)

        # Find a prefix in the surname
        # (compiled as before, but not referenced by the code in this class)
        self._sn_prefix_re = re.compile(
            r"^\s*(%s)\s+(.+)" % prefixes, re.IGNORECASE)
        # Find a connector in the surname
        # (compiled as before, but not referenced by the code in this class)
        self._sn_con_re = re.compile(
            r"^\s*(.+)\s+(%s)\s+(.+)" % connectors, re.IGNORECASE)

    def _record(self, key, action_id, value):
        """Remember ``value`` as proposed change ``action_id`` for ``key``."""
        self.handle_to_action.setdefault(key, {})[action_id] = value

    def _analyze_person(self, person):
        """Collect the proposed changes for the primary name of ``person``."""
        key = person.handle
        name = person.get_primary_name()
        first = name.get_first_name()

        surname_objs = name.get_surname_list()
        old_prefix = [surn.get_prefix() for surn in surname_objs]
        old_surn = [surn.get_surname() for surn in surname_objs]
        old_con = [surn.get_connector() for surn in surname_objs]
        old_prim = [surn.get_primary() for surn in surname_objs]
        old_orig = [surn.get_origintype() for surn in surname_objs]

        old_title = [name.get_title()] if name.get_title() else []
        new_title = []

        # Peel off leading titles one by one ("Dr. Prof. John").
        match = _title_re.match(first)
        while match:
            title, first = match.groups()
            new_title.append(title)
            match = _title_re.match(first)
        matchnick = _nick_re.match(first)

        if new_title:
            self._record(key, self.titleid,
                         (" ".join(old_title + new_title), first))
        elif matchnick:
            # we check for nick, which changes given name like title
            given, nick = matchnick.groups()
            self._record(key, self.nickid, (given, nick))
        else:
            # Try to find the name prefix in the given name, also this
            # changes given name
            match = self._fn_prefix_re.match(first)
            if match:
                given, found_prefix = match.groups()
                # A name may have no surname at all, so guard the index.
                if old_prefix and old_prefix[0]:
                    # Put the found prefix before the old prefix
                    new_prefix = " ".join([found_prefix, old_prefix[0]])
                else:
                    new_prefix = found_prefix
                self._record(key, self.pref1id,
                             (given, new_prefix, found_prefix))

        #check for Gedcom import of compound surnames
        if len(old_surn) == 1 and old_con[0] == '':
            prefixes = old_prefix[0].split(',')
            surnames = old_surn[0].split(',')
            if len(prefixes) > 1 and len(prefixes) == len(surnames):
                #assume a list of prefix and a list of surnames
                count = len(prefixes)
                prefixes = [pref.strip() for pref in prefixes]
                surnames = [surn.strip() for surn in surnames]
                # Only the first surname is primary and keeps the origin.
                primaries = [True] + [False] * (count - 1)
                origs = [old_orig[0]] + [NameOriginType()
                                         for _unused in range(count - 1)]
                self._record(key, self.compid,
                             (surnames, prefixes, [''] * count,
                              primaries, origs))
                #we cannot check compound surnames, so stop here
                return

        compoundval = self._split_surnames(
            old_prefix, old_surn, old_con, old_prim, old_orig)
        if compoundval is not None:
            self._record(key, self.compid, compoundval)

    @staticmethod
    def _next_token(tokens):
        """Pop the first token.

        :returns: ``(token, True)``, or ``('', False)`` when ``tokens`` is
                  exhausted.
        """
        if tokens:
            return tokens.pop(0), True
        return '', False

    def _split_surnames(self, old_prefix, old_surn, old_con, old_prim,
                        old_orig):
        """Try to split surnames in compounds: prefix surname connector.

        The five arguments are parallel lists describing the current
        surnames of one name.

        :returns: the tuple ``(surnames, prefixes, connectors, primaries,
                  origins)`` of parallel lists when at least one prefix or
                  connector was extracted or a surname was split, otherwise
                  ``None``.
        """
        found = False
        new_prefix_list = []
        new_surname_list = []
        new_connector_list = []
        new_prim_list = []
        new_orig_list = []
        for pref, surn, con, prim, orig in zip(
                old_prefix, old_surn, old_con, old_prim, old_orig):
            surnval = surn.split()
            if not surnval:
                # Nothing to analyse, keep this surname entry as it is.
                new_prefix_list.append(pref)
                new_surname_list.append('')
                new_connector_list.append(con)
                new_prim_list.append(prim)
                new_orig_list.append(orig)
                continue
            val = surnval.pop(0)
            # Start afresh for every surname.  Carrying the flag over from
            # the previous surname would skip (and so drop) all surnames
            # after the first one.
            cont = True
            while cont:
                new_prefix_list.append(pref)
                new_surname_list.append('')
                new_connector_list.append(con)
                new_prim_list.append(prim)
                new_orig_list.append(orig)

                while cont and (val.lower() in self.prefix_list):
                    found = True
                    if new_prefix_list[-1]:
                        new_prefix_list[-1] += ' ' + val
                    else:
                        new_prefix_list[-1] = val
                    val, cont = self._next_token(surnval)
                #after prefix we have a surname
                if cont:
                    new_surname_list[-1] = val
                    val, cont = self._next_token(surnval)
                #if value after surname indicates continue, then continue
                while cont and (
                        val.lower() in self.connector_list_nonsplit):
                    #add this val to the current surname
                    new_surname_list[-1] += ' ' + val
                    val, cont = self._next_token(surnval)
                # if previous is non-splitting connector, then add new val
                # to current surname
                if cont and (new_surname_list[-1].split()[-1].lower()
                             in self.connector_list_nonsplit):
                    new_surname_list[-1] += ' ' + val
                    val, cont = self._next_token(surnval)
                #if next is a connector, add it to the surname
                if cont and val.lower() in self.connector_list:
                    found = True
                    if new_connector_list[-1]:
                        # Append to the existing connector, do not replace it
                        new_connector_list[-1] += ' ' + val
                    else:
                        new_connector_list[-1] = val
                    val, cont = self._next_token(surnval)
                # initialize for a next surname in case there are still
                # val
                if cont:
                    found = True  # we split surname
                    pref = ''
                    con = ''
                    prim = False
                    orig = NameOriginType()
        if not found:
            return None
        return (new_surname_list, new_prefix_list, new_connector_list,
                new_prim_list, new_orig_list)

    def build_menu_names(self, obj):
        """Return the ``(label, None)`` pair used for this window."""
        return (self.label, None)

    def toggled(self, cell, path_string):
        """Flip the 'Select' checkbox of the row at ``path_string``.

        Connected to the ``toggled`` signal of the checkbox renderer.
        """
        path = tuple(map(int, path_string.split(':')))
        row = self.model[path]
        row[0] = not row[0]
        self.model.row_changed(path, row.iter)

    def _append_row(self, gid, kind, value, current_name):
        """Append a pre-selected row to the model and return its iter."""
        row_iter = self.model.append()
        self.model.set_value(row_iter, 0, True)
        self.model.set_value(row_iter, 1, gid)
        self.model.set_value(row_iter, 2, kind)
        self.model.set_value(row_iter, 3, value)
        self.model.set_value(row_iter, 4, current_name)
        return row_iter

    def display(self):
        """Build the result window with one selectable row per proposal.

        Fills ``self.model`` and the four ``*_hash`` dicts, which map a
        person handle to the model row of the corresponding proposal.
        """
        self.top = Glade()
        window = self.top.toplevel
        self.top.connect_signals({
            "destroy_passed_object": self.close,
            "on_ok_clicked": self.on_ok_clicked,
            "on_help_clicked": self.on_help_clicked,
            "on_delete_event": self.close})

        self.list = self.top.get_object("list")
        self.set_window(window, self.top.get_object('title'), self.label)
        self.setup_configs("interface.patchnames", 680, 400)

        # Columns: selected, Gramps ID, kind of change, new value, name
        self.model = Gtk.ListStore(GObject.TYPE_BOOLEAN, GObject.TYPE_STRING,
                                   GObject.TYPE_STRING, GObject.TYPE_STRING,
                                   GObject.TYPE_STRING)

        r = Gtk.CellRendererToggle()
        r.connect('toggled', self.toggled)
        c = Gtk.TreeViewColumn(_('Select'), r, active=0)
        self.list.append_column(c)

        c = Gtk.TreeViewColumn(_('ID'), Gtk.CellRendererText(), text=1)
        self.list.append_column(c)

        c = Gtk.TreeViewColumn(_('Type'), Gtk.CellRendererText(), text=2)
        self.list.append_column(c)

        c = Gtk.TreeViewColumn(_('Value'), Gtk.CellRendererText(), text=3)
        self.list.append_column(c)

        c = Gtk.TreeViewColumn(_('Current Name'), Gtk.CellRendererText(),
                               text=4)
        self.list.append_column(c)

        self.list.set_model(self.model)

        self.nick_hash = {}
        self.title_hash = {}
        self.prefix1_hash = {}
        self.compound_hash = {}

        self.progress.set_pass(_('Building display'),
                               len(self.handle_to_action))

        for key, data in self.handle_to_action.items():
            p = self.db.get_person_from_handle(key)
            gid = p.get_gramps_id()
            current_name = p.get_primary_name().get_name()
            if self.nickid in data:
                given, nick = data[self.nickid]
                self.nick_hash[key] = self._append_row(
                    gid, _('Nickname'), nick, current_name)

            if self.titleid in data:
                title, given = data[self.titleid]
                self.title_hash[key] = self._append_row(
                    gid, _('Person|Title'), title, current_name)

            if self.pref1id in data:
                given, prefixtotal, new_prefix = data[self.pref1id]
                self.prefix1_hash[key] = self._append_row(
                    gid, _('Prefix in given name'), prefixtotal,
                    current_name)

            if self.compid in data:
                surn_list, pref_list, con_list, prims, origs =\
                    data[self.compid]
                # Shown as "[prefix,surname,connector]-[prefix,surname]"
                pieces = []
                for sur, pre, con in zip(surn_list, pref_list, con_list):
                    piece = '[' + pre + ',' + sur
                    if con:
                        piece += ',' + con
                    pieces.append(piece + ']')
                self.compound_hash[key] = self._append_row(
                    gid, _('Compound surname'), '-'.join(pieces),
                    current_name)

            self.progress.step()

        self.progress.close()
        self.show()

    def on_help_clicked(self, obj):
        """Display the relevant portion of Gramps manual"""
        display_help(webpage=WIKI_HELP_PAGE, section=WIKI_HELP_SEC)

    def on_ok_clicked(self, obj):
        """Write every proposal that is still ticked to the database.

        All people with a proposal are committed in one batch transaction
        while the database signals are disabled.  Afterwards the signals
        are enabled again, a rebuild is requested, the window is closed and
        the callback given to the constructor (if any) is called.
        """
        signals_disabled = False
        try:
            with DbTxn(_("Extract information from names"), self.db,
                       batch=True) as trans:
                self.db.disable_signals()
                signals_disabled = True

                for key, data in self.handle_to_action.items():
                    p = self.db.get_person_from_handle(key)
                    if self.nickid in data:
                        modelhandle = self.nick_hash[key]
                        if self.model.get_value(modelhandle, 0):
                            given, nick = data[self.nickid]
                            name = p.get_primary_name()
                            name.set_first_name(given.strip())
                            name.set_nick_name(nick.strip())

                    if self.titleid in data:
                        modelhandle = self.title_hash[key]
                        if self.model.get_value(modelhandle, 0):
                            title, given = data[self.titleid]
                            name = p.get_primary_name()
                            name.set_first_name(given.strip())
                            name.set_title(title.strip())

                    if self.pref1id in data:
                        modelhandle = self.prefix1_hash[key]
                        if self.model.get_value(modelhandle, 0):
                            given, prefixtotal, prefix = data[self.pref1id]
                            name = p.get_primary_name()
                            name.set_first_name(given.strip())
                            surname_list = name.get_surname_list()
                            if surname_list:
                                first_surname = surname_list[0]
                            else:
                                # No surname yet: create one so the prefix
                                # taken from the given name is not lost.
                                first_surname = Surname()
                                first_surname.set_primary(True)
                                name.set_surname_list([first_surname])
                            oldpref = first_surname.get_prefix().strip()
                            if oldpref == '' or oldpref == prefix.strip():
                                first_surname.set_prefix(prefix)
                            else:
                                first_surname.set_prefix(
                                    '%s %s' % (prefix, oldpref))

                    if self.compid in data:
                        modelhandle = self.compound_hash[key]
                        if self.model.get_value(modelhandle, 0):
                            surns, prefs, cons, prims, origs = \
                                data[self.compid]
                            name = p.get_primary_name()
                            new_surn_list = []
                            for surn, pref, con, prim, orig in zip(
                                    surns, prefs, cons, prims, origs):
                                surname = Surname()
                                surname.set_surname(surn.strip())
                                surname.set_prefix(pref.strip())
                                surname.set_connector(con.strip())
                                surname.set_primary(prim)
                                surname.set_origintype(orig)
                                new_surn_list.append(surname)
                            name.set_surname_list(new_surn_list)

                    self.db.commit_person(p, trans)
        finally:
            # Never leave the database with its signals switched off, not
            # even when applying a change failed.
            if signals_disabled:
                self.db.enable_signals()

        self.db.request_rebuild()
        self.close()
        # The callback is optional (it defaults to None in __init__).
        if self.cb is not None:
            self.cb()


class PatchNamesOptions(tool.ToolOptions):
    """
    Defines options and provides handling interface.
    """

    def __init__(self, name, person_id=None):
        tool.ToolOptions.__init__(self, name, person_id)


def strip(arg):
    """Return ``arg`` without leading and trailing whitespace."""
    return arg.strip()