1# -*- coding: utf-8 -*-
2#
3# Gramps - a GTK+/GNOME based genealogy program
4#
5# Copyright (C) 2000-2006  Donald N. Allingham
6# Copyright (C) 2009       Brian G. Matherly
7# Copyright (C) 2013       John Ralls
8#
9# This program is free software; you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation; either version 2 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program; if not, write to the Free Software
21# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22#
23
24#------------------------------------------------------------------------
25#
26# python modules
27#
28#------------------------------------------------------------------------
29import gettext
30import sys
31import os
32import codecs
33import locale
34import collections
35import logging
36from binascii import hexlify
37
# Module logger. Propagation is on so that records also reach the root
# logger's handlers.
LOG = logging.getLogger("." + __name__)
LOG.propagate = True
# True once Locale and Collator have been imported from ICU.
HAVE_ICU = False
# Warning text saved when ICU can't be imported; it is logged later by
# GrampsLocale once a log handler has been installed.
_icu_err = None
# Temporary log handler installed while the first GrampsLocale is set up.
_hdlr = None
# GrampsLocale initialization comes before command-line argument
# passing, so one must set the log level directly. The default is
# logging.WARN. Uncomment the following to change it to logging.DEBUG:
# LOG.setLevel(logging.DEBUG)

# ICUError is imported together with Locale and Collator because
# GrampsLocale.__init__ catches it when creating the collator; without
# the import that handler would raise NameError instead.
try:
    from icu import Locale, Collator, ICUError
    HAVE_ICU = True
except ImportError:
    try:
        from PyICU import Locale, Collator, ICUError
        HAVE_ICU = True
    except ImportError as err:
        # No logger, save the warning message for later.
        _icu_err = ("ICU not loaded because %s. Localization will be impaired. "
                    "Use your package manager to install PyICU" % str(err))

# Locales known to ICU, or None when ICU is not available.
ICU_LOCALES = None
if HAVE_ICU:
    ICU_LOCALES = Locale.getAvailableLocales()
62
63# Map of languages for converting to Microsoft locales and naming
64# locales for display to the user.  It's important to add to this list
65# when a new translation is added.  Note the dummy _(): That's just to
66# get xgettext to include the string in gramps.pot; actual translation
67# is done in _get_language_string() below.
68# (The gramps officially-supported language list is ALL_LINGUAS in setup.py)
_ = lambda x: x
# Each value is (Windows locale name, Windows code page, display name).
# The first two are None for languages Windows has no locale for.
_LOCALE_NAMES = {
    'ar': ('Arabic_Saudi Arabia', '1256', _("Arabic")),
    'bg': ('Bulgarian_Bulgaria', '1251', _("Bulgarian")),
    'br': (None, None, _("Breton")), #Windows has no translation for Breton
    'ca': ('Catalan_Spain', '1252', _("Catalan")),
    'cs': ('Czech_Czech Republic', '1250', _("Czech")),
    'da': ('Danish_Denmark', '1252', _("Danish")),
    'de': ('German_Germany', '1252', _("German")),
    'el': ('Greek_Greece', '1253', _("Greek")),
    'en': ('English_United States', '1252', _("English (USA)")),
    'en_GB': ('English_United Kingdom', '1252', _("English")),
    'eo': (None, None, _("Esperanto")), #Windows has no translation for Esperanto
    'es': ('Spanish_Spain', '1252', _("Spanish")),
    'fi': ('Finnish_Finland', '1252', _("Finnish")),
    'fr': ('French_France', '1252', _("French")),
    'ga': (None, None, _("Gaelic")), #Windows has no translation for Gaelic
    'he': ('Hebrew_Israel', '1255', _("Hebrew")),
    'hr': ('Croatian_Croatia', '1250', _("Croatian")),
    'hu': ('Hungarian_Hungary', '1250', _("Hungarian")),
    'is': ('Icelandic', '1252', _("Icelandic")),
    'it': ('Italian_Italy', '1252', _("Italian")),
    'ja': ('Japanese_Japan', '932', _("Japanese")),
    # NOTE(review): the Windows Baltic code page is 1257; confirm that
    # 1252 is really intended for Lithuanian.
    'lt': ('Lithuanian_Lithuania', '1252', _("Lithuanian")),
    'mk': (None, None, _("Macedonian")), #Windows has no translation for Macedonian
    'nb': ('Norwegian_Norway', '1252', _("Norwegian Bokmal")),
    'nl': ('Dutch_Netherlands', '1252', _("Dutch")),
    'nn': ('Norwegian-Nynorsk_Norway', '1252', _("Norwegian Nynorsk")),
    'pl': ('Polish_Poland', '1250', _("Polish")),
    'pt_BR': ('Portuguese_Brazil', '1252', _("Portuguese (Brazil)")),
    'pt_PT': ('Portuguese_Portugal', '1252', _("Portuguese (Portugal)")),
    'ro': ('Romanian_Romania', '1250', _("Romanian")),
    'ru': ('Russian_Russia', '1251', _("Russian")),
    'sk': ('Slovak_Slovakia', '1250', _("Slovak")),
    'sl': ('Slovenian_Slovenia', '1250', _("Slovenian")),
    'sq': ('Albanian_Albania', '1250', _("Albanian")),
    'sr': ('Serbian(Cyrillic)_Serbia and Montenegro', '1251', _("Serbian")),
    'sv': ('Swedish_Sweden', '1252', _("Swedish")),
    'ta': (None, None, _("Tamil")), # Windows has no codepage for Tamil
    'tr': ('Turkish_Turkey', '1254', _("Turkish")),
    'uk': ('Ukrainian_Ukraine', '1251', _("Ukrainian")),
    'vi': ('Vietnamese_Vietnam', '1258', _("Vietnamese")),
    'zh_CN': ('Chinese_China', '936', _("Chinese (Simplified)")),
    'zh_HK': ('Chinese_Hong Kong', '950', _("Chinese (Hong Kong)")),
    'zh_TW': ('Chinese_Taiwan', '950', _("Chinese (Traditional)")),
    }

# locales with right-to-left text
_RTL_LOCALES = ('ar', 'he')

# locales with less than 70% currently translated
INCOMPLETE_TRANSLATIONS = ('ar', 'bg', 'he', 'sq', 'ta', 'tr')
121
def _check_mswin_locale(locale):
    """
    Map a POSIX-style locale name to its Microsoft Windows equivalent.

    :param locale: A locale name such as "fr", "pt_BR" or "de_DE.UTF-8".
                   Only the first five characters are examined.
    :returns: A tuple (code, (windows_name, codepage)) where code is the
              key matched in _LOCALE_NAMES, or (None, None) when the
              locale is empty or unknown. windows_name and codepage are
              both None for languages that Windows has no locale for.
    """
    if not locale:
        return (None, None)

    region_code = locale[:5]
    if region_code in _LOCALE_NAMES:
        return (region_code, _LOCALE_NAMES[region_code][:2])

    lang_code = locale[:2]
    # US English is the outlier, all other regional English locales want
    # real English. This must be tested before the two-letter lookup
    # because 'en' itself is in the table and maps to US English.
    if (lang_code == 'en' and len(region_code) == 5
            and region_code[2] == '_' and region_code != 'en_US'):
        return ('en_GB', _LOCALE_NAMES['en_GB'][:2])

    if lang_code in _LOCALE_NAMES:
        return (lang_code, _LOCALE_NAMES[lang_code][:2])
    return (None, None)
138
def _check_mswin_locale_reverse(locale):
    """
    Map a Windows locale name to the matching key of _LOCALE_NAMES.

    :param locale: A Windows locale name such as "French_France"; may be
                   None or empty.
    :returns: A tuple (code, codepage), or (None, None) when the name is
              empty or not recognised.
    """
    # Entries without a Windows locale store None as their name, so an
    # empty/None argument must be rejected before the table is searched.
    if not locale:
        return (None, None)
    for (loc, msloc) in _LOCALE_NAMES.items():
        if msloc and locale == msloc[0]:
            return (loc, msloc[1])
    #US English is the outlier, all other English locales want real English:
    if locale.startswith('English') and locale != 'English_United States':
        return ('en_GB', '1252')
    return (None, None)
147
def _check_gformat():
    """
    Return a strftime()-style format string for the locale's short date.

    Some OS environments do not support the locale.nl_langinfo() method
    of determining month names and other date related information. When
    it is unavailable the format is deduced from how time.strftime('%x')
    renders a known date.

    :returns: The date format string, with '%y' replaced by '%Y' and '-'
              separators replaced by '/'.
    """
    try:
        gformat = locale.nl_langinfo(locale.D_FMT).replace('%y', '%Y')
    except (AttributeError, ValueError):
        # nl_langinfo()/D_FMT are missing on some platforms.
        #
        # Depending on the locale, the value returned for 20th Feb 2009
        # could be '20/2/2009', '20/02/2009', '20.2.2009', '20.02.2009',
        # '20-2-2009', '20-02-2009', '2009/02/20', '2009.02.20',
        # '2009-02-20', or '09-02-20' so to reduce the possible values to
        # test for, make sure both the month and the day are double digits,
        # preferably greater than 12 for human readability.
        import time
        timestr = time.strftime('%x', (2005, 10, 25, 1, 1, 1, 1, 1, 1))

        # Gramps treats dates with '-' as ISO format, so replace separator
        # on locale dates that use '-' to prevent conflict
        timestr = timestr.replace('-', '/')

        time2fmt_map = {'25/10/2005' : '%d/%m/%Y',
                        '10/25/2005' : '%m/%d/%Y',
                        '2005/10/25' : '%Y/%m/%d',
                        '25.10.2005' : '%d.%m.%Y',
                        '10.25.2005' : '%m.%d.%Y',
                        '2005.10.25' : '%Y.%m.%d',
                       }

        # Anything unrecognised falls back to day/month/year.
        gformat = time2fmt_map.get(timestr, '%d/%m/%Y')
    else:
        # Gramps treats dates with '-' as ISO format, so replace separator
        # on locale dates that use '-' to prevent conflict
        gformat = gformat.replace('-', '/')
    return gformat
187
188#------------------------------------------------------------------------
189#
190# GrampsLocale Class
191#
192#------------------------------------------------------------------------
193class GrampsLocale:
194    """
195    Encapsulate a locale.  This class is a sort-of-singleton: The
196    first instance created will query the environment and OSX defaults
197    for missing parameters (precedence is parameters passed to the
198    constructor, environment variables LANG, LC_COLLATE, LC_TIME,
199    etc., and LANGUAGE, OSX defaults settings when that's the
200    platform).  Subsequent calls to the constructor with no or
    identical parameters will return the same GrampsLocale
202    object. Construction with different parameters will result in a
203    new GrampsLocale instance with the specified parameters, but any
204    parameters left out will be filled in from the first instance.
205
206    :param localedir: The full path to the top level directory containing the
207                      translation files. Defaults to sys.prefix/share/locale.
208
209    :param lang: A single locale value which is used for unset locale.LC_FOO
210                 settings.
211
212    :param domain: The name of the applicable translation file. The default is
213                   "gramps", indicating files in LC_MESSAGES named gramps.mo.
214
215    :param languages: String with a ':'-separated list of two or five character
                      codes corresponding to subdirectories in the localedir,
217                      e.g.: "fr" or "zh_CN".
218    """
219
220    DEFAULT_TRANSLATION_STR = "default"
221    __first_instance = None
222    encoding = None
223
224    def __new__(cls, localedir=None, lang=None, domain=None, languages=None):
225        if not GrampsLocale.__first_instance:
226            cls.__first_instance = super(GrampsLocale, cls).__new__(cls)
227            cls.__first_instance.initialized = False
228            return cls.__first_instance
229
230        if not cls.__first_instance.initialized:
231            raise RuntimeError("Second GrampsLocale created before first one was initialized")
232        if ((lang is None or lang == cls.__first_instance.lang)
233            and (localedir is None or localedir == cls.__first_instance.localedir)
234            and (domain is None or domain == cls.__first_instance.localedomain)
235            and (languages is None or len(languages) == 0 or
236                 languages == cls.__first_instance.language)):
237            return cls.__first_instance
238
239        return super(GrampsLocale, cls).__new__(cls)
240
241    def _win_init_environment(self):
242        """
243        The Windows implementation of Python ignores environment
244        variables when setting the locale; it only pays attention to
245        the control panel language settings -- which for practical
246        purposes limits one to the language for which one purchased
247        Windows. This function enables using alternative
248        localizations.
249        """
250
251        if 'LANG' in os.environ:
252            (lang, loc) = _check_mswin_locale(os.environ['LANG'])
253            if loc:
254                locale.setlocale(locale.LC_ALL, '.'.join(loc))
255                self.lang = lang
256                self.encoding = loc[1]
257            else:
258                LOG.debug("%%LANG%% value %s not usable", os.environ['LANG'])
259        if not self.lang:
260            locale.setlocale(locale.LC_ALL, '')
261            (lang, encoding) = locale.getlocale()
262            loc = _check_mswin_locale_reverse(lang)
263            if loc[0]:
264                self.lang = loc[0]
265                self.encoding = loc[1]
266            else:
267                (lang, loc) = _check_mswin_locale(locale.getdefaultlocale()[0])
268                if lang:
269                    self.lang = lang
270                    self.encoding = loc[1]
271                else:
272                    LOG.debug("No usable locale found in environment")
273
274        if not self.lang:
275            self.lang = 'C'
276            self.encoding = 'cp1252'
277
278        if 'LC_MESSAGES' in os.environ:
279            lang = self.check_available_translations(os.environ['LC_MESSAGES'])
280            if lang:
281                self.language = [lang]
282            else:
283                LOG.debug("No translation for %%LC_MESSAGES%% locale")
284        if 'LANGUAGE' in os.environ:
285            language = [x for x in [self.check_available_translations(l)
286                                    for l in os.environ["LANGUAGE"].split(":")]
287                        if x]
288            if language:
289                self.language = language
290            else:
291                LOG.debug("No languages with translations found in %%LANGUAGES%%")
292        if not self.language:
293            self.language = [self.lang[:5]]
294
295        if 'COLLATION' in os.environ:
296            coll = os.environ['COLLATION']
297            if HAVE_ICU:
298                if coll[:2] in ICU_LOCALES:
299                    self.collation = coll
300                else:
301                    self.collation = self.lang
302            else:
303                (coll, loc) = _check_mswin_locale(coll)
304                if not loc:
305                    (coll, loc) = _check_mswin_locale(self.lang)
306                    self.collation = '.'.join(loc)
307                    locale.setlocale(locale.LC_COLLATE, self.collation )
308        else:
309            if HAVE_ICU:
310                self.collation = self.lang
311            else:
312                (coll, loc) = _check_mswin_locale(self.lang)
313                if loc:
314                    self.collation = '.'.join(loc)
315                else:
316                    self.collation = 'C'
317                locale.setlocale(locale.LC_COLLATE, self.collation )
318
319# We can't import datahandler stuff or we'll get a circular
320# dependency, so we rely on the available translations list
321        if 'LC_TIME' in os.environ:
322            self.calendar = self.check_available_translations(os.environ['LC_TIME']) or self.lang
323        else:
324            self.calendar = self.lang
325
326        if 'LC_NUMERIC' in os.environ:
327            self.numeric = os.environ['LC_NUMERIC']
328        else:
329            self.numeric = self.lang
330
331        if 'LC_MONETARY' in os.environ:
332            self.currency = os.environ['LC_MONETARY']
333        else:
334            self.currency = self.lang
335
336    def _init_from_environment(self):
337
338        def _check_locale(locale):
339            if not locale[0]:
340                return False
341            lang = self.check_available_translations(locale[0])
342            if not lang and locale[0].startswith('en'):
343                locale = ('en_GB', 'UTF-8')
344                lang = 'en_GB'
345            if not lang:
346                return False
347            self.lang = locale[0]
348            self.encoding = locale[1]
349            self.language = [lang]
350            return True
351
352        _failure = False
353        try:
354            locale.setlocale(locale.LC_ALL, '')
355            if not _check_locale(locale.getdefaultlocale(envvars=('LC_ALL', 'LANG', 'LANGUAGE'))):
356                LOG.debug("Usable locale not found, localization settings ignored.");
357                self.lang = 'C'
358                self.encoding = 'ascii'
359                self.language = ['en']
360                _failure = True
361
362        except locale.Error as err:
363            LOG.debug("Locale error %s, localization settings ignored.",
364                        err);
365            self.lang = 'C'
366            self.encoding = 'ascii'
367            self.language = ['en']
368            _failure = True
369
370        #LC_MESSAGES
371        (loc, enc) = locale.getlocale(locale.LC_MESSAGES)
372        if loc:
373            language = self.check_available_translations(loc)
374            if language:
375                self.language = [language]
376            else:
377                LOG.debug("No translation for LC_MESSAGES locale %s", loc)
378
379        if HAVE_ICU:
380            self.calendar = locale.getlocale(locale.LC_TIME)[0] or self.lang[:5]
381            self.collation = locale.getlocale(locale.LC_COLLATE)[0] or self.lang[:5]
382        else:
383            loc = locale.getlocale(locale.LC_TIME)
384            if loc and self.check_available_translations(loc[0]):
385                self.calendar = '.'.join(loc)
386            else:
387                self.calendar = self.lang
388
389        loc = locale.getlocale(locale.LC_COLLATE)
390        if loc and loc[0]:
391            self.collation = '.'.join(loc)
392        else:
393            self.collation = self.lang
394
395        if HAVE_ICU and 'COLLATION' in os.environ:
396            self.collation = os.environ['COLLATION']
397
398        loc = locale.getlocale(locale.LC_NUMERIC)
399        if loc and loc[0]:
400            self.numeric = '.'.join(loc)
401        else:
402            self.numeric = self.lang
403
404        loc = locale.getlocale(locale.LC_MONETARY)
405        if loc and loc[0]:
406            self.currency = '.'.join(loc)
407        else:
408            self.currency = self.lang
409
410        # $LANGUAGE overrides $LANG, $LC_MESSAGES
411        if "LANGUAGE" in os.environ:
412            language = [x for x in [self.check_available_translations(l)
413                                    for l in os.environ["LANGUAGE"].split(":")]
414                            if x]
415            if language:
416                self.language = language
417                if not self.lang.startswith(self.language[0]):
418                    LOG.debug("Overiding locale setting '%s' with LANGUAGE setting '%s'", self.lang, self.language[0])
419                    self.lang = self.calendar = self.language[0]
420            elif _failure:
421                LOG.warning("No valid locale settings found, using US English")
422
423        if __debug__:
424            LOG.debug("The locale tformat for '%s' is '%s'",
425                      self.lang, _check_gformat())
426
427    def _win_bindtextdomain(self, localedomain, localedir):
428        """
429        Help routine for loading and setting up libintl attributes
430        Returns libintl
431        """
432        from ctypes import cdll
433        try:
434            libintl = cdll.LoadLibrary('libintl-8')
435            libintl.bindtextdomain(localedomain, localedir)
436            libintl.textdomain(localedomain)
437            libintl.bind_textdomain_codeset(localedomain, "UTF-8")
438
439        except WindowsError:
440            LOG.warning("Localization library libintl not on %PATH%, localization will be incomplete")
441
442    def __init_first_instance(self):
443        """
444        Initialize the primary locale from whatever might be
445        available. We only do this once, and the resulting
446        GrampsLocale is returned by default.
447        """
448        global _hdlr
449        _hdlr = logging.StreamHandler()
450        _hdlr.setFormatter(logging.Formatter(fmt="%(name)s.%(levelname)s: %(message)s"))
451        LOG.addHandler(_hdlr)
452
453        #Now that we have a logger set up we can issue the icu error if needed.
454        if not HAVE_ICU:
455            LOG.warning(_icu_err)
456
457        # Even the first instance can be overridden by passing lang
458        # and languages to the constructor. If it isn't (which is the
459        # expected behavior), do platform-specific setup:
460        if not (self.lang and self.language):
461            if sys.platform == 'darwin':
462                from . import maclocale
463                maclocale.mac_setup_localization(self)
464            elif sys.platform == 'win32':
465                self._win_init_environment()
466            else:
467                self._init_from_environment()
468        else:
469            self.numeric = self.currency = self.calendar = self.collation = self.lang
470
471        if not self.lang:
472            self.lang = 'en_US.UTF-8'
473        if not self.language:
474            self.language.append('en')
475        if not self.localedir and not self.lang.startswith('en'):
476            LOG.warning("No translations for %s were found, setting localization to U.S. English", self.localedomain)
477            self.lang = 'en_US.UTF-8'
478            self.language = ['en']
479
480#Next, we need to know what is the encoding from the native
481#environment. This is used by python standard library funcions which
482#localize their output, e.g. time.strftime(). NB: encoding is a class variable.
483        if not self.encoding:
484            self.encoding = (locale.getpreferredencoding()
485                             or sys.getdefaultencoding())
486        LOG.debug("Setting encoding to %s", self.encoding)
487
488        # Make sure that self.lang and self.language are reflected
489        # back into the environment for Gtk to use when its
490        # initialized. If self.lang isn't 'C', make sure that it has a
491        # 'UTF-8' suffix, because that's all that GtkBuilder can
492        # digest.
493
494        # Gtk+ has an 'en' po, but we don't. This is worked-around for
495        # our GrampsTranslation class but that isn't used to retrieve
496        # translations in GtkBuilder (glade), a direct call to libintl
497        # (gettext) is. If 'en' is in the translation list it gets
498        # skipped in favor of the next language, which can cause
499        # inappropriate translations of strings in glade/ui files. To
500        # prevent this, if 'en' is in self.language it's the last
501        # entry:
502
503        if 'en' in self.language:
504            self.language = self.language[:self.language.index('en') + 1]
505
506        # Linux note: You'll get unsupported locale errors from Gtk
507        # and untranslated strings if the requisite UTF-8 locale isn't
508        # installed. This is particularly a problem on Debian and
509        # Debian-derived distributions which by default don't install
510        # a lot of locales.
511        lang = locale.normalize(self.language[0] if self.language[0] else 'C')
512        check_lang = lang.split('.')
513        if not check_lang[0]  in ('C', 'en'):
514            if len(check_lang) < 2  or check_lang[1] not in ("utf-8", "UTF-8"):
515                lang = '.'.join((check_lang[0], 'UTF-8'))
516
517        os.environ["LANG"] = lang
518        #We need to convert 'en' and 'en_US' to 'C' to avoid confusing
519        #GtkBuilder when it's retrieving strings from our Glade files
520        #since we have neither an en.po nor an en_US.po.
521
522        os.environ["LANGUAGE"] = ':'.join(self.language)
523
524        # GtkBuilder uses GLib's g_dgettext wrapper, which oddly is bound
525        # with locale instead of gettext. Win32 doesn't support bindtextdomain.
526        if self.localedir:
527            if not sys.platform == 'win32':
528                # bug12278, _build_popup_ui() under linux and macOS
529                locale.textdomain(self.localedomain)
530                locale.bindtextdomain(self.localedomain, self.localedir)
531            else:
532                self._win_bindtextdomain(self.localedomain.encode('utf-8'),
533                                         self.localedir.encode('utf-8'))
534
535        self.rtl_locale = False
536        if self.language[0] in _RTL_LOCALES:
537            self.rtl_locale = True # right-to-left
538
539    def _init_secondary_locale(self):
540        """
541        Init a secondary locale. Secondary locales are used to provide
542        an alternate localization to the one used for the UI; for
543        example, some reports offer the option to use a different
544        language.
545
546        This GrampsLocale class does no caching of the secondary locale.
547        If any caching is desired it must be done externally.
548        """
549        if not self.localedir:
550            LOG.warning("No Localedir provided, unable to find translations")
551
552        if not self.localedomain:
553            if _firstlocaledomain: # TODO this variable is nowhere else
554                self.localedomain = _first.localedomain
555            else:
556                self.localedomain = "gramps"
557
558        _first = self._GrampsLocale__first_instance
559        if not self.lang and _first.lang:
560            self.lang = _first.lang
561
562        if not self.language:
563            if self.lang:
564                trans = self.check_available_translations(self.lang)
565            if trans:
566                self.language = [trans]
567
568        if not self.language and _first.language:
569            self.language = _first.language
570
571        self.numeric = self.currency = self.calendar = self.collation = self.lang
572
573        self.rtl_locale = False
574        if self.language[0] in _RTL_LOCALES:
575            self.rtl_locale = True # right-to-left
576
577    def __init__(self, localedir=None, lang=None, domain=None, languages=None):
578        """
579        Init a GrampsLocale. Run __init_first_instance() to set up the
580        environment if this is the first run. Return __first_instance
581        otherwise if called without arguments.
582        """
583        global _hdlr
584        #initialized is special, used only for the "first instance",
585        #and created by __new__(). It's used to prevent re-__init__ing
586        #__first_instance when __new__() returns its pointer.
587        if hasattr(self, 'initialized') and self.initialized:
588            return
589        _first = self._GrampsLocale__first_instance
590        self.localedir = None
591        # Everything breaks without localedir, so get that set up
592        # first.  Warnings are logged in _init_first_instance or
593        # _init_secondary_locale if this comes up empty.
594        if localedir and os.path.exists(os.path.abspath(localedir)):
595            self.localedir = localedir
596        elif (_first and hasattr(_first, 'localedir') and _first.localedir and
597              os.path.exists(os.path.abspath(_first.localedir))):
598            self.localedir = _first.localedir
599        else:
600            LOG.warning('Missing or invalid localedir %s; no translations'
601                        ' will be available.', repr(localedir))
602        self.lang = lang
603        self.localedomain = domain or 'gramps'
604        if languages:
605            self.language = [x for x in [self.check_available_translations(l)
606                                         for l in languages.split(":")]
607                             if x]
608        else:
609            self.language = None
610
611        if self == _first:
612            self._GrampsLocale__init_first_instance()
613        else:
614            self._init_secondary_locale()
615
616        self.icu_locales = {}
617        self.collator = None
618        if HAVE_ICU:
619            self.icu_locales["default"] = Locale.createFromName(self.lang)
620            if self.collation and self.collation != self.lang:
621                self.icu_locales["collation"] = Locale.createFromName(self.collation)
622            else:
623                self.icu_locales["collation"] = self.icu_locales["default"]
624            try:
625                self.collator = Collator.createInstance(self.icu_locales["collation"])
626            except ICUError as err:
627                LOG.warning("Unable to create collator: %s", str(err))
628                self.collator = None
629
630        try:
631            self.translation = self._get_translation(self.localedomain,
632                                                     self.localedir,
633                                                     self.language)
634        except ValueError:
635            LOG.warning("Unable to find translation for languages in %s, using US English", ':'.join(self.language))
636            self.translation = GrampsNullTranslations()
637            self.translation._language = "en"
638
639        if _hdlr:
640            LOG.removeHandler(_hdlr)
641            _hdlr = None
642        self._dd = self._dp = None
643        #Guards against running twice on the first instance.
644        self.initialized = True
645
646    def _get_translation(self, domain = None,
647                         localedir = None,
648                         languages=None):
649        """
650        Get a translation of one of our classes. Doesn't return the
651        singleton so that it can be used by get_addon_translation()
652        """
653        if not domain:
654            domain = self.localedomain
655        if not languages:
656            languages = self.language
657        if not localedir:
658            localedir = self.localedir
659
660        for lang in languages:
661            if gettext.find(domain, localedir, [lang]):
662                translator = gettext.translation(domain, localedir,
663                                                 [lang],
664                                                 class_ = GrampsTranslations)
665                translator._language = lang
666                return translator
667
668            elif lang.startswith("en") or lang.startswith("C"):
669                translator = GrampsNullTranslations()
670                translator._language = "en"
671                return translator
672
673        if not languages or len(languages) == 0:
674            LOG.warning("No language provided, using US English")
675        else:
676            raise ValueError("No usable translations in %s for " %
677                             ':'.join(languages))
678        translator = GrampsNullTranslations()
679        translator._language = "en"
680        return translator
681
682    def _get_language_string(self, lang_code):
683        """
684        Given a language code of the form "lang_region", return a text string
685        representing that language.
686        """
687        try:
688            lang = _LOCALE_NAMES[lang_code][2]
689        except KeyError:
690            try:
691                lang = _LOCALE_NAMES[lang_code[:2]][2]
692            except KeyError:
693                LOG.debug("Gramps has no translation for %s", lang_code)
694                lang = None
695        except IndexError as err:
696            LOG.debug("Bad Index for tuple %s\n" % _LOCALE_NAMES[lang_code][0])
697            lang = None
698
699        if lang:
700            return self.translation.gettext(lang)
701        return lang
702
703#-------------------------------------------------------------------------
704#
705# Properties
706#
707#-------------------------------------------------------------------------
708    @property
709    def date_displayer(self):
710        """
711        Return the locale's date displayer; if it hasn't already been
712        cached, set it from datehandler.LANG_TO_DISPLAY. If one isn't
713        available for the selected locale, attempt to fall back on the
714        first_instance's locale before settling on the 'C' displayer.
715
716        .. note:: This is the getter for the date_displayer property
717        """
718        if self._dd:
719            return self._dd
720
721        from ..config import config
722        try:
723            val = config.get('preferences.date-format')
724        except AttributeError:
725            val = 0;
726
727        from ..datehandler import LANG_TO_DISPLAY as displayers
728        _first = self._GrampsLocale__first_instance
729        if self.calendar in displayers:
730            self._dd = displayers[self.calendar](val)
731        elif self.calendar[:2] in displayers:
732            self._dd = displayers[self.calendar[:2]](val)
733        elif self != _first and _first.calendar in displayers:
734            self._dd = displayers[_first.calendar](val, blocale=self)
735        elif self != _first and _first.calendar[:2] in displayers:
736            self._dd = displayers[_first.calendar[:2]](val, blocale=self)
737        else:
738            self._dd = displayers['C'](val, blocale=self)
739
740        return self._dd
741
742    @property
743    def date_parser(self):
744        """
745        Return the locale's date parser; if it hasn't already been
746        cached, set it from datehandler.LANG_TO_PARSER. If one isn't
747        available for the selected locale, attempt to fall back on the
748        first_instance's locale before settling on the 'C' parser.
749
750        .. note:: This is the getter for the date_parser property
751        """
752        if self._dp:
753            return self._dp
754
755        from ..datehandler import LANG_TO_PARSER as parsers
756        _first = self._GrampsLocale__first_instance
757        if self.calendar in parsers:
758            self._dp = parsers[self.calendar]()
759        elif self.calendar[:2] in parsers:
760            self._dp = parsers[self.calendar]()
761        elif self != _first and _first.calendar in parsers:
762            self._dp = parsers[_first.calendar]()
763        elif self != _first and _first.calendar[:2] in parsers:
764            self._dp = parsers[_first.calendar[:2]]()
765        else:
766            self._dp = parsers['C']()
767
768        return self._dp
769
770#-------------------------------------------------------------------------
771#
772# Public Functions
773#
774#-------------------------------------------------------------------------
775
776    def get_localedomain(self):
777        """
778        Get the LOCALEDOMAIN used for the Gramps application.
779        Required by gui/glade.py to pass to Gtk.Builder
780        """
781        return self.localedomain
782
783    def get_language_list(self):
784        """
785        Return the list of configured languages.  Used by
786        ViewManager.check_for_updates to select the language for the
787        addons descriptions.
788        """
789        return self.language
790
791
792    def get_addon_translator(self, filename, domain="addon",
793                             languages=None):
794        """
795        Get a translator for an addon.
796
797        :param filename: filename of a file in directory with full path, or
798                         None to get from self.
799        :param domain: the name of the .mo file under the LANG/LC_MESSAGES dir
800        :param languages: a list of languages to force
801        :returns: a gettext.translation object
802
803        Example::
804
805        _ = glocale.get_addon_translator(languages=["fr_BE.utf8"]).gettext
806
807        .. seealso:: the python gettext documentation.
808
809        Assumes path/filename = path/locale/LANG/LC_MESSAGES/addon.mo.
810        """
811        gramps_translator = self._get_translation()
812
813        path = self.localedir
814        if filename:
815            path = os.path.join(os.path.dirname(os.path.abspath(filename)), "locale")
816        if languages:
817            addon_translator = self._get_translation(domain,
818                                                     path,
819                                                     languages=languages)
820        else:
821            addon_translator = self._get_translation(domain, path)
822        gramps_translator.add_fallback(addon_translator)
823        return gramps_translator # with a language fallback
824
825    def get_available_translations(self, localedir = None, localedomain = None):
826        """
827        Get a list of available translations.
828
829        :returns: A list of translation languages.
830        :rtype: unicode[]
831
832        """
833        languages = ["en"]
834
835        if not localedir and self.localedir:
836            localedir = self.localedir
837        else:
838            return languages
839
840        if not localedomain and self.localedomain:
841            localedomain = self.localedomain
842        else:
843            localedomain = 'gramps'
844
845        for langdir in os.listdir(self.localedir):
846            mofilename = os.path.join(localedir, langdir,
847                                      "LC_MESSAGES",
848                                      "%s.mo" % localedomain )
849            if os.path.exists(mofilename):
850                languages.append(langdir)
851
852        languages.sort()
853
854        return languages
855
856    def check_available_translations(self, locale):
857        """
858        Test a locale for having a translation available
859        locale -- string with standard language code, locale code, or name
860        """
861        if not self.localedir:
862            return None
863        #Note that this isn't a typo for self.language; self.languages
864        #is cached so we don't have to query the file system every
865        #time this function is called.
866        if not hasattr(self, 'languages'):
867            self.languages = self.get_available_translations()
868
869        if not locale:
870            return None
871
872        if locale[:5] in self.languages:
873            return locale[:5]
874        #US English is the outlier, all other English locales want real English:
875        if locale[:2] == 'en' and locale[:5] != 'en_US':
876            return 'en_GB'
877        if locale[:2] in self.languages:
878            return locale[:2]
879        return None
880
881    def get_language_dict(self):
882        '''
883        return a dictionary of language names : codes for use by language
884        pickers.
885        '''
886        return {self._get_language_string(code) : code
887                for code in self.get_available_translations()
888                if self._get_language_string(code)}
889
890    def trans_objclass(self, objclass_str):
891        """
892        Translates objclass_str into "... %s", where objclass_str
893        is 'Person', 'person', 'Family', 'family', etc.
894        """
895        _ = self.translation.gettext
896        objclass = objclass_str.lower()
897        if objclass == "person":
898            return _("the person")
899        elif objclass == "family":
900            return _("the family")
901        elif objclass == "place":
902            return _("the place")
903        elif objclass == "event":
904            return _("the event")
905        elif objclass == "repository":
906            return _("the repository")
907        elif objclass == "note":
908            return _("the note")
909        elif objclass == "media":
910            return _("the media")
911        elif objclass == "source":
912            return _("the source")
913        elif objclass == "filter":
914            return _("the filter")
915        elif objclass == "citation":
916            return _("the citation")
917        else:
918            return _("See details")
919
920    def sort_key(self, string):
921        """
922        Return a value suitable to pass to the "key" parameter of sorted()
923        """
924
925        if HAVE_ICU and self.collator:
926            # ICU can digest strings and unicode
927            # Use hexlify() as to make a consistent string, fixing bug #10077
928            return hexlify(self.collator.getCollationKey(string).getByteArray()).decode()
929        else:
930            if isinstance(string, bytes):
931                string = string.decode("utf-8", "replace")
932            try:
933                key = locale.strxfrm(string)
934            except Exception as err:
935                LOG.warning("Failed to obtain key for %s because %s",
936                         self.collation, str(err))
937                return string
938            return key
939
940    def get_collation(self):
941        """
942        Return the collation without any character encoding.
943        """
944        return self.collation.split('.')[0]
945
946    def strcoll(self, string1, string2):
947        """
948        Given two localized strings, compare them and return -1 if
949        string1 would sort first, 1 if string2 would, and 0 if
950        they are the same.
951        """
952        key1 = self.sort_key(string1)
953        key2 = self.sort_key(string2)
954        return (-1 if key1 < key2 else (1 if key1 > key2 else 0))
955
956
957    def get_date(self, date):
958        """
959        Return a string representing the date appropriate for the language being
960        translated.
961
962        :param date: The date to be represented.
963        :type date: :class:`~gen.lib.date.Date`
964        :returns: The date as text in the proper language.
965        :rtype: unicode
966        """
967        return self.date_displayer.display(date)
968
969    def get_type(self, name):
970        """
971        Return a string representing the name appropriate for the language being
972        translated.
973
974        :param name: The name type to be represented.
975        :returns: The name as text in the proper language.
976        :rtype: unicode
977        """
978        from ..lib.grampstype import GrampsType
979        return GrampsType.xml_str(name)
980
981    def format(self, format, val, grouping=False, monetary=False):
982        """
983        Format a number in the current numeric locale. See python's
984        locale.format for details.  ICU's formatting codes are
985        incompatible with locale's, so just use locale.format for now.
986        """
987        return locale.format(format, val, grouping, monetary)
988
989    def format_string(self, format, val, grouping=False):
990        """
991        Format a string in the current numeric locale. See python's
992        locale.format_string for details.  ICU's message formatting codes are
993        incompatible with locale's, so just use locale.format_string
994        for now.
995        """
996        return locale.format_string(format, val, grouping)
997
998    def float(self, val):
999        """
1000        Parse a string to a floating point number. Uses locale.atof(),
1001        in future with ICU present will use icu.NumberFormat.parse().
1002        """
1003        try:
1004            return locale.atof(val)
1005        except ValueError:
1006            point = locale.localeconv()['decimal_point']
1007            sep = locale.localeconv()['thousands_sep']
1008            try:
1009                if point == ',':
1010                    return locale.atof(val.replace(' ', sep).replace('.', sep))
1011                elif point == '.':
1012                    return locale.atof(val.replace(' ', sep).replace(',', sep))
1013                else:
1014                    return None
1015            except ValueError:
1016                return None
1017
1018#-------------------------------------------------------------------------
1019#
1020# Translations Classes
1021#
1022#-------------------------------------------------------------------------
class Lexeme(str):
    r"""
    Created with :meth:`~GrampsTranslations.lexgettext`

    .. rubric:: Example

    Python code::

        _ = lexgettext
        dec = _("lexeme||December")
        xmas = _("lexeme||Christmas")
        text = _("{holiday} is celebrated in {month}").format(
                    holiday=xmas, month=dec)
        greeting = _("Merry {holiday}!").format(holiday=xmas)
        XMAS = xmas.upper()
        print ("\n".join([XMAS, text, greeting]))

    Translation database (Russian example)::

        msgid "lexeme||December"
        msgstr "NOMINATIVE=декабрь|GENITIVE=декабря|ABLATIVE=декабрём|LOCATIVE=декабре"

        msgid "lexeme||Christmas"
        msgstr "NOMINATIVE=рождество|GENITIVE=рождества|ABLATIVE=рождеством"

        msgid "{holiday} is celebrated in {month}"
        msgstr "{holiday} празднуют в {month.f[LOCATIVE]}"

        msgid "Merry {holiday}!"
        msgstr "Счастливого {holiday.f[GENITIVE]}!"

    Prints out::

        In English locale:
            CHRISTMAS
            Christmas is celebrated in December
            Merry Christmas!

        In Russian locale:
            РОЖДЕСТВО
            рождество празднуют в декабре
            Счастливого рождества!

    .. rubric:: Description

    A Lexeme stores an arbitrary number of forms, e.g., inflections,
    and makes each of them accessible under a dictionary key.
    The names of the forms are language-specific. They are assigned
    by the human translator of the corresponding language (in XX.po)
    as in the example above,
    see :meth:`~GrampsTranslations.lexgettext` docs
    for more info.

    The translated format string can then refer to a specific form
    of the lexeme using ``.``:attr:`~Lexeme.f` and square brackets:
    ``{holiday.f[GENITIVE]}``
    expects holiday to be a Lexeme which has a form ``'GENITIVE'`` in it.

    An instance of Lexeme can also be used as a regular unicode string.
    In this case, the work is delegated to the string for the very
    first form provided in the translated string. In the example above,
    ``{holiday}`` in the translated string will expand to the Russian
    nominative form for Christmas, and ``xmas.upper()`` will produce
    the same nominative form in capital letters.

    A Lexeme may also be constructed from a plain string. Such an
    instance carries no forms at all: accessing :attr:`~Lexeme.f` or
    calling :meth:`~Lexeme.variants` on it raises AttributeError.

    .. rubric:: Motivation

    Lexeme is the term used in linguistics for the set of forms taken
    by a particular word, e.g. cases for a noun or tenses for a verb.

    Gramps often needs to compose sentences from several blocks of
    text and single words, often by using python string formatting.

    For instance, formatting a date range is done similarly to this::

        _("Between {startdate_month} {startdate_year}"
              "and {enddate_month} {enddate_year}").format(
                 startdate_month = m1,
                 startdate_year = y1,
                 enddate_month = m2,
                 enddate_year = y2)

    To make such text translatable, the arguments injected into the
    format string need to carry all the linguistic information
    on how to plug them into a sentence, i.e., the forms, depending
    on the linguistic context of where the argument appears.
    The format string needs to select the relevant linguistic form.
    This is why ``m1`` and ``m2`` are instances of :class:`~Lexeme`.

    On the other hand, for languages where there is no linguistic
    variation in such sentences, the code need not be aware of
    the underlying :class:`~Lexeme` complexity;
    and so they can be processed just like simple strings
    both when passed around in the code and when formatted.
    """

    def __new__(cls, iterable, *args, **kwargs):
        # A plain string becomes the text as-is and gets no forms.
        if isinstance(iterable, str):
            return str.__new__(cls, iterable, *args, **kwargs)

        # Otherwise iterable provides (form name, form text) pairs; the
        # first text becomes the string value, "" if there are no pairs.
        forms = collections.OrderedDict(iterable)
        default_form = next(iter(forms.values()), "")
        instance = str.__new__(cls, default_form, *args, **kwargs)
        instance._forms = forms
        return instance

    def variants(self):
        """All lexeme forms, in the same order as given upon construction.
        The first one returned is the default form, which is used when the
        Lexeme instance is used in lieu of a string object.

        Same as ``f.values()``"""
        forms = self._forms
        return forms.values()

    @property
    def f(self):
        """Dictionary of the lexeme forms"""
        return self._forms
1141
class GrampsTranslations(gettext.GNUTranslations):
    """
    Overrides and extends gettext.GNUTranslations. See the Python gettext
    "Class API" documentation for how to use this.
    """
    def language(self):
        """
        Return the target language of this translations object.

        The value is read from ``self._language``, which is not set
        anywhere in this class; it has to be assigned by whoever creates
        the object.
        """
        return self._language

    def gettext(self, msgid):
        """
        Obtain translation of gettext, return a unicode object

        :param msgid: The string to translated.
        :type msgid: unicode
        :returns: Translation or the original.
        :rtype: unicode
        """
        # If msgid =="" then gettext will return po file header
        # and that's not what we want.
        if not msgid.strip():
            return msgid
        return gettext.GNUTranslations.gettext(self, msgid)

    def ngettext(self, singular, plural, num):
        """
        Return the translation of singular/plural that applies to num.

        This delegates directly to gettext.GNUTranslations.ngettext; no
        context separator is stripped here.

        :param singular: The singular form of the string to be translated.
        :type singular: unicode
        :param plural: The plural form of the string to be translated.
        :type plural: unicode
        :param num: the amount for which to decide the translation
        :type num: int
        :returns: Translation or the original.
        :rtype: unicode
        """
        return gettext.GNUTranslations.ngettext(self, singular, plural, num)

    def sgettext(self, msgid, sep='|'):
        """
        Strip the context used for resolving translation ambiguities.

        The translation of msgid is returned unless the translation is
        not available and the msgid contains the separator. In that case,
        the returned value is the portion of msgid following the last
        separator. Default separator is '|'.

        :param msgid: The string to translated.
        :type msgid: unicode
        :param sep: The separator marking the context; it may be longer
                    than one character.
        :type sep: unicode
        :returns: Translation or the original with context stripped.
        :rtype: unicode
        """
        msgval = self.gettext(msgid)
        if msgval == msgid:
            sep_idx = msgid.rfind(sep)
            if sep_idx >= 0:
                # Skip the whole separator, not just its first character.
                msgval = msgid[sep_idx + len(sep):]
        return msgval

    def lexgettext(self, msgid):
        """
        Extract all inflections of the same lexeme,
        stripping the '|'-separated context using :meth:`~sgettext`

        The *resulting* message provided by the translator
        is supposed to be '|'-separated as well.
        The possible formats are either (1) a single string
        for a language with no inflections, or (2) a list of
        <inflection name>=<inflected form>, separated with '|'.
        For example:

           (1) "Uninflectable"
           (2) "n=Inflected-nominative|g=Inflected-genitive|d=Inflected-dative"

        Each item of format (2) is split at its first '=' only, so an
        inflected form may itself contain '='.

        See :class:`~Lexeme` documentation for detailed explanation and example.

        :param msgid: The string to translated.
        :type msgid: unicode
        :returns: Translation or the original with context stripped.
        :rtype: unicode (for option (1)) / Lexeme (option (2))
        """
        variants = self.sgettext(msgid).split('|')
        if len(variants) == 1:
            return variants[0]
        return Lexeme([v.split('=', 1) for v in variants])
1233
class GrampsNullTranslations(gettext.NullTranslations):
    """
    Extends gettext.NullTranslations to provide the sgettext method.

    Note that it's necessary for msgid to be unicode. If it's not,
    neither will be the returned string.
    """
    def sgettext(self, msgid, sep='|'):
        """
        Return the result of gettext for msgid; when that is msgid
        itself, return only the part following the last separator.

        :param msgid: the string to translate, optionally prefixed with a
                      context that ends with the separator
        :param sep: the separator marking the context; it may be longer
                    than one character. Default is '|'.
        :returns: translation, or msgid with its context stripped
        """
        msgval = self.gettext(msgid)
        if msgval == msgid:
            sep_idx = msgid.rfind(sep)
            if sep_idx >= 0:
                # Skip the whole separator, not just its first character.
                msgval = msgid[sep_idx + len(sep):]
        return msgval

    # Without a catalog there are no inflections to extract, so
    # lexgettext is simply sgettext.
    lexgettext = sgettext

    def language(self):
        """
        The null translation returns the raw msgids, which are in English
        """
        return "en"
1255