1# -*- coding: utf-8 -*-
2#
3# Copyright 2009 Facebook
4#
5# Licensed under the Apache License, Version 2.0 (the "License"); you may
6# not use this file except in compliance with the License. You may obtain
7# a copy of the License at
8#
9#     http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14# License for the specific language governing permissions and limitations
15# under the License.
16
17"""Translation methods for generating localized strings.
18
19To load a locale and generate a translated string::
20
21    user_locale = tornado.locale.get("es_LA")
22    print(user_locale.translate("Sign out"))
23
24`tornado.locale.get()` returns the closest matching locale, not necessarily the
25specific locale you requested. You can support pluralization with
26additional arguments to `~Locale.translate()`, e.g.::
27
28    people = [...]
29    message = user_locale.translate(
30        "%(list)s is online", "%(list)s are online", len(people))
31    print(message % {"list": user_locale.list(people)})
32
33The first string is chosen if ``len(people) == 1``, otherwise the second
34string is chosen.
35
36Applications should call one of `load_translations` (which uses a simple
37CSV format) or `load_gettext_translations` (which uses the ``.mo`` format
38supported by `gettext` and related tools).  If neither method is called,
39the `Locale.translate` method will simply return the original string.
40"""
41
42from __future__ import absolute_import, division, print_function
43
44import codecs
45import csv
46import datetime
47from io import BytesIO
48import numbers
49import os
50import re
51
52from tornado import escape
53from tornado.log import gen_log
54from tornado.util import PY3
55
56from tornado._locale_data import LOCALE_NAMES
57
# Locale code used as the fallback by `Locale.get_closest` and always included
# in `_supported_locales`; changed through `set_default_locale()`.
_default_locale = "en_US"
# Loaded translations keyed by locale code. `load_translations` stores nested
# dicts here (plural indicator -> {source string: translation});
# `load_gettext_translations` stores gettext translation objects instead.
_translations = {}  # type: dict
# Every locale code that has loaded translations, plus the default locale.
# Rebuilt by the loaders and by `set_default_locale()`.
_supported_locales = frozenset([_default_locale])
# Set to True by `load_gettext_translations`; `Locale.get` consults it to
# decide between `GettextLocale` and `CSVLocale`.
_use_gettext = False
# Character placed between the context and the message when
# `GettextLocale.pgettext` builds a context-qualified lookup key.
CONTEXT_SEPARATOR = "\x04"
63
64
def get(*locale_codes):
    """Returns the `Locale` that most closely matches the given codes.

    The codes are tried in the order given. For each one, an exact match
    or a looser language-only match (e.g., "en" for "en_US") is accepted
    and that locale is returned; otherwise the next code is considered.

    When none of the codes has translations available, the default
    locale (``en_US`` unless changed via `set_default_locale()`) is
    returned.
    """
    closest = Locale.get_closest(*locale_codes)
    return closest
77
78
def set_default_locale(code):
    """Sets the default locale.

    All strings in the system are assumed to be written in the default
    locale, and translations loaded from disk map from the default
    locale to a destination locale. As a result, no translation file is
    needed for the default locale itself.
    """
    global _default_locale, _supported_locales
    _default_locale = code
    # The default locale is always supported, even without translations.
    known_codes = set(_translations)
    known_codes.add(code)
    _supported_locales = frozenset(known_codes)
91
92
def load_translations(directory, encoding=None):
    """Loads translations from CSV files in a directory.

    Translations are strings with optional Python-style named placeholders
    (e.g., ``My name is %(name)s``) and their associated translations.

    The directory should have translation files of the form ``LOCALE.csv``,
    e.g. ``es_GT.csv``. The CSV files should have two or three columns: string,
    translation, and an optional plural indicator. Plural indicators should
    be one of "plural" or "singular". A given string can have both singular
    and plural forms. For example ``%(name)s liked this`` may have a
    different verb conjugation depending on whether %(name)s is one
    name or a list of names. There should be two rows in the CSV file for
    that string, one with plural indicator "singular", and one "plural".
    For strings with no verbs that would change on translation, simply
    use "unknown" or the empty string (or don't include the column at all).

    The file is read using the `csv` module in the default "excel" dialect.
    In this format there should not be spaces after the commas.

    If no ``encoding`` parameter is given, the encoding will be
    detected automatically (among UTF-8 and UTF-16) if the file
    contains a byte-order marker (BOM), defaulting to UTF-8 if no BOM
    is present. Detection is performed separately for every file, so a
    directory may mix UTF-8 and UTF-16 files.

    Files whose name (minus the ``.csv`` suffix) does not look like a
    locale code, and rows with an unrecognized plural indicator, are
    logged as errors and skipped. Rows with fewer than two columns are
    silently ignored.

    Calling this function replaces any previously loaded translations.

    Example translation ``es_LA.csv``::

        "I love you","Te amo"
        "%(name)s liked this","A %(name)s les gustó esto","plural"
        "%(name)s liked this","A %(name)s le gustó esto","singular"

    .. versionchanged:: 4.3
       Added ``encoding`` parameter. Added support for BOM-based encoding
       detection, UTF-16, and UTF-8-with-BOM.
    """
    global _translations
    global _supported_locales
    _translations = {}
    for path in os.listdir(directory):
        if not path.endswith(".csv"):
            continue
        # Strip only the trailing ".csv". Names containing additional dots
        # (e.g. "es.old.csv") then fail the locale check below and are
        # reported, instead of raising ValueError from tuple unpacking.
        locale = path[:-len(".csv")]
        if not re.match("[a-z]+(_[A-Z]+)?$", locale):
            gen_log.error("Unrecognized locale %r (path: %s)", locale,
                          os.path.join(directory, path))
            continue
        full_path = os.path.join(directory, path)
        # Keep the caller's ``encoding`` untouched so that autodetection
        # runs for each file rather than only for the first one.
        file_encoding = encoding
        if file_encoding is None:
            # Try to autodetect encoding based on the BOM.
            with open(full_path, 'rb') as bom_file:
                data = bom_file.read(len(codecs.BOM_UTF16_LE))
            if data in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
                file_encoding = 'utf-16'
            else:
                # utf-8-sig is "utf-8 with optional BOM". It's discouraged
                # in most cases but is common with CSV files because Excel
                # cannot read utf-8 files without a BOM.
                file_encoding = 'utf-8-sig'
        if PY3:
            # python 3: csv.reader requires a file open in text mode.
            # Pass the encoding explicitly to avoid dependence on the
            # $LANG environment variable.
            f = open(full_path, "r", encoding=file_encoding)
        else:
            # python 2: csv can only handle byte strings (in ascii-compatible
            # encodings), which we decode below. Transcode everything into
            # utf8 before passing it to csv.reader.
            f = BytesIO()
            with codecs.open(full_path, "r", encoding=file_encoding) as infile:
                f.write(escape.utf8(infile.read()))
            f.seek(0)
        _translations[locale] = {}
        try:
            for i, row in enumerate(csv.reader(f)):
                if len(row) < 2:
                    continue
                row = [escape.to_unicode(c).strip() for c in row]
                english, translation = row[:2]
                if len(row) > 2:
                    plural = row[2] or "unknown"
                else:
                    plural = "unknown"
                if plural not in ("plural", "singular", "unknown"):
                    gen_log.error(
                        "Unrecognized plural indicator %r in %s line %d",
                        plural, path, i + 1)
                    continue
                _translations[locale].setdefault(plural, {})[english] = \
                    translation
        finally:
            # Release the handle even if reading or decoding a row fails.
            f.close()
    _supported_locales = frozenset(list(_translations.keys()) + [_default_locale])
    gen_log.debug("Supported locales: %s", sorted(_supported_locales))
181
182
def load_gettext_translations(directory, domain):
    """Loads translations from a `gettext` locale tree.

    The tree is laid out like the system's ``/usr/share/locale``::

        {directory}/{lang}/LC_MESSAGES/{domain}.mo

    Every non-hidden subdirectory of ``directory`` is treated as a
    language. Languages whose ``.mo`` file is missing or cannot be loaded
    are logged as errors and skipped. Previously loaded translations are
    discarded.

    Translating an application takes three steps:

    1. Generate POT translation file::

        xgettext --language=Python --keyword=_:1,2 -d mydomain file1.py file2.html etc

    2. Merge against existing POT file::

        msgmerge old.po mydomain.po > new.po

    3. Compile::

        msgfmt mydomain.po -o {directory}/pt_BR/LC_MESSAGES/mydomain.mo
    """
    import gettext
    global _translations, _supported_locales, _use_gettext
    _translations = {}
    for entry in os.listdir(directory):
        # Hidden entries (.svn, etc) and plain files are not locale dirs.
        if entry.startswith('.') or \
                os.path.isfile(os.path.join(directory, entry)):
            continue
        try:
            # stat() raises if the compiled catalog for this exact
            # language directory is absent.
            os.stat(os.path.join(directory, entry, "LC_MESSAGES",
                                 domain + ".mo"))
            catalog = gettext.translation(domain, directory,
                                          languages=[entry])
            _translations[entry] = catalog
        except Exception as err:
            gen_log.error("Cannot load translation for '%s': %s",
                          entry, str(err))
    _supported_locales = frozenset(_translations) | \
        frozenset([_default_locale])
    _use_gettext = True
    gen_log.debug("Supported locales: %s", sorted(_supported_locales))
224
225
def get_supported_locales():
    """Returns the set of all the supported locale codes.

    The result is the module-level ``frozenset`` containing every locale
    code with loaded translations plus the default locale.
    """
    return _supported_locales
229
230
class Locale(object):
    """Object representing a locale.

    After calling one of `load_translations` or `load_gettext_translations`,
    call `get` or `get_closest` to get a Locale object.

    Subclasses must implement `translate` and `pgettext`; the formatting
    helpers on this class are built on top of `translate`.
    """
    @classmethod
    def get_closest(cls, *locale_codes):
        """Returns the closest match for the given locale codes.

        Codes are tried in order. Each code is normalized (``-`` becomes
        ``_``, language lower-cased, region upper-cased) and accepted if
        either the full code or just its language part is supported.
        Empty codes and codes with more than two parts are skipped. The
        default locale is returned when nothing matches.
        """
        for code in locale_codes:
            if not code:
                continue
            code = code.replace("-", "_")
            parts = code.split("_")
            if len(parts) > 2:
                continue
            elif len(parts) == 2:
                code = parts[0].lower() + "_" + parts[1].upper()
            if code in _supported_locales:
                return cls.get(code)
            language = parts[0].lower()
            if language in _supported_locales:
                return cls.get(language)
        return cls.get(_default_locale)

    @classmethod
    def get(cls, code):
        """Returns the Locale for the given locale code.

        If it is not supported, we raise an exception (an
        ``AssertionError``). Instances are created on first use and
        cached per code on the class.
        """
        if not hasattr(cls, "_cache"):
            cls._cache = {}
        if code not in cls._cache:
            assert code in _supported_locales
            translations = _translations.get(code, None)
            if translations is None:
                # Supported but without loaded translations (e.g. the
                # default locale): translate() returns strings unchanged.
                locale = CSVLocale(code, {})
            elif _use_gettext:
                locale = GettextLocale(code, translations)
            else:
                locale = CSVLocale(code, translations)
            cls._cache[code] = locale
        return cls._cache[code]

    def __init__(self, code, translations):
        """Stores the locale code and translations and pre-translates
        the month and weekday names used by the date formatters.
        """
        self.code = code
        self.name = LOCALE_NAMES.get(code, {}).get("name", u"Unknown")
        # Persian, Arabic and Hebrew locales are flagged right-to-left.
        self.rtl = self.code.startswith(("fa", "ar", "he"))
        self.translations = translations

        # Initialize strings for date formatting
        _ = self.translate
        self._months = [
            _("January"), _("February"), _("March"), _("April"),
            _("May"), _("June"), _("July"), _("August"),
            _("September"), _("October"), _("November"), _("December")]
        self._weekdays = [
            _("Monday"), _("Tuesday"), _("Wednesday"), _("Thursday"),
            _("Friday"), _("Saturday"), _("Sunday")]

    def translate(self, message, plural_message=None, count=None):
        """Returns the translation for the given message for this locale.

        If ``plural_message`` is given, you must also provide
        ``count``. We return ``plural_message`` when ``count != 1``,
        and we return the singular form for the given message when
        ``count == 1``.

        Must be overridden by subclasses.
        """
        raise NotImplementedError()

    def pgettext(self, context, message, plural_message=None, count=None):
        """Returns the translation of ``message`` within ``context``.

        Must be overridden by subclasses.
        """
        raise NotImplementedError()

    def format_date(self, date, gmt_offset=0, relative=True, shorter=False,
                    full_format=False):
        """Formats the given date (which should be GMT).

        ``date`` may be a naive `datetime.datetime` or a numeric Unix
        timestamp. ``gmt_offset`` is given in minutes and is subtracted
        from the date to obtain local time.

        By default, we return a relative time (e.g., "2 minutes ago"). You
        can return an absolute date string with ``relative=False``.

        You can force a full format date ("July 10, 1980") with
        ``full_format=True``.

        This method is primarily intended for dates in the past.
        For dates in the future, we fall back to full format.
        """
        if isinstance(date, numbers.Real):
            date = datetime.datetime.utcfromtimestamp(date)
        now = datetime.datetime.utcnow()
        if date > now:
            # Compare whole timedeltas: ``timedelta.seconds`` ignores the
            # days component, which would make a date days ahead look
            # like it is only seconds away.
            if relative and (date - now) < datetime.timedelta(seconds=60):
                # Due to clock skew, some things are slightly
                # in the future. Round timestamps in the immediate
                # future down to now in relative mode.
                date = now
            else:
                # Otherwise, future dates always use the full format.
                full_format = True
        local_date = date - datetime.timedelta(minutes=gmt_offset)
        local_now = now - datetime.timedelta(minutes=gmt_offset)
        local_yesterday = local_now - datetime.timedelta(hours=24)
        difference = now - date
        seconds = difference.seconds
        days = difference.days

        _ = self.translate
        fmt = None
        if not full_format:
            if relative and days == 0:
                if seconds < 50:
                    return _("1 second ago", "%(seconds)d seconds ago",
                             seconds) % {"seconds": seconds}

                if seconds < 50 * 60:
                    minutes = round(seconds / 60.0)
                    return _("1 minute ago", "%(minutes)d minutes ago",
                             minutes) % {"minutes": minutes}

                hours = round(seconds / (60.0 * 60))
                return _("1 hour ago", "%(hours)d hours ago",
                         hours) % {"hours": hours}

            if days == 0:
                fmt = _("%(time)s")
            elif days == 1 and local_date.day == local_yesterday.day and \
                    relative:
                fmt = _("yesterday") if shorter else \
                    _("yesterday at %(time)s")
            elif days < 5:
                fmt = _("%(weekday)s") if shorter else \
                    _("%(weekday)s at %(time)s")
            elif days < 334:  # 11mo, since confusing for same month last year
                fmt = _("%(month_name)s %(day)s") if shorter else \
                    _("%(month_name)s %(day)s at %(time)s")

        if fmt is None:
            fmt = _("%(month_name)s %(day)s, %(year)s") if shorter else \
                _("%(month_name)s %(day)s, %(year)s at %(time)s")

        # English and Simplified Chinese use a 12-hour clock; every other
        # locale gets 24-hour times.
        tfhour_clock = self.code not in ("en", "en_US", "zh_CN")
        if tfhour_clock:
            str_time = "%d:%02d" % (local_date.hour, local_date.minute)
        elif self.code == "zh_CN":
            str_time = "%s%d:%02d" % (
                (u'\u4e0a\u5348', u'\u4e0b\u5348')[local_date.hour >= 12],
                local_date.hour % 12 or 12, local_date.minute)
        else:
            str_time = "%d:%02d %s" % (
                local_date.hour % 12 or 12, local_date.minute,
                ("am", "pm")[local_date.hour >= 12])

        return fmt % {
            "month_name": self._months[local_date.month - 1],
            "weekday": self._weekdays[local_date.weekday()],
            "day": str(local_date.day),
            "year": str(local_date.year),
            "time": str_time
        }

    def format_day(self, date, gmt_offset=0, dow=True):
        """Formats the given date as a day of week.

        Example: "Monday, January 22". You can remove the day of week with
        ``dow=False``. ``gmt_offset`` is given in minutes and is
        subtracted from ``date`` before formatting.
        """
        local_date = date - datetime.timedelta(minutes=gmt_offset)
        _ = self.translate
        if dow:
            return _("%(weekday)s, %(month_name)s %(day)s") % {
                "month_name": self._months[local_date.month - 1],
                "weekday": self._weekdays[local_date.weekday()],
                "day": str(local_date.day),
            }
        else:
            return _("%(month_name)s %(day)s") % {
                "month_name": self._months[local_date.month - 1],
                "day": str(local_date.day),
            }

    def list(self, parts):
        """Returns a comma-separated list for the given list of parts.

        The format is, e.g., "A, B and C", "A and B" or just "A" for lists
        of size 1. An empty list yields the empty string.
        """
        _ = self.translate
        if len(parts) == 0:
            return ""
        if len(parts) == 1:
            return parts[0]
        # Persian locales join the leading items with " \u0648 " rather
        # than a comma.
        comma = u' \u0648 ' if self.code.startswith("fa") else u", "
        return _("%(commas)s and %(last)s") % {
            "commas": comma.join(parts[:-1]),
            "last": parts[-1],
        }

    def friendly_number(self, value):
        """Returns a comma-separated number for the given integer.

        Grouping is applied only for the ``en`` and ``en_US`` locales;
        every other locale gets ``str(value)`` unchanged. A leading minus
        sign is kept in front of the grouped digits.
        """
        if self.code not in ("en", "en_US"):
            return str(value)
        digits = str(value)
        sign = ""
        if digits.startswith("-"):
            # Set the sign aside so it is not grouped as if it were a
            # digit (which would turn -123 into "-,123").
            sign, digits = "-", digits[1:]
        parts = []
        while digits:
            parts.append(digits[-3:])
            digits = digits[:-3]
        return sign + ",".join(reversed(parts))
441
442
class CSVLocale(Locale):
    """Locale backed by translations loaded from tornado's CSV format.

    ``self.translations`` maps a plural indicator ("singular", "plural"
    or "unknown") to a dict of source string -> translated string.
    """
    def translate(self, message, plural_message=None, count=None):
        """Looks up ``message`` in the table for the applicable plural form.

        Without ``plural_message`` the "unknown" table is used. Otherwise
        ``count`` selects between the "singular" table (``count == 1``)
        and the "plural" table, in which case ``plural_message`` is the
        lookup key. The untranslated string is returned on a miss.
        """
        form = "unknown"
        if plural_message is not None:
            assert count is not None
            form = "singular"
            if count != 1:
                form = "plural"
                message = plural_message
        table = self.translations.get(form, {})
        return table.get(message, message)

    def pgettext(self, context, message, plural_message=None, count=None):
        """Ignores ``context`` and defers to `translate`.

        A warning is logged when translations are loaded, since the CSV
        format has no notion of context.
        """
        if self.translations:
            gen_log.warning('pgettext is not supported by CSVLocale')
        return self.translate(message, plural_message, count)
461
462
class GettextLocale(Locale):
    """Locale backed by a `gettext` translations object."""
    def __init__(self, code, translations):
        # The lookup callables have to be bound before the base-class
        # initializer runs, because that initializer calls
        # self.translate.
        try:
            # python 2 exposes the unicode-returning variants
            plural_lookup = translations.ungettext
            single_lookup = translations.ugettext
        except AttributeError:
            # python 3
            plural_lookup = translations.ngettext
            single_lookup = translations.gettext
        self.ngettext = plural_lookup
        self.gettext = single_lookup
        super(GettextLocale, self).__init__(code, translations)

    def translate(self, message, plural_message=None, count=None):
        """Translates ``message``, using the plural-aware lookup when a
        ``plural_message`` is supplied (``count`` is then required).
        """
        if plural_message is None:
            return self.gettext(message)
        assert count is not None
        return self.ngettext(message, plural_message, count)

    def pgettext(self, context, message, plural_message=None, count=None):
        """Translates a message within a given context; plurals supported.

        Usage example::

            pgettext("law", "right")
            pgettext("good", "right")

        Plural message example::

            pgettext("organization", "club", "clubs", len(clubs))
            pgettext("stick", "club", "clubs", len(clubs))

        To generate POT file with context, add following options to step 1
        of `load_gettext_translations` sequence::

            xgettext [basic options] --keyword=pgettext:1c,2 --keyword=pgettext:1c,2,3

        .. versionadded:: 4.2
        """
        def in_context(text):
            # Lookup key: context and message joined by the separator.
            return "%s%s%s" % (context, CONTEXT_SEPARATOR, text)

        if plural_message is None:
            found = self.gettext(in_context(message))
            # A result still containing the separator is the key echoed
            # back, i.e. translation not found.
            return message if CONTEXT_SEPARATOR in found else found

        assert count is not None
        found = self.ngettext(in_context(message),
                              in_context(plural_message),
                              count)
        if CONTEXT_SEPARATOR in found:
            # Translation not found
            found = self.ngettext(message, plural_message, count)
        return found
522