# -*- coding: utf-8 -*-
#
# Copyright 2009 Facebook
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Translation methods for generating localized strings.

To load a locale and generate a translated string::

    user_locale = tornado.locale.get("es_LA")
    print(user_locale.translate("Sign out"))

`tornado.locale.get()` returns the closest matching locale, not necessarily the
specific locale you requested. You can support pluralization with
additional arguments to `~Locale.translate()`, e.g.::

    people = [...]
    message = user_locale.translate(
        "%(list)s is online", "%(list)s are online", len(people))
    print(message % {"list": user_locale.list(people)})

The first string is chosen if ``len(people) == 1``, otherwise the second
string is chosen.

Applications should call one of `load_translations` (which uses a simple
CSV format) or `load_gettext_translations` (which uses the ``.mo`` format
supported by `gettext` and related tools).  If neither method is called,
the `Locale.translate` method will simply return the original string.
"""

from __future__ import absolute_import, division, print_function

import codecs
import csv
import datetime
from io import BytesIO
import numbers
import os
import re

from tornado import escape
from tornado.log import gen_log
from tornado.util import PY3

from tornado._locale_data import LOCALE_NAMES

_default_locale = "en_US"
_translations = {}  # type: dict
_supported_locales = frozenset([_default_locale])
_use_gettext = False
CONTEXT_SEPARATOR = "\x04"


def get(*locale_codes):
    """Returns the closest match for the given locale codes.

    We iterate over all given locale codes in order. If we have a tight
    or a loose match for the code (e.g., "en" for "en_US"), we return
    the locale. Otherwise we move to the next code in the list.

    By default we return ``en_US`` if no translations are found for any of
    the specified locales. You can change the default locale with
    `set_default_locale()`.
    """
    return Locale.get_closest(*locale_codes)


def set_default_locale(code):
    """Sets the default locale.

    The default locale is assumed to be the language used for all strings
    in the system. The translations loaded from disk are mappings from
    the default locale to the destination locale. Consequently, you don't
    need to create a translation file for the default locale.
    """
    global _default_locale
    global _supported_locales
    _default_locale = code
    _supported_locales = frozenset(list(_translations.keys()) + [_default_locale])


def load_translations(directory, encoding=None):
    """Loads translations from CSV files in a directory.

    Translations are strings with optional Python-style named placeholders
    (e.g., ``My name is %(name)s``) and their associated translations.

    The directory should have translation files of the form ``LOCALE.csv``,
    e.g. ``es_GT.csv``. The CSV files should have two or three columns: string,
    translation, and an optional plural indicator. Plural indicators should
    be one of "plural" or "singular". A given string can have both singular
    and plural forms. For example ``%(name)s liked this`` may have a
    different verb conjugation depending on whether %(name)s is one
    name or a list of names. There should be two rows in the CSV file for
    that string, one with plural indicator "singular", and one "plural".
    For strings with no verbs that would change on translation, simply
    use "unknown" or the empty string (or don't include the column at all).

    The file is read using the `csv` module in the default "excel" dialect.
    In this format there should not be spaces after the commas.

    If no ``encoding`` parameter is given, the encoding will be
    detected automatically (among UTF-8 and UTF-16) if the file
    contains a byte-order marker (BOM), defaulting to UTF-8 if no BOM
    is present. Detection is performed independently for every file.

    Files whose name (minus the ``.csv`` suffix) does not look like a
    locale code, and rows with an unknown plural indicator, are logged
    and skipped.

    Example translation ``es_LA.csv``::

        "I love you","Te amo"
        "%(name)s liked this","A %(name)s les gustó esto","plural"
        "%(name)s liked this","A %(name)s le gustó esto","singular"

    .. versionchanged:: 4.3
       Added ``encoding`` parameter. Added support for BOM-based encoding
       detection, UTF-16, and UTF-8-with-BOM.
    """
    global _translations
    global _supported_locales
    _translations = {}
    for path in os.listdir(directory):
        if not path.endswith(".csv"):
            continue
        # Strip only the trailing ".csv". Names containing additional dots
        # (e.g. "es.old.csv") are then rejected by the pattern below instead
        # of blowing up while unpacking the result of a split.
        locale = path[:-len(".csv")]
        if not re.match("[a-z]+(_[A-Z]+)?$", locale):
            gen_log.error("Unrecognized locale %r (path: %s)", locale,
                          os.path.join(directory, path))
            continue
        full_path = os.path.join(directory, path)
        # Resolve the encoding per file: an autodetected value must not
        # leak into the next iteration and override that file's own BOM.
        file_encoding = encoding
        if file_encoding is None:
            # Try to autodetect encoding based on the BOM.
            with open(full_path, 'rb') as bom_file:
                data = bom_file.read(len(codecs.BOM_UTF16_LE))
            if data in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
                file_encoding = 'utf-16'
            else:
                # utf-8-sig is "utf-8 with optional BOM". It's discouraged
                # in most cases but is common with CSV files because Excel
                # cannot read utf-8 files without a BOM.
                file_encoding = 'utf-8-sig'
        if PY3:
            # python 3: csv.reader requires a file open in text mode.
            # Pass the encoding explicitly to avoid dependence on the
            # $LANG environment variable.
            f = open(full_path, "r", encoding=file_encoding)
        else:
            # python 2: csv can only handle byte strings (in ascii-compatible
            # encodings), which we decode below.  Transcode everything into
            # utf8 before passing it to csv.reader.
            f = BytesIO()
            with codecs.open(full_path, "r", encoding=file_encoding) as infile:
                f.write(escape.utf8(infile.read()))
            f.seek(0)
        try:
            _translations[locale] = {}
            for i, row in enumerate(csv.reader(f)):
                if len(row) < 2:
                    continue
                row = [escape.to_unicode(c).strip() for c in row]
                english, translation = row[:2]
                if len(row) > 2:
                    plural = row[2] or "unknown"
                else:
                    plural = "unknown"
                if plural not in ("plural", "singular", "unknown"):
                    gen_log.error("Unrecognized plural indicator %r in %s line %d",
                                  plural, path, i + 1)
                    continue
                _translations[locale].setdefault(plural, {})[english] = translation
        finally:
            # Release the handle even if decoding or CSV parsing fails.
            f.close()
    _supported_locales = frozenset(list(_translations.keys()) + [_default_locale])
    gen_log.debug("Supported locales: %s", sorted(_supported_locales))


def load_gettext_translations(directory, domain):
    """Loads translations from `gettext`'s locale tree

    Locale tree is similar to system's ``/usr/share/locale``, like::

        {directory}/{lang}/LC_MESSAGES/{domain}.mo

    Three steps are required to have your app translated:

    1. Generate POT translation file::

        xgettext --language=Python --keyword=_:1,2 -d mydomain file1.py file2.html etc

    2. Merge against existing POT file::

        msgmerge old.po mydomain.po > new.po

    3. Compile::

        msgfmt mydomain.po -o {directory}/pt_BR/LC_MESSAGES/mydomain.mo
    """
    import gettext
    global _translations
    global _supported_locales
    global _use_gettext
    _translations = {}
    for lang in os.listdir(directory):
        if lang.startswith('.'):
            continue  # skip .svn, etc
        if os.path.isfile(os.path.join(directory, lang)):
            continue
        try:
            # os.stat raises if the catalog is missing, so the language is
            # logged and skipped rather than registered.
            os.stat(os.path.join(directory, lang, "LC_MESSAGES", domain + ".mo"))
            _translations[lang] = gettext.translation(domain, directory,
                                                      languages=[lang])
        except Exception as e:
            # Best effort: one broken catalog must not prevent the others
            # from loading.
            gen_log.error("Cannot load translation for '%s': %s", lang, str(e))
            continue
    _supported_locales = frozenset(list(_translations.keys()) + [_default_locale])
    _use_gettext = True
    gen_log.debug("Supported locales: %s", sorted(_supported_locales))


def get_supported_locales():
    """Returns a frozenset of all the supported locale codes."""
    return _supported_locales


class Locale(object):
    """Object representing a locale.

    After calling one of `load_translations` or `load_gettext_translations`,
    call `get` or `get_closest` to get a Locale object.
    """
    @classmethod
    def get_closest(cls, *locale_codes):
        """Returns the closest match for the given locale code."""
        for code in locale_codes:
            if not code:
                continue
            code = code.replace("-", "_")
            parts = code.split("_")
            if len(parts) > 2:
                continue
            elif len(parts) == 2:
                # Normalize to the "ll_CC" form used as keys.
                code = parts[0].lower() + "_" + parts[1].upper()
            if code in _supported_locales:
                return cls.get(code)
            # Loose match: fall back to the bare language code.
            if parts[0].lower() in _supported_locales:
                return cls.get(parts[0].lower())
        return cls.get(_default_locale)

    @classmethod
    def get(cls, code):
        """Returns the Locale for the given locale code.

        If it is not supported, we raise an exception.
        """
        if not hasattr(cls, "_cache"):
            cls._cache = {}
        if code not in cls._cache:
            assert code in _supported_locales
            translations = _translations.get(code, None)
            if translations is None:
                # Supported but without loaded translations (e.g. the
                # default locale): strings pass through untranslated.
                locale = CSVLocale(code, {})
            elif _use_gettext:
                locale = GettextLocale(code, translations)
            else:
                locale = CSVLocale(code, translations)
            cls._cache[code] = locale
        return cls._cache[code]

    def __init__(self, code, translations):
        self.code = code
        self.name = LOCALE_NAMES.get(code, {}).get("name", u"Unknown")
        # Codes starting with these prefixes are flagged as right-to-left.
        self.rtl = self.code.startswith(("fa", "ar", "he"))
        self.translations = translations

        # Initialize strings for date formatting
        _ = self.translate
        self._months = [
            _("January"), _("February"), _("March"), _("April"),
            _("May"), _("June"), _("July"), _("August"),
            _("September"), _("October"), _("November"), _("December")]
        self._weekdays = [
            _("Monday"), _("Tuesday"), _("Wednesday"), _("Thursday"),
            _("Friday"), _("Saturday"), _("Sunday")]

    def translate(self, message, plural_message=None, count=None):
        """Returns the translation for the given message for this locale.

        If ``plural_message`` is given, you must also provide
        ``count``. We return ``plural_message`` when ``count != 1``,
        and we return the singular form for the given message when
        ``count == 1``.
        """
        raise NotImplementedError()

    def pgettext(self, context, message, plural_message=None, count=None):
        """Context-aware variant of `translate`; implemented by subclasses."""
        raise NotImplementedError()

    def format_date(self, date, gmt_offset=0, relative=True, shorter=False,
                    full_format=False):
        """Formats the given date (which should be GMT).

        By default, we return a relative time (e.g., "2 minutes ago"). You
        can return an absolute date string with ``relative=False``.

        You can force a full format date ("July 10, 1980") with
        ``full_format=True``.

        ``date`` may be a `datetime.datetime` or a numeric UTC timestamp.
        ``gmt_offset`` is in minutes and is subtracted from the date to
        obtain the local time that is displayed.

        This method is primarily intended for dates in the past.
        For dates in the future, we fall back to full format.
        """
        if isinstance(date, numbers.Real):
            date = datetime.datetime.utcfromtimestamp(date)
        now = datetime.datetime.utcnow()
        if date > now:
            # timedelta.seconds is only the sub-day remainder, so compare
            # the total duration; otherwise a date days ahead could be
            # mistaken for clock skew.
            if relative and (date - now).total_seconds() < 60:
                # Due to clock skew, some things are slightly
                # in the future. Round timestamps in the immediate
                # future down to now in relative mode.
                date = now
            else:
                # Otherwise, future dates always use the full format.
                full_format = True
        local_date = date - datetime.timedelta(minutes=gmt_offset)
        local_now = now - datetime.timedelta(minutes=gmt_offset)
        local_yesterday = local_now - datetime.timedelta(hours=24)
        difference = now - date
        seconds = difference.seconds
        days = difference.days

        _ = self.translate
        fmt = None
        if not full_format:
            if relative and days == 0:
                if seconds < 50:
                    return _("1 second ago", "%(seconds)d seconds ago",
                             seconds) % {"seconds": seconds}

                if seconds < 50 * 60:
                    minutes = round(seconds / 60.0)
                    return _("1 minute ago", "%(minutes)d minutes ago",
                             minutes) % {"minutes": minutes}

                hours = round(seconds / (60.0 * 60))
                return _("1 hour ago", "%(hours)d hours ago",
                         hours) % {"hours": hours}

            if days == 0:
                fmt = _("%(time)s")
            elif days == 1 and local_date.day == local_yesterday.day and \
                    relative:
                fmt = _("yesterday") if shorter else \
                    _("yesterday at %(time)s")
            elif days < 5:
                fmt = _("%(weekday)s") if shorter else \
                    _("%(weekday)s at %(time)s")
            elif days < 334:  # 11mo, since confusing for same month last year
                fmt = _("%(month_name)s %(day)s") if shorter else \
                    _("%(month_name)s %(day)s at %(time)s")

        if fmt is None:
            fmt = _("%(month_name)s %(day)s, %(year)s") if shorter else \
                _("%(month_name)s %(day)s, %(year)s at %(time)s")

        # Every locale except en, en_US and zh_CN gets a 24-hour clock.
        tfhour_clock = self.code not in ("en", "en_US", "zh_CN")
        if tfhour_clock:
            str_time = "%d:%02d" % (local_date.hour, local_date.minute)
        elif self.code == "zh_CN":
            str_time = "%s%d:%02d" % (
                (u'\u4e0a\u5348', u'\u4e0b\u5348')[local_date.hour >= 12],
                local_date.hour % 12 or 12, local_date.minute)
        else:
            str_time = "%d:%02d %s" % (
                local_date.hour % 12 or 12, local_date.minute,
                ("am", "pm")[local_date.hour >= 12])

        return fmt % {
            "month_name": self._months[local_date.month - 1],
            "weekday": self._weekdays[local_date.weekday()],
            "day": str(local_date.day),
            "year": str(local_date.year),
            "time": str_time
        }

    def format_day(self, date, gmt_offset=0, dow=True):
        """Formats the given date as a day of week.

        Example: "Monday, January 22". You can remove the day of week with
        ``dow=False``.
        """
        local_date = date - datetime.timedelta(minutes=gmt_offset)
        _ = self.translate
        if dow:
            return _("%(weekday)s, %(month_name)s %(day)s") % {
                "month_name": self._months[local_date.month - 1],
                "weekday": self._weekdays[local_date.weekday()],
                "day": str(local_date.day),
            }
        else:
            return _("%(month_name)s %(day)s") % {
                "month_name": self._months[local_date.month - 1],
                "day": str(local_date.day),
            }

    def list(self, parts):
        """Returns a comma-separated list for the given list of parts.

        The format is, e.g., "A, B and C", "A and B" or just "A" for lists
        of size 1.
        """
        _ = self.translate
        if len(parts) == 0:
            return ""
        if len(parts) == 1:
            return parts[0]
        # Codes starting with "fa" join items with U+0648 instead of a comma.
        comma = u' \u0648 ' if self.code.startswith("fa") else u", "
        return _("%(commas)s and %(last)s") % {
            "commas": comma.join(parts[:-1]),
            "last": parts[-1],
        }

    def friendly_number(self, value):
        """Returns a comma-separated number for the given integer.

        Grouping is only applied for the ``en`` and ``en_US`` locales; any
        other locale gets ``str(value)`` unchanged.
        """
        if self.code not in ("en", "en_US"):
            return str(value)
        digits = str(value)
        # Keep the sign out of the grouping so that e.g. -100000 becomes
        # "-100,000" rather than "-,100,000".
        sign = ""
        if digits.startswith("-"):
            sign = "-"
            digits = digits[1:]
        parts = []
        while digits:
            parts.append(digits[-3:])
            digits = digits[:-3]
        return sign + ",".join(reversed(parts))


class CSVLocale(Locale):
    """Locale implementation using tornado's CSV translation format."""
    def translate(self, message, plural_message=None, count=None):
        if plural_message is not None:
            assert count is not None
            if count != 1:
                message = plural_message
                message_dict = self.translations.get("plural", {})
            else:
                message_dict = self.translations.get("singular", {})
        else:
            message_dict = self.translations.get("unknown", {})
        # Fall back to the untranslated message when no entry exists.
        return message_dict.get(message, message)

    def pgettext(self, context, message, plural_message=None, count=None):
        # The context is ignored; warn only when translations are loaded.
        if self.translations:
            gen_log.warning('pgettext is not supported by CSVLocale')
        return self.translate(message, plural_message, count)


class GettextLocale(Locale):
    """Locale implementation using the `gettext` module."""
    def __init__(self, code, translations):
        try:
            # python 2
            self.ngettext = translations.ungettext
            self.gettext = translations.ugettext
        except AttributeError:
            # python 3
            self.ngettext = translations.ngettext
            self.gettext = translations.gettext
        # self.gettext must exist before __init__ is called, since it
        # calls into self.translate
        super(GettextLocale, self).__init__(code, translations)

    def translate(self, message, plural_message=None, count=None):
        if plural_message is not None:
            assert count is not None
            return self.ngettext(message, plural_message, count)
        else:
            return self.gettext(message)

    def pgettext(self, context, message, plural_message=None, count=None):
        """Allows to set context for translation, accepts plural forms.

        Usage example::

            pgettext("law", "right")
            pgettext("good", "right")

        Plural message example::

            pgettext("organization", "club", "clubs", len(clubs))
            pgettext("stick", "club", "clubs", len(clubs))

        To generate POT file with context, add following options to step 1
        of `load_gettext_translations` sequence::

            xgettext [basic options] --keyword=pgettext:1c,2 --keyword=pgettext:1c,2,3

        .. versionadded:: 4.2
        """
        if plural_message is not None:
            assert count is not None
            msgs_with_ctxt = ("%s%s%s" % (context, CONTEXT_SEPARATOR, message),
                              "%s%s%s" % (context, CONTEXT_SEPARATOR, plural_message),
                              count)
            result = self.ngettext(*msgs_with_ctxt)
            if CONTEXT_SEPARATOR in result:
                # Translation not found
                result = self.ngettext(message, plural_message, count)
            return result
        else:
            msg_with_ctxt = "%s%s%s" % (context, CONTEXT_SEPARATOR, message)
            result = self.gettext(msg_with_ctxt)
            if CONTEXT_SEPARATOR in result:
                # Translation not found
                result = message
            return result