# -*- coding: utf-8 -*-
"""
    babel.messages.catalog
    ~~~~~~~~~~~~~~~~~~~~~~

    Data structures for message catalogs.

    :copyright: (c) 2013-2021 by the Babel Team.
    :license: BSD, see LICENSE for more details.
"""

import re
import time

from cgi import parse_header
from collections import OrderedDict
from datetime import datetime, time as time_
from difflib import get_close_matches
from email import message_from_string
from copy import copy

from babel import __version__ as VERSION
from babel.core import Locale, UnknownLocaleError
from babel.dates import format_datetime
from babel.messages.plurals import get_plural
from babel.util import distinct, LOCALTZ, FixedOffsetTimezone
from babel._compat import string_types, number_types, PY2, cmp, text_type, force_text

__all__ = ['Message', 'Catalog', 'TranslationError']


# Matches a single printf-style conversion such as ``%s``, ``%(name)s`` or
# ``%-5.2f``.  Group 1 is the optional mapping key, group 2 the
# flags/width/precision/length part and group 3 the conversion character.
PYTHON_FORMAT = re.compile(r'''
    \%
        (?:\(([\w]*)\))?
        (
            [-#0\ +]?(?:\*|[\d]+)?
            (?:\.(?:\*|[\d]+))?
            [hlL]?
        )
        ([diouxXeEfFgGcrs%])
''', re.VERBOSE)


def _parse_datetime_header(value):
    """Parse a ``YYYY-MM-DD HH:MM[+-HHMM]`` header value into a `datetime`.

    The date and time part is parsed as written.  If a trailing ``+HHMM`` or
    ``-HHMM`` offset is present, the result carries a `FixedOffsetTimezone`
    with that offset; otherwise the returned `datetime` is naive.

    :param value: the header value as a string
    :return: the parsed `datetime`
    :raise ValueError: if the value does not have the expected format
    """
    match = re.match(r'^(?P<datetime>.*?)(?P<tzoffset>[+-]\d{4})?$', value)
    if match is None:
        # Only happens for values the pattern cannot span (e.g. embedded
        # newlines); report it the same way as any other malformed date.
        raise ValueError('invalid datetime header value: %r' % (value,))

    # Parse straight into a datetime.  Going through time.mktime() and
    # datetime.fromtimestamp() would interpret the fields in the machine's
    # local timezone, which can shift times that fall into a DST gap and can
    # fail outright for dates the platform's time_t cannot represent.
    dt = datetime.strptime(match.group('datetime'), '%Y-%m-%d %H:%M')

    # Separate the offset into a sign component, hours, and minutes
    tzoffset = match.group('tzoffset')
    if tzoffset is not None:
        plus_minus_s, rest = tzoffset[0], tzoffset[1:]
        hours_offset_s, mins_offset_s = rest[:2], rest[2:]

        # Make them all integers
        plus_minus = int(plus_minus_s + '1')
        hours_offset = int(hours_offset_s)
        mins_offset = int(mins_offset_s)

        # Calculate net offset
        net_mins_offset = hours_offset * 60
        net_mins_offset += mins_offset
        net_mins_offset *= plus_minus

        # Create an offset object
        tzoffset = FixedOffsetTimezone(net_mins_offset)

        # Store the offset in a datetime object
        dt = dt.replace(tzinfo=tzoffset)

    return dt


class Message(object):
    """Representation of a single message in a catalog."""

    def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(),
                 user_comments=(), previous_id=(), lineno=None, context=None):
        """Create the message object.

        :param id: the message ID, or a ``(singular, plural)`` tuple for
                   pluralizable messages
        :param string: the translated message string, or a
                       ``(singular, plural)`` tuple for pluralizable messages
        :param locations: a sequence of ``(filename, lineno)`` tuples
        :param flags: a set or sequence of flags
        :param auto_comments: a sequence of automatic comments for the message
        :param user_comments: a sequence of user comments for the message
        :param previous_id: the previous message ID, or a ``(singular, plural)``
                            tuple for pluralizable messages
        :param lineno: the line number on which the msgid line was found in the
                       PO file, if any
        :param context: the message context
        """
        self.id = id
        if not string and self.pluralizable:
            string = (u'', u'')
        self.string = string
        self.locations = list(distinct(locations))
        self.flags = set(flags)
        # The 'python-format' flag always reflects the actual message ID,
        # regardless of what was passed in ``flags``.
        if id and self.python_format:
            self.flags.add('python-format')
        else:
            self.flags.discard('python-format')
        self.auto_comments = list(distinct(auto_comments))
        self.user_comments = list(distinct(user_comments))
        if isinstance(previous_id, string_types):
            self.previous_id = [previous_id]
        else:
            self.previous_id = list(previous_id)
        self.lineno = lineno
        self.context = context

    def __repr__(self):
        return '<%s %r (flags: %r)>' % (type(self).__name__, self.id,
                                        list(self.flags))

    def __cmp__(self, other):
        """Compare Messages, taking into account plural ids"""
        def values_to_compare(obj):
            # Pluralizable messages are compared by their singular ID.
            if isinstance(obj, Message) and obj.pluralizable:
                return obj.id[0], obj.context or ''
            return obj.id, obj.context or ''
        return cmp(values_to_compare(self), values_to_compare(other))

    # The rich comparison methods all delegate to ``__cmp__``.

    def __gt__(self, other):
        return self.__cmp__(other) > 0

    def __lt__(self, other):
        return self.__cmp__(other) < 0

    def __ge__(self, other):
        return self.__cmp__(other) >= 0

    def __le__(self, other):
        return self.__cmp__(other) <= 0

    def __eq__(self, other):
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        return self.__cmp__(other) != 0

    def clone(self):
        """Return a new `Message` built from shallow copies of this
        message's attributes."""
        return Message(*map(copy, (self.id, self.string, self.locations,
                                   self.flags, self.auto_comments,
                                   self.user_comments, self.previous_id,
                                   self.lineno, self.context)))

    def check(self, catalog=None):
        """Run various validation checks on the message.  Some validations
        are only performed if the catalog is provided.  This method returns
        a sequence of `TranslationError` objects.

        :rtype: ``list``
        :param catalog: A catalog instance that is passed to the checkers
        :see: `Catalog.check` for a way to perform checks for all messages
              in a catalog.
        """
        from babel.messages.checkers import checkers
        errors = []
        for checker in checkers:
            try:
                checker(catalog, self)
            except TranslationError as e:
                errors.append(e)
        return errors

    @property
    def fuzzy(self):
        """Whether the translation is fuzzy.

        >>> Message('foo').fuzzy
        False
        >>> msg = Message('foo', 'foo', flags=['fuzzy'])
        >>> msg.fuzzy
        True
        >>> msg
        <Message 'foo' (flags: ['fuzzy'])>

        :type:  `bool`"""
        return 'fuzzy' in self.flags

    @property
    def pluralizable(self):
        """Whether the message is pluralizable.

        >>> Message('foo').pluralizable
        False
        >>> Message(('foo', 'bar')).pluralizable
        True

        :type:  `bool`"""
        return isinstance(self.id, (list, tuple))

    @property
    def python_format(self):
        """Whether the message contains Python-style parameters.

        >>> Message('foo %(name)s bar').python_format
        True
        >>> Message(('foo %(name)s', 'foo %(name)s')).python_format
        True

        :type:  `bool`"""
        ids = self.id
        if not isinstance(ids, (list, tuple)):
            ids = [ids]
        return any(PYTHON_FORMAT.search(msgid) for msgid in ids)


class TranslationError(Exception):
    """Exception thrown by translation checkers when invalid message
    translations are encountered."""


DEFAULT_HEADER = u"""\
# Translations template for PROJECT.
# Copyright (C) YEAR ORGANIZATION
# This file is distributed under the same license as the PROJECT project.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#"""


if PY2:
    def _parse_header(header_string):
        """Parse a header block into a dict of unicode names and values."""
        # message_from_string only works for str, not for unicode
        headers = message_from_string(header_string.encode('utf8'))
        decoded_headers = {}
        for name, value in headers.items():
            name = name.decode('utf8')
            value = value.decode('utf8')
            decoded_headers[name] = value
        return decoded_headers

else:
    _parse_header = message_from_string


class Catalog(object):
    """Representation of a message catalog."""

    def __init__(self, locale=None, domain=None, header_comment=DEFAULT_HEADER,
                 project=None, version=None, copyright_holder=None,
                 msgid_bugs_address=None, creation_date=None,
                 revision_date=None, last_translator=None, language_team=None,
                 charset=None, fuzzy=True):
        """Initialize the catalog object.

        :param locale: the locale identifier or `Locale` object, or `None`
                       if the catalog is not bound to a locale (which basically
                       means it's a template)
        :param domain: the message domain
        :param header_comment: the header comment as string, or `None` for the
                               default header
        :param project: the project's name
        :param version: the project's version
        :param copyright_holder: the copyright holder of the catalog
        :param msgid_bugs_address: the email address or URL to submit bug
                                   reports to
        :param creation_date: the date the catalog was created
        :param revision_date: the date the catalog was revised
        :param last_translator: the name and email of the last translator
        :param language_team: the name and email of the language team
        :param charset: the encoding to use in the output (defaults to utf-8)
        :param fuzzy: the fuzzy bit on the catalog header
        """
        self.domain = domain
        self.locale = locale
        # Honour the documented "None means default header" contract; storing
        # None would make the ``header_comment`` getter fail on ``.replace``.
        if header_comment is None:
            header_comment = DEFAULT_HEADER
        self._header_comment = header_comment
        self._messages = OrderedDict()

        self.project = project or 'PROJECT'
        self.version = version or 'VERSION'
        self.copyright_holder = copyright_holder or 'ORGANIZATION'
        self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS'

        self.last_translator = last_translator or 'FULL NAME <EMAIL@ADDRESS>'
        """Name and email address of the last translator."""
        self.language_team = language_team or 'LANGUAGE <LL@li.org>'
        """Name and email address of the language team."""

        self.charset = charset or 'utf-8'

        # Naive datetimes are interpreted as being in the local timezone.
        if creation_date is None:
            creation_date = datetime.now(LOCALTZ)
        elif isinstance(creation_date, datetime) and not creation_date.tzinfo:
            creation_date = creation_date.replace(tzinfo=LOCALTZ)
        self.creation_date = creation_date
        if revision_date is None:
            revision_date = 'YEAR-MO-DA HO:MI+ZONE'
        elif isinstance(revision_date, datetime) and not revision_date.tzinfo:
            revision_date = revision_date.replace(tzinfo=LOCALTZ)
        self.revision_date = revision_date
        self.fuzzy = fuzzy

        self.obsolete = OrderedDict()  # Dictionary of obsolete messages
        self._num_plurals = None
        self._plural_expr = None

    def _set_locale(self, locale):
        """Set the catalog locale from a `Locale`, an identifier string or
        `None`.

        A string that `Locale.parse` rejects with `UnknownLocaleError` is
        still remembered as the locale identifier, but ``locale`` itself
        becomes `None`.

        :raise TypeError: if ``locale`` is of any other type
        """
        if locale is None:
            self._locale_identifier = None
            self._locale = None
            return

        if isinstance(locale, Locale):
            self._locale_identifier = text_type(locale)
            self._locale = locale
            return

        if isinstance(locale, string_types):
            self._locale_identifier = text_type(locale)
            try:
                self._locale = Locale.parse(locale)
            except UnknownLocaleError:
                self._locale = None
            return

        raise TypeError('`locale` must be a Locale, a locale identifier string, or None; got %r' % locale)

    def _get_locale(self):
        return self._locale

    def _get_locale_identifier(self):
        return self._locale_identifier

    locale = property(_get_locale, _set_locale)
    locale_identifier = property(_get_locale_identifier)

    def _get_header_comment(self):
        """Return the header comment with its placeholders filled in."""
        comment = self._header_comment
        # Use the revision year if a real date is set, else the current year.
        year = datetime.now(LOCALTZ).strftime('%Y')
        if hasattr(self.revision_date, 'strftime'):
            year = self.revision_date.strftime('%Y')
        comment = comment.replace('PROJECT', self.project) \
                         .replace('VERSION', self.version) \
                         .replace('YEAR', year) \
                         .replace('ORGANIZATION', self.copyright_holder)
        locale_name = (self.locale.english_name if self.locale else self.locale_identifier)
        if locale_name:
            comment = comment.replace('Translations template', '%s translations' % locale_name)
        return comment

    def _set_header_comment(self, string):
        self._header_comment = string

    header_comment = property(_get_header_comment, _set_header_comment, doc="""\
    The header comment for the catalog.

    >>> catalog = Catalog(project='Foobar', version='1.0',
    ...                   copyright_holder='Foo Company')
    >>> print(catalog.header_comment) #doctest: +ELLIPSIS
    # Translations template for Foobar.
    # Copyright (C) ... Foo Company
    # This file is distributed under the same license as the Foobar project.
    # FIRST AUTHOR <EMAIL@ADDRESS>, ....
    #

    The header can also be set from a string. Any known upper-case variables
    will be replaced when the header is retrieved again:

    >>> catalog = Catalog(project='Foobar', version='1.0',
    ...                   copyright_holder='Foo Company')
    >>> catalog.header_comment = '''\\
    ... # The POT for my really cool PROJECT project.
    ... # Copyright (C) 1990-2003 ORGANIZATION
    ... # This file is distributed under the same license as the PROJECT
    ... # project.
    ... #'''
    >>> print(catalog.header_comment)
    # The POT for my really cool Foobar project.
    # Copyright (C) 1990-2003 Foo Company
    # This file is distributed under the same license as the Foobar
    # project.
    #

    :type: `unicode`
    """)

    def _get_mime_headers(self):
        """Build the list of ``(name, value)`` MIME header pairs."""
        headers = []
        headers.append(('Project-Id-Version',
                        '%s %s' % (self.project, self.version)))
        headers.append(('Report-Msgid-Bugs-To', self.msgid_bugs_address))
        headers.append(('POT-Creation-Date',
                        format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ',
                                        locale='en')))
        # The revision date is either a real date/number to be formatted or
        # a placeholder string that is emitted unchanged.
        if isinstance(self.revision_date, (datetime, time_) + number_types):
            headers.append(('PO-Revision-Date',
                            format_datetime(self.revision_date,
                                            'yyyy-MM-dd HH:mmZ', locale='en')))
        else:
            headers.append(('PO-Revision-Date', self.revision_date))
        headers.append(('Last-Translator', self.last_translator))
        if self.locale_identifier:
            headers.append(('Language', str(self.locale_identifier)))
        if self.locale_identifier and ('LANGUAGE' in self.language_team):
            headers.append(('Language-Team',
                            self.language_team.replace('LANGUAGE',
                                                       str(self.locale_identifier))))
        else:
            headers.append(('Language-Team', self.language_team))
        if self.locale is not None:
            headers.append(('Plural-Forms', self.plural_forms))
        headers.append(('MIME-Version', '1.0'))
        headers.append(('Content-Type',
                        'text/plain; charset=%s' % self.charset))
        headers.append(('Content-Transfer-Encoding', '8bit'))
        headers.append(('Generated-By', 'Babel %s\n' % VERSION))
        return headers

    def _set_mime_headers(self, headers):
        """Update catalog attributes from an iterable of ``(name, value)``
        header pairs.  Header names are matched case-insensitively; names
        not handled below are ignored."""
        for name, value in headers:
            name = force_text(name.lower(), encoding=self.charset)
            value = force_text(value, encoding=self.charset)
            if name == 'project-id-version':
                # The last space-separated token is the version; everything
                # before it is the project name.
                parts = value.split(' ')
                self.project = u' '.join(parts[:-1])
                self.version = parts[-1]
            elif name == 'report-msgid-bugs-to':
                self.msgid_bugs_address = value
            elif name == 'last-translator':
                self.last_translator = value
            elif name == 'language':
                value = value.replace('-', '_')
                self._set_locale(value)
            elif name == 'language-team':
                self.language_team = value
            elif name == 'content-type':
                mimetype, params = parse_header(value)
                if 'charset' in params:
                    self.charset = params['charset'].lower()
            elif name == 'plural-forms':
                # Prefix with ' ;' so the whole value is parsed as parameters.
                _, params = parse_header(' ;' + value)
                self._num_plurals = int(params.get('nplurals', 2))
                self._plural_expr = params.get('plural', '(n != 1)')
            elif name == 'pot-creation-date':
                self.creation_date = _parse_datetime_header(value)
            elif name == 'po-revision-date':
                # Keep the value if it's not the default one
                if 'YEAR' not in value:
                    self.revision_date = _parse_datetime_header(value)

    mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\
    The MIME headers of the catalog, used for the special ``msgid ""`` entry.

    The behavior of this property changes slightly depending on whether a locale
    is set or not, the latter indicating that the catalog is actually a template
    for actual translations.

    Here's an example of the output for such a catalog template:

    >>> from babel.dates import UTC
    >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC)
    >>> catalog = Catalog(project='Foobar', version='1.0',
    ...                   creation_date=created)
    >>> for name, value in catalog.mime_headers:
    ...     print('%s: %s' % (name, value))
    Project-Id-Version: Foobar 1.0
    Report-Msgid-Bugs-To: EMAIL@ADDRESS
    POT-Creation-Date: 1990-04-01 15:30+0000
    PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE
    Last-Translator: FULL NAME <EMAIL@ADDRESS>
    Language-Team: LANGUAGE <LL@li.org>
    MIME-Version: 1.0
    Content-Type: text/plain; charset=utf-8
    Content-Transfer-Encoding: 8bit
    Generated-By: Babel ...

    And here's an example of the output when the locale is set:

    >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC)
    >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0',
    ...                   creation_date=created, revision_date=revised,
    ...                   last_translator='John Doe <jd@example.com>',
    ...                   language_team='de_DE <de@example.com>')
    >>> for name, value in catalog.mime_headers:
    ...     print('%s: %s' % (name, value))
    Project-Id-Version: Foobar 1.0
    Report-Msgid-Bugs-To: EMAIL@ADDRESS
    POT-Creation-Date: 1990-04-01 15:30+0000
    PO-Revision-Date: 1990-08-03 12:00+0000
    Last-Translator: John Doe <jd@example.com>
    Language: de_DE
    Language-Team: de_DE <de@example.com>
    Plural-Forms: nplurals=2; plural=(n != 1)
    MIME-Version: 1.0
    Content-Type: text/plain; charset=utf-8
    Content-Transfer-Encoding: 8bit
    Generated-By: Babel ...

    :type: `list`
    """)

    @property
    def num_plurals(self):
        """The number of plurals used by the catalog or locale.

        >>> Catalog(locale='en').num_plurals
        2
        >>> Catalog(locale='ga').num_plurals
        5

        :type: `int`"""
        if self._num_plurals is None:
            num = 2
            if self.locale:
                num = get_plural(self.locale)[0]
            self._num_plurals = num
        return self._num_plurals

    @property
    def plural_expr(self):
        """The plural expression used by the catalog or locale.

        >>> Catalog(locale='en').plural_expr
        '(n != 1)'
        >>> Catalog(locale='ga').plural_expr
        '(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 3 : 4)'
        >>> Catalog(locale='ding').plural_expr  # unknown locale
        '(n != 1)'

        :type: `string_types`"""
        if self._plural_expr is None:
            expr = '(n != 1)'
            if self.locale:
                expr = get_plural(self.locale)[1]
            self._plural_expr = expr
        return self._plural_expr

    @property
    def plural_forms(self):
        """Return the plural forms declaration for the locale.

        >>> Catalog(locale='en').plural_forms
        'nplurals=2; plural=(n != 1)'
        >>> Catalog(locale='pt_BR').plural_forms
        'nplurals=2; plural=(n > 1)'

        :type: `str`"""
        return 'nplurals=%s; plural=%s' % (self.num_plurals, self.plural_expr)

    def __contains__(self, id):
        """Return whether the catalog has a message with the specified ID."""
        return self._key_for(id) in self._messages

    def __len__(self):
        """The number of messages in the catalog.

        This does not include the special ``msgid ""`` entry."""
        return len(self._messages)

    def __iter__(self):
        """Iterates through all the entries in the catalog, in the order they
        were added, yielding a `Message` object for every entry.

        :rtype: ``iterator``"""
        # The first entry is the header message synthesized from the MIME
        # headers; it is not stored in ``_messages``.
        buf = []
        for name, value in self.mime_headers:
            buf.append('%s: %s' % (name, value))
        flags = set()
        if self.fuzzy:
            flags |= {'fuzzy'}
        yield Message(u'', '\n'.join(buf), flags=flags)
        for key in self._messages:
            yield self._messages[key]

    def __repr__(self):
        locale = ''
        if self.locale:
            locale = ' %s' % self.locale
        return '<%s %r%s>' % (type(self).__name__, self.domain, locale)

    def __delitem__(self, id):
        """Delete the message with the specified ID."""
        self.delete(id)

    def __getitem__(self, id):
        """Return the message with the specified ID.

        :param id: the message ID
        """
        return self.get(id)

    def __setitem__(self, id, message):
        """Add or update the message with the specified ID.

        >>> catalog = Catalog()
        >>> catalog[u'foo'] = Message(u'foo')
        >>> catalog[u'foo']
        <Message u'foo' (flags: [])>

        If a message with that ID is already in the catalog, it is updated
        to include the locations and flags of the new message.

        >>> catalog = Catalog()
        >>> catalog[u'foo'] = Message(u'foo', locations=[('main.py', 1)])
        >>> catalog[u'foo'].locations
        [('main.py', 1)]
        >>> catalog[u'foo'] = Message(u'foo', locations=[('utils.py', 5)])
        >>> catalog[u'foo'].locations
        [('main.py', 1), ('utils.py', 5)]

        :param id: the message ID
        :param message: the `Message` object
        """
        assert isinstance(message, Message), 'expected a Message object'
        key = self._key_for(id, message.context)
        current = self._messages.get(key)
        if current:
            if message.pluralizable and not current.pluralizable:
                # The new message adds pluralization
                current.id = message.id
                current.string = message.string
            current.locations = list(distinct(current.locations +
                                              message.locations))
            current.auto_comments = list(distinct(current.auto_comments +
                                                  message.auto_comments))
            current.user_comments = list(distinct(current.user_comments +
                                                  message.user_comments))
            current.flags |= message.flags
            message = current
        elif id == '':
            # special treatment for the header message
            self.mime_headers = _parse_header(message.string).items()
            self.header_comment = '\n'.join([('# %s' % c).rstrip() for c
                                             in message.user_comments])
            self.fuzzy = message.fuzzy
        else:
            if isinstance(id, (list, tuple)):
                assert isinstance(message.string, (list, tuple)), \
                    'Expected sequence but got %s' % type(message.string)
            self._messages[key] = message

    def add(self, id, string=None, locations=(), flags=(), auto_comments=(),
            user_comments=(), previous_id=(), lineno=None, context=None):
        """Add or update the message with the specified ID.

        >>> catalog = Catalog()
        >>> catalog.add(u'foo')
        <Message ...>
        >>> catalog[u'foo']
        <Message u'foo' (flags: [])>

        This method simply constructs a `Message` object with the given
        arguments and invokes `__setitem__` with that object.

        :param id: the message ID, or a ``(singular, plural)`` tuple for
                   pluralizable messages
        :param string: the translated message string, or a
                       ``(singular, plural)`` tuple for pluralizable messages
        :param locations: a sequence of ``(filename, lineno)`` tuples
        :param flags: a set or sequence of flags
        :param auto_comments: a sequence of automatic comments
        :param user_comments: a sequence of user comments
        :param previous_id: the previous message ID, or a ``(singular, plural)``
                            tuple for pluralizable messages
        :param lineno: the line number on which the msgid line was found in the
                       PO file, if any
        :param context: the message context
        """
        message = Message(id, string, list(locations), flags, auto_comments,
                          user_comments, previous_id, lineno=lineno,
                          context=context)
        self[id] = message
        return message

    def check(self):
        """Run various validation checks on the translations in the catalog.

        For every message which fails validation, this method yields a
        ``(message, errors)`` tuple, where ``message`` is the `Message` object
        and ``errors`` is a sequence of `TranslationError` objects.

        :rtype: ``iterator``
        """
        for message in self._messages.values():
            errors = message.check(catalog=self)
            if errors:
                yield message, errors

    def get(self, id, context=None):
        """Return the message with the specified ID and context.

        :param id: the message ID
        :param context: the message context, or ``None`` for no context
        """
        return self._messages.get(self._key_for(id, context))

    def delete(self, id, context=None):
        """Delete the message with the specified ID and context.

        :param id: the message ID
        :param context: the message context, or ``None`` for no context
        """
        key = self._key_for(id, context)
        if key in self._messages:
            del self._messages[key]

    def update(self, template, no_fuzzy_matching=False, update_header_comment=False, keep_user_comments=True):
        """Update the catalog based on the given template catalog.

        >>> from babel.messages import Catalog
        >>> template = Catalog()
        >>> template.add('green', locations=[('main.py', 99)])
        <Message ...>
        >>> template.add('blue', locations=[('main.py', 100)])
        <Message ...>
        >>> template.add(('salad', 'salads'), locations=[('util.py', 42)])
        <Message ...>
        >>> catalog = Catalog(locale='de_DE')
        >>> catalog.add('blue', u'blau', locations=[('main.py', 98)])
        <Message ...>
        >>> catalog.add('head', u'Kopf', locations=[('util.py', 33)])
        <Message ...>
        >>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'),
        ...             locations=[('util.py', 38)])
        <Message ...>

        >>> catalog.update(template)
        >>> len(catalog)
        3

        >>> msg1 = catalog['green']
        >>> msg1.string
        >>> msg1.locations
        [('main.py', 99)]

        >>> msg2 = catalog['blue']
        >>> msg2.string
        u'blau'
        >>> msg2.locations
        [('main.py', 100)]

        >>> msg3 = catalog['salad']
        >>> msg3.string
        (u'Salat', u'Salate')
        >>> msg3.locations
        [('util.py', 42)]

        Messages that are in the catalog but not in the template are removed
        from the main collection, but can still be accessed via the `obsolete`
        member:

        >>> 'head' in catalog
        False
        >>> list(catalog.obsolete.values())
        [<Message 'head' (flags: [])>]

        :param template: the reference catalog, usually read from a POT file
        :param no_fuzzy_matching: whether to disable fuzzy matching of message
                                  IDs; when false (the default), template
                                  messages without an exact match are matched
                                  against similar existing message IDs
        :param update_header_comment: whether to replace the header comment
                                      of this catalog with the template's
        :param keep_user_comments: whether merged messages take over the user
                                   comments of the existing message
        """
        messages = self._messages
        remaining = messages.copy()
        self._messages = OrderedDict()

        # Prepare for fuzzy matching: map the bare message ID of every
        # translated message to its context.
        fuzzy_candidates = {}
        if not no_fuzzy_matching:
            fuzzy_candidates = {
                self._key_for(msgid): messages[msgid].context
                for msgid in messages if msgid and messages[msgid].string
            }
        fuzzy_matches = set()

        def _merge(message, oldkey, newkey):
            # Merge the old translation stored under ``oldkey`` into a copy
            # of the template ``message`` and add the result to the catalog.
            message = message.clone()
            fuzzy = False
            if oldkey != newkey:
                # Fuzzy match: remember the old ID as ``previous_id``.
                fuzzy = True
                fuzzy_matches.add(oldkey)
                oldmsg = messages.get(oldkey)
                if isinstance(oldmsg.id, string_types):
                    message.previous_id = [oldmsg.id]
                else:
                    message.previous_id = list(oldmsg.id)
            else:
                oldmsg = remaining.pop(oldkey, None)
            message.string = oldmsg.string

            if keep_user_comments:
                message.user_comments = list(distinct(oldmsg.user_comments))

            # Reconcile the shape of the translation with the (possibly
            # changed) pluralization of the message ID.
            if isinstance(message.id, (list, tuple)):
                if not isinstance(message.string, (list, tuple)):
                    fuzzy = True
                    message.string = tuple(
                        [message.string] + ([u''] * (len(message.id) - 1))
                    )
                elif len(message.string) != self.num_plurals:
                    fuzzy = True
                    message.string = tuple(message.string[:len(oldmsg.string)])
            elif isinstance(message.string, (list, tuple)):
                fuzzy = True
                message.string = message.string[0]
            message.flags |= oldmsg.flags
            if fuzzy:
                message.flags |= {u'fuzzy'}
            self[message.id] = message

        for message in template:
            if message.id:
                key = self._key_for(message.id, message.context)
                if key in messages:
                    _merge(message, key, key)
                else:
                    if not no_fuzzy_matching:
                        # do some fuzzy matching with difflib
                        if isinstance(key, tuple):
                            matchkey = key[0]  # just the msgid, no context
                        else:
                            matchkey = key
                        matches = get_close_matches(matchkey.lower().strip(),
                                                    fuzzy_candidates.keys(), 1)
                        if matches:
                            newkey = matches[0]
                            newctxt = fuzzy_candidates[newkey]
                            if newctxt is not None:
                                newkey = newkey, newctxt
                            _merge(message, newkey, key)
                            continue

                    self[message.id] = message

        # Whatever was neither matched exactly nor used as a fuzzy match is
        # obsolete.
        for msgid in remaining:
            if no_fuzzy_matching or msgid not in fuzzy_matches:
                self.obsolete[msgid] = remaining[msgid]

        if update_header_comment:
            # Allow the updated catalog's header to be rewritten based on the
            # template's header
            self.header_comment = template.header_comment

        # Make updated catalog's POT-Creation-Date equal to the template
        # used to update the catalog
        self.creation_date = template.creation_date

    def _key_for(self, id, context=None):
        """The key for a message is just the singular ID even for pluralizable
        messages, but is a ``(msgid, msgctxt)`` tuple for context-specific
        messages.
        """
        key = id
        if isinstance(key, (list, tuple)):
            key = id[0]
        if context is not None:
            key = (key, context)
        return key