1# -*- coding: utf-8 -*-
2"""
3    babel.messages.catalog
4    ~~~~~~~~~~~~~~~~~~~~~~
5
6    Data structures for message catalogs.
7
8    :copyright: (c) 2013-2021 by the Babel Team.
9    :license: BSD, see LICENSE for more details.
10"""
11
12import re
13import time
14
15from cgi import parse_header
16from collections import OrderedDict
17from datetime import datetime, time as time_
18from difflib import get_close_matches
19from email import message_from_string
20from copy import copy
21
22from babel import __version__ as VERSION
23from babel.core import Locale, UnknownLocaleError
24from babel.dates import format_datetime
25from babel.messages.plurals import get_plural
26from babel.util import distinct, LOCALTZ, FixedOffsetTimezone
27from babel._compat import string_types, number_types, PY2, cmp, text_type, force_text
28
# Names exported by ``from <this module> import *``.
__all__ = ['Message', 'Catalog', 'TranslationError']


# Matches one printf-style (``%``) placeholder such as ``%s``, ``%(name)s``
# or ``%5.2f``.  Capture groups, in order:
#   1. the mapping key inside ``%(...)``, if present
#   2. the optional flag, width, precision and length modifier
#   3. the conversion character (``%%`` is matched with ``%`` as conversion)
PYTHON_FORMAT = re.compile(r'''
    \%
        (?:\(([\w]*)\))?
        (
            [-#0\ +]?(?:\*|[\d]+)?
            (?:\.(?:\*|[\d]+))?
            [hlL]?
        )
        ([diouxXeEfFgGcrs%])
''', re.VERBOSE)
42
43
def _parse_datetime_header(value):
    """Parse a PO header timestamp such as ``2021-04-01 15:30+0200``.

    :param value: the header value, formatted as ``YYYY-MM-DD HH:MM`` with an
                  optional trailing ``+HHMM``/``-HHMM`` offset
    :return: a `datetime` object; it carries a `FixedOffsetTimezone` as
             ``tzinfo`` when an offset is present and is naive otherwise
    :raise ValueError: if the date/time part does not match the format
    """
    match = re.match(r'^(?P<datetime>.*?)(?P<tzoffset>[+-]\d{4})?$', value)

    # Parse straight into a datetime.  Going through ``time.mktime()`` and
    # ``datetime.fromtimestamp()`` would reinterpret the wall-clock time in
    # the machine's local timezone, which shifts values that fall into a
    # local DST gap and can overflow on some platforms.
    dt = datetime.strptime(match.group('datetime'), '%Y-%m-%d %H:%M')

    tzoffset = match.group('tzoffset')
    if tzoffset is not None:
        # Split "+HHMM" / "-HHMM" into a sign, hours and minutes
        sign = -1 if tzoffset[0] == '-' else 1
        hours_offset = int(tzoffset[1:3])
        mins_offset = int(tzoffset[3:5])

        # The timezone object takes the net offset in minutes
        net_mins_offset = sign * (hours_offset * 60 + mins_offset)
        dt = dt.replace(tzinfo=FixedOffsetTimezone(net_mins_offset))

    return dt
74
75
class Message(object):
    """A single entry (message ID plus translation) of a message catalog."""

    def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(),
                 user_comments=(), previous_id=(), lineno=None, context=None):
        """Create the message object.

        :param id: the message ID, or a ``(singular, plural)`` tuple for
                   pluralizable messages
        :param string: the translated message string, or a
                       ``(singular, plural)`` tuple for pluralizable messages
        :param locations: a sequence of ``(filename, lineno)`` tuples
        :param flags: a set or sequence of flags
        :param auto_comments: a sequence of automatic comments for the message
        :param user_comments: a sequence of user comments for the message
        :param previous_id: the previous message ID, or a ``(singular, plural)``
                            tuple for pluralizable messages
        :param lineno: the line number on which the msgid line was found in the
                       PO file, if any
        :param context: the message context
        """
        self.id = id
        # A pluralizable message without a translation gets a pair of empty
        # strings as placeholder.
        self.string = (u'', u'') if (not string and self.pluralizable) else string
        self.locations = list(distinct(locations))
        self.flags = set(flags)
        # The 'python-format' flag always mirrors the message ID itself, no
        # matter what was passed in ``flags``.
        sync_flag = (self.flags.add if (id and self.python_format)
                     else self.flags.discard)
        sync_flag('python-format')
        self.auto_comments = list(distinct(auto_comments))
        self.user_comments = list(distinct(user_comments))
        # A lone string is wrapped in a list; any other value is taken to be
        # a sequence of IDs.
        self.previous_id = ([previous_id]
                            if isinstance(previous_id, string_types)
                            else list(previous_id))
        self.lineno = lineno
        self.context = context

    def __repr__(self):
        class_name = type(self).__name__
        flag_list = list(self.flags)
        return '<%s %r (flags: %r)>' % (class_name, self.id, flag_list)

    def __cmp__(self, other):
        """Compare Messages, taking into account plural ids"""
        def sort_key(msg):
            # Pluralizable messages are compared by their singular ID only.
            msgid = msg.id
            if isinstance(msg, Message) and msg.pluralizable:
                msgid = msgid[0]
            return msgid, msg.context or ''
        return cmp(sort_key(self), sort_key(other))

    # The rich comparisons all delegate to ``__cmp__``.

    def __gt__(self, other):
        return 0 < self.__cmp__(other)

    def __lt__(self, other):
        return 0 > self.__cmp__(other)

    def __ge__(self, other):
        return 0 <= self.__cmp__(other)

    def __le__(self, other):
        return 0 >= self.__cmp__(other)

    def __eq__(self, other):
        return 0 == self.__cmp__(other)

    def __ne__(self, other):
        return 0 != self.__cmp__(other)

    def clone(self):
        """Return a new `Message` built from shallow copies of this
        message's attributes."""
        fields = (self.id, self.string, self.locations, self.flags,
                  self.auto_comments, self.user_comments, self.previous_id,
                  self.lineno, self.context)
        return Message(*[copy(field) for field in fields])

    def check(self, catalog=None):
        """Run the registered checkers against this message.

        Every checker is called with the catalog and the message; the
        `TranslationError` exceptions they raise are collected.

        :param catalog: A catalog instance that is passed to the checkers
        :return: a list of `TranslationError` objects (empty if no checker
                 complained)
        :see: `Catalog.check` for a way to perform checks for all messages
              in a catalog.
        """
        from babel.messages.checkers import checkers
        problems = []
        for run_check in checkers:
            try:
                run_check(catalog, self)
            except TranslationError as exc:
                problems.append(exc)
        return problems

    @property
    def fuzzy(self):
        """Whether the translation is fuzzy.

        >>> Message('foo').fuzzy
        False
        >>> msg = Message('foo', 'foo', flags=['fuzzy'])
        >>> msg.fuzzy
        True
        >>> msg
        <Message 'foo' (flags: ['fuzzy'])>

        :type:  `bool`"""
        return 'fuzzy' in self.flags

    @property
    def pluralizable(self):
        """Whether the message is pluralizable.

        >>> Message('foo').pluralizable
        False
        >>> Message(('foo', 'bar')).pluralizable
        True

        :type:  `bool`"""
        return isinstance(self.id, (list, tuple))

    @property
    def python_format(self):
        """Whether the message contains Python-style parameters.

        >>> Message('foo %(name)s bar').python_format
        True
        >>> Message(('foo %(name)s', 'foo %(name)s')).python_format
        True

        :type:  `bool`"""
        ids = self.id
        candidates = ids if isinstance(ids, (list, tuple)) else [ids]
        for msgid in candidates:
            if PYTHON_FORMAT.search(msgid):
                return True
        return False
212
213
class TranslationError(Exception):
    """Raised by the translation checkers when they come across an invalid
    message translation."""
217
218
# Default header comment for a catalog.  The upper-case placeholders PROJECT,
# YEAR and ORGANIZATION are substituted with the catalog's values when the
# ``Catalog.header_comment`` property is read.
DEFAULT_HEADER = u"""\
# Translations template for PROJECT.
# Copyright (C) YEAR ORGANIZATION
# This file is distributed under the same license as the PROJECT project.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#"""
225
226
if not PY2:
    # No wrapping needed: the email parser is used as-is.
    _parse_header = message_from_string

else:
    def _parse_header(header_string):
        """Parse *header_string* into a dict of unicode header names and
        values."""
        # message_from_string only works for str, not for unicode, so the
        # input is encoded first and the parsed items are decoded again.
        parsed = message_from_string(header_string.encode('utf8'))
        return dict(
            (raw_name.decode('utf8'), raw_value.decode('utf8'))
            for raw_name, raw_value in parsed.items()
        )
240
241
class Catalog(object):
    """Representation of a message catalog."""

    def __init__(self, locale=None, domain=None, header_comment=DEFAULT_HEADER,
                 project=None, version=None, copyright_holder=None,
                 msgid_bugs_address=None, creation_date=None,
                 revision_date=None, last_translator=None, language_team=None,
                 charset=None, fuzzy=True):
        """Initialize the catalog object.

        :param locale: the locale identifier or `Locale` object, or `None`
                       if the catalog is not bound to a locale (which basically
                       means it's a template)
        :param domain: the message domain
        :param header_comment: the header comment as string, or `None` for the
                               default header
        :param project: the project's name
        :param version: the project's version
        :param copyright_holder: the copyright holder of the catalog
        :param msgid_bugs_address: the email address or URL to submit bug
                                   reports to
        :param creation_date: the date the catalog was created
        :param revision_date: the date the catalog was revised
        :param last_translator: the name and email of the last translator
        :param language_team: the name and email of the language team
        :param charset: the encoding to use in the output (defaults to utf-8)
        :param fuzzy: the fuzzy bit on the catalog header
        """
        self.domain = domain
        self.locale = locale
        # `None` is documented to select the default header; storing it
        # verbatim would make reading `header_comment` fail later on.
        if header_comment is None:
            header_comment = DEFAULT_HEADER
        self._header_comment = header_comment
        self._messages = OrderedDict()

        self.project = project or 'PROJECT'
        self.version = version or 'VERSION'
        self.copyright_holder = copyright_holder or 'ORGANIZATION'
        self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS'

        self.last_translator = last_translator or 'FULL NAME <EMAIL@ADDRESS>'
        """Name and email address of the last translator."""
        self.language_team = language_team or 'LANGUAGE <LL@li.org>'
        """Name and email address of the language team."""

        self.charset = charset or 'utf-8'

        # Naive datetimes are taken to be in the local timezone.
        if creation_date is None:
            creation_date = datetime.now(LOCALTZ)
        elif isinstance(creation_date, datetime) and not creation_date.tzinfo:
            creation_date = creation_date.replace(tzinfo=LOCALTZ)
        self.creation_date = creation_date
        if revision_date is None:
            revision_date = 'YEAR-MO-DA HO:MI+ZONE'
        elif isinstance(revision_date, datetime) and not revision_date.tzinfo:
            revision_date = revision_date.replace(tzinfo=LOCALTZ)
        self.revision_date = revision_date
        self.fuzzy = fuzzy

        self.obsolete = OrderedDict()  # Dictionary of obsolete messages
        # Computed lazily from the locale, or set from a Plural-Forms header
        self._num_plurals = None
        self._plural_expr = None

    def _set_locale(self, locale):
        """Set the locale from a `Locale` object, an identifier string or
        `None`.

        An identifier that cannot be parsed is still remembered as
        `locale_identifier`, but `locale` is `None` in that case.

        :raise TypeError: if `locale` is of any other type
        """
        if locale is None:
            self._locale_identifier = None
            self._locale = None
            return

        if isinstance(locale, Locale):
            self._locale_identifier = text_type(locale)
            self._locale = locale
            return

        if isinstance(locale, string_types):
            self._locale_identifier = text_type(locale)
            try:
                self._locale = Locale.parse(locale)
            except UnknownLocaleError:
                self._locale = None
            return

        raise TypeError('`locale` must be a Locale, a locale identifier string, or None; got %r' % locale)

    def _get_locale(self):
        """The `Locale` object of the catalog, or `None` if no locale is set
        or the identifier is unknown."""
        return self._locale

    def _get_locale_identifier(self):
        """The locale identifier string of the catalog, or `None`."""
        return self._locale_identifier

    locale = property(_get_locale, _set_locale)
    locale_identifier = property(_get_locale_identifier)

    def _get_header_comment(self):
        comment = self._header_comment
        # Use the revision year if the revision date is date-like, and the
        # current year otherwise (e.g. for the default placeholder string).
        year = datetime.now(LOCALTZ).strftime('%Y')
        if hasattr(self.revision_date, 'strftime'):
            year = self.revision_date.strftime('%Y')
        comment = comment.replace('PROJECT', self.project) \
                         .replace('VERSION', self.version) \
                         .replace('YEAR', year) \
                         .replace('ORGANIZATION', self.copyright_holder)
        locale_name = (self.locale.english_name if self.locale else self.locale_identifier)
        if locale_name:
            comment = comment.replace('Translations template', '%s translations' % locale_name)
        return comment

    def _set_header_comment(self, string):
        self._header_comment = string

    header_comment = property(_get_header_comment, _set_header_comment, doc="""\
    The header comment for the catalog.

    >>> catalog = Catalog(project='Foobar', version='1.0',
    ...                   copyright_holder='Foo Company')
    >>> print(catalog.header_comment) #doctest: +ELLIPSIS
    # Translations template for Foobar.
    # Copyright (C) ... Foo Company
    # This file is distributed under the same license as the Foobar project.
    # FIRST AUTHOR <EMAIL@ADDRESS>, ....
    #

    The header can also be set from a string. Any known upper-case variables
    will be replaced when the header is retrieved again:

    >>> catalog = Catalog(project='Foobar', version='1.0',
    ...                   copyright_holder='Foo Company')
    >>> catalog.header_comment = '''\\
    ... # The POT for my really cool PROJECT project.
    ... # Copyright (C) 1990-2003 ORGANIZATION
    ... # This file is distributed under the same license as the PROJECT
    ... # project.
    ... #'''
    >>> print(catalog.header_comment)
    # The POT for my really cool Foobar project.
    # Copyright (C) 1990-2003 Foo Company
    # This file is distributed under the same license as the Foobar
    # project.
    #

    :type: `unicode`
    """)

    def _get_mime_headers(self):
        headers = []
        headers.append(('Project-Id-Version',
                        '%s %s' % (self.project, self.version)))
        headers.append(('Report-Msgid-Bugs-To', self.msgid_bugs_address))
        headers.append(('POT-Creation-Date',
                        format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ',
                                        locale='en')))
        # The revision date is only formatted when it is a real date/time
        # value; the placeholder string is emitted unchanged.
        if isinstance(self.revision_date, (datetime, time_) + number_types):
            headers.append(('PO-Revision-Date',
                            format_datetime(self.revision_date,
                                            'yyyy-MM-dd HH:mmZ', locale='en')))
        else:
            headers.append(('PO-Revision-Date', self.revision_date))
        headers.append(('Last-Translator', self.last_translator))
        if self.locale_identifier:
            headers.append(('Language', str(self.locale_identifier)))
        if self.locale_identifier and ('LANGUAGE' in self.language_team):
            headers.append(('Language-Team',
                            self.language_team.replace('LANGUAGE',
                                                       str(self.locale_identifier))))
        else:
            headers.append(('Language-Team', self.language_team))
        if self.locale is not None:
            headers.append(('Plural-Forms', self.plural_forms))
        headers.append(('MIME-Version', '1.0'))
        headers.append(('Content-Type',
                        'text/plain; charset=%s' % self.charset))
        headers.append(('Content-Transfer-Encoding', '8bit'))
        headers.append(('Generated-By', 'Babel %s\n' % VERSION))
        return headers

    def _set_mime_headers(self, headers):
        # Header names are matched case-insensitively; headers that are not
        # handled below are ignored.
        for name, value in headers:
            name = force_text(name.lower(), encoding=self.charset)
            value = force_text(value, encoding=self.charset)
            if name == 'project-id-version':
                # The last space-separated token is the version, everything
                # before it the project name
                parts = value.split(' ')
                self.project = u' '.join(parts[:-1])
                self.version = parts[-1]
            elif name == 'report-msgid-bugs-to':
                self.msgid_bugs_address = value
            elif name == 'last-translator':
                self.last_translator = value
            elif name == 'language':
                value = value.replace('-', '_')
                self._set_locale(value)
            elif name == 'language-team':
                self.language_team = value
            elif name == 'content-type':
                mimetype, params = parse_header(value)
                if 'charset' in params:
                    self.charset = params['charset'].lower()
            elif name == 'plural-forms':
                # The value has no leading main value, so a dummy one is
                # prepended to have it parsed as header parameters
                _, params = parse_header(' ;' + value)
                self._num_plurals = int(params.get('nplurals', 2))
                self._plural_expr = params.get('plural', '(n != 1)')
            elif name == 'pot-creation-date':
                self.creation_date = _parse_datetime_header(value)
            elif name == 'po-revision-date':
                # Keep the value if it's not the default one
                if 'YEAR' not in value:
                    self.revision_date = _parse_datetime_header(value)

    mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\
    The MIME headers of the catalog, used for the special ``msgid ""`` entry.

    The behavior of this property changes slightly depending on whether a locale
    is set or not, the latter indicating that the catalog is actually a template
    for actual translations.

    Here's an example of the output for such a catalog template:

    >>> from babel.dates import UTC
    >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC)
    >>> catalog = Catalog(project='Foobar', version='1.0',
    ...                   creation_date=created)
    >>> for name, value in catalog.mime_headers:
    ...     print('%s: %s' % (name, value))
    Project-Id-Version: Foobar 1.0
    Report-Msgid-Bugs-To: EMAIL@ADDRESS
    POT-Creation-Date: 1990-04-01 15:30+0000
    PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE
    Last-Translator: FULL NAME <EMAIL@ADDRESS>
    Language-Team: LANGUAGE <LL@li.org>
    MIME-Version: 1.0
    Content-Type: text/plain; charset=utf-8
    Content-Transfer-Encoding: 8bit
    Generated-By: Babel ...

    And here's an example of the output when the locale is set:

    >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC)
    >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0',
    ...                   creation_date=created, revision_date=revised,
    ...                   last_translator='John Doe <jd@example.com>',
    ...                   language_team='de_DE <de@example.com>')
    >>> for name, value in catalog.mime_headers:
    ...     print('%s: %s' % (name, value))
    Project-Id-Version: Foobar 1.0
    Report-Msgid-Bugs-To: EMAIL@ADDRESS
    POT-Creation-Date: 1990-04-01 15:30+0000
    PO-Revision-Date: 1990-08-03 12:00+0000
    Last-Translator: John Doe <jd@example.com>
    Language: de_DE
    Language-Team: de_DE <de@example.com>
    Plural-Forms: nplurals=2; plural=(n != 1)
    MIME-Version: 1.0
    Content-Type: text/plain; charset=utf-8
    Content-Transfer-Encoding: 8bit
    Generated-By: Babel ...

    :type: `list`
    """)

    @property
    def num_plurals(self):
        """The number of plurals used by the catalog or locale.

        >>> Catalog(locale='en').num_plurals
        2
        >>> Catalog(locale='ga').num_plurals
        5

        :type: `int`"""
        if self._num_plurals is None:
            num = 2
            if self.locale:
                num = get_plural(self.locale)[0]
            self._num_plurals = num
        return self._num_plurals

    @property
    def plural_expr(self):
        """The plural expression used by the catalog or locale.

        >>> Catalog(locale='en').plural_expr
        '(n != 1)'
        >>> Catalog(locale='ga').plural_expr
        '(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 3 : 4)'
        >>> Catalog(locale='ding').plural_expr  # unknown locale
        '(n != 1)'

        :type: `string_types`"""
        if self._plural_expr is None:
            expr = '(n != 1)'
            if self.locale:
                expr = get_plural(self.locale)[1]
            self._plural_expr = expr
        return self._plural_expr

    @property
    def plural_forms(self):
        """Return the plural forms declaration for the locale.

        >>> Catalog(locale='en').plural_forms
        'nplurals=2; plural=(n != 1)'
        >>> Catalog(locale='pt_BR').plural_forms
        'nplurals=2; plural=(n > 1)'

        :type: `str`"""
        return 'nplurals=%s; plural=%s' % (self.num_plurals, self.plural_expr)

    def __contains__(self, id):
        """Return whether the catalog has a message with the specified ID."""
        return self._key_for(id) in self._messages

    def __len__(self):
        """The number of messages in the catalog.

        This does not include the special ``msgid ""`` entry."""
        return len(self._messages)

    def __iter__(self):
        """Iterates through all the entries in the catalog, in the order they
        were added, yielding a `Message` object for every entry.

        The first message yielded is the special header entry with an empty
        ID, built from the MIME headers.

        :rtype: ``iterator``"""
        header_lines = ['%s: %s' % (name, value)
                        for name, value in self.mime_headers]
        flags = {'fuzzy'} if self.fuzzy else set()
        yield Message(u'', '\n'.join(header_lines), flags=flags)
        for key in self._messages:
            yield self._messages[key]

    def __repr__(self):
        locale = ''
        if self.locale:
            locale = ' %s' % self.locale
        return '<%s %r%s>' % (type(self).__name__, self.domain, locale)

    def __delitem__(self, id):
        """Delete the message with the specified ID."""
        self.delete(id)

    def __getitem__(self, id):
        """Return the message with the specified ID.

        :param id: the message ID
        """
        return self.get(id)

    def __setitem__(self, id, message):
        """Add or update the message with the specified ID.

        >>> catalog = Catalog()
        >>> catalog[u'foo'] = Message(u'foo')
        >>> catalog[u'foo']
        <Message u'foo' (flags: [])>

        If a message with that ID is already in the catalog, it is updated
        to include the locations, comments and flags of the new message.

        >>> catalog = Catalog()
        >>> catalog[u'foo'] = Message(u'foo', locations=[('main.py', 1)])
        >>> catalog[u'foo'].locations
        [('main.py', 1)]
        >>> catalog[u'foo'] = Message(u'foo', locations=[('utils.py', 5)])
        >>> catalog[u'foo'].locations
        [('main.py', 1), ('utils.py', 5)]

        A message with an empty ID is treated as the catalog header: it is
        not stored as a message but used to set the MIME headers, the header
        comment and the fuzzy bit of the catalog.

        :param id: the message ID
        :param message: the `Message` object
        """
        assert isinstance(message, Message), 'expected a Message object'
        key = self._key_for(id, message.context)
        current = self._messages.get(key)
        if current:
            if message.pluralizable and not current.pluralizable:
                # The new message adds pluralization
                current.id = message.id
                current.string = message.string
            current.locations = list(distinct(current.locations +
                                              message.locations))
            current.auto_comments = list(distinct(current.auto_comments +
                                                  message.auto_comments))
            current.user_comments = list(distinct(current.user_comments +
                                                  message.user_comments))
            current.flags |= message.flags
            message = current
        elif id == '':
            # special treatment for the header message
            self.mime_headers = _parse_header(message.string).items()
            self.header_comment = '\n'.join([('# %s' % c).rstrip() for c
                                             in message.user_comments])
            self.fuzzy = message.fuzzy
        else:
            if isinstance(id, (list, tuple)):
                assert isinstance(message.string, (list, tuple)), \
                    'Expected sequence but got %s' % type(message.string)
            self._messages[key] = message

    def add(self, id, string=None, locations=(), flags=(), auto_comments=(),
            user_comments=(), previous_id=(), lineno=None, context=None):
        """Add or update the message with the specified ID.

        >>> catalog = Catalog()
        >>> catalog.add(u'foo')
        <Message ...>
        >>> catalog[u'foo']
        <Message u'foo' (flags: [])>

        This method simply constructs a `Message` object with the given
        arguments and invokes `__setitem__` with that object.

        :param id: the message ID, or a ``(singular, plural)`` tuple for
                   pluralizable messages
        :param string: the translated message string, or a
                       ``(singular, plural)`` tuple for pluralizable messages
        :param locations: a sequence of ``(filename, lineno)`` tuples
        :param flags: a set or sequence of flags
        :param auto_comments: a sequence of automatic comments
        :param user_comments: a sequence of user comments
        :param previous_id: the previous message ID, or a ``(singular, plural)``
                            tuple for pluralizable messages
        :param lineno: the line number on which the msgid line was found in the
                       PO file, if any
        :param context: the message context
        :return: the newly constructed `Message` object
        """
        message = Message(id, string, list(locations), flags, auto_comments,
                          user_comments, previous_id, lineno=lineno,
                          context=context)
        self[id] = message
        return message

    def check(self):
        """Run various validation checks on the translations in the catalog.

        For every message which fails validation, this method yields a
        ``(message, errors)`` tuple, where ``message`` is the `Message` object
        and ``errors`` is a sequence of `TranslationError` objects.

        :rtype: ``iterator``
        """
        for message in self._messages.values():
            errors = message.check(catalog=self)
            if errors:
                yield message, errors

    def get(self, id, context=None):
        """Return the message with the specified ID and context.

        :param id: the message ID
        :param context: the message context, or ``None`` for no context
        :return: the `Message` object, or ``None`` if there is no such message
        """
        return self._messages.get(self._key_for(id, context))

    def delete(self, id, context=None):
        """Delete the message with the specified ID and context.

        Nothing happens if the catalog has no such message.

        :param id: the message ID
        :param context: the message context, or ``None`` for no context
        """
        key = self._key_for(id, context)
        if key in self._messages:
            del self._messages[key]

    def update(self, template, no_fuzzy_matching=False, update_header_comment=False, keep_user_comments=True):
        """Update the catalog based on the given template catalog.

        >>> from babel.messages import Catalog
        >>> template = Catalog()
        >>> template.add('green', locations=[('main.py', 99)])
        <Message ...>
        >>> template.add('blue', locations=[('main.py', 100)])
        <Message ...>
        >>> template.add(('salad', 'salads'), locations=[('util.py', 42)])
        <Message ...>
        >>> catalog = Catalog(locale='de_DE')
        >>> catalog.add('blue', u'blau', locations=[('main.py', 98)])
        <Message ...>
        >>> catalog.add('head', u'Kopf', locations=[('util.py', 33)])
        <Message ...>
        >>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'),
        ...             locations=[('util.py', 38)])
        <Message ...>

        >>> catalog.update(template)
        >>> len(catalog)
        3

        >>> msg1 = catalog['green']
        >>> msg1.string
        >>> msg1.locations
        [('main.py', 99)]

        >>> msg2 = catalog['blue']
        >>> msg2.string
        u'blau'
        >>> msg2.locations
        [('main.py', 100)]

        >>> msg3 = catalog['salad']
        >>> msg3.string
        (u'Salat', u'Salate')
        >>> msg3.locations
        [('util.py', 42)]

        Messages that are in the catalog but not in the template are removed
        from the main collection, but can still be accessed via the `obsolete`
        member:

        >>> 'head' in catalog
        False
        >>> list(catalog.obsolete.values())
        [<Message 'head' (flags: [])>]

        :param template: the reference catalog, usually read from a POT file
        :param no_fuzzy_matching: whether to skip fuzzy matching of message
                                  IDs (fuzzy matching is done by default)
        :param update_header_comment: whether to replace the header comment
                                      of this catalog with the template's
        :param keep_user_comments: whether to carry over the user comments of
                                   the existing messages
        """
        messages = self._messages
        remaining = messages.copy()
        self._messages = OrderedDict()

        # Prepare for fuzzy matching: map the plain message IDs of all
        # translated messages to their contexts
        fuzzy_candidates = {}
        if not no_fuzzy_matching:
            fuzzy_candidates = {
                self._key_for(msgid): messages[msgid].context
                for msgid in messages if msgid and messages[msgid].string
            }
        fuzzy_matches = set()

        def _merge(message, oldkey, newkey):
            # Add a copy of the template message, carrying over the
            # translation, flags and (optionally) user comments of the old
            # message stored under `oldkey`.
            message = message.clone()
            fuzzy = False
            if oldkey != newkey:
                # Matched by similarity only: mark as fuzzy and remember the
                # ID the translation originally belonged to
                fuzzy = True
                fuzzy_matches.add(oldkey)
                oldmsg = messages.get(oldkey)
                if isinstance(oldmsg.id, string_types):
                    message.previous_id = [oldmsg.id]
                else:
                    message.previous_id = list(oldmsg.id)
            else:
                oldmsg = remaining.pop(oldkey, None)
            message.string = oldmsg.string

            if keep_user_comments:
                message.user_comments = list(distinct(oldmsg.user_comments))

            # Reconcile the shape of the translation with the (possibly
            # changed) pluralization of the message ID
            if isinstance(message.id, (list, tuple)):
                if not isinstance(message.string, (list, tuple)):
                    fuzzy = True
                    message.string = tuple(
                        [message.string] + ([u''] * (len(message.id) - 1))
                    )
                elif len(message.string) != self.num_plurals:
                    fuzzy = True
                    message.string = tuple(message.string[:len(oldmsg.string)])
            elif isinstance(message.string, (list, tuple)):
                fuzzy = True
                message.string = message.string[0]
            message.flags |= oldmsg.flags
            if fuzzy:
                message.flags |= {u'fuzzy'}
            self[message.id] = message

        for message in template:
            # The header entry (empty ID) of the template is skipped
            if message.id:
                key = self._key_for(message.id, message.context)
                if key in messages:
                    _merge(message, key, key)
                else:
                    if not no_fuzzy_matching:
                        # do some fuzzy matching with difflib
                        if isinstance(key, tuple):
                            matchkey = key[0]  # just the msgid, no context
                        else:
                            matchkey = key
                        matches = get_close_matches(matchkey.lower().strip(),
                                                    fuzzy_candidates.keys(), 1)
                        if matches:
                            newkey = matches[0]
                            newctxt = fuzzy_candidates[newkey]
                            if newctxt is not None:
                                newkey = newkey, newctxt
                            _merge(message, newkey, key)
                            continue

                    self[message.id] = message

        # Whatever was neither matched exactly nor used for a fuzzy match
        # becomes obsolete
        for msgid in remaining:
            if no_fuzzy_matching or msgid not in fuzzy_matches:
                self.obsolete[msgid] = remaining[msgid]

        if update_header_comment:
            # Allow the updated catalog's header to be rewritten based on the
            # template's header
            self.header_comment = template.header_comment

        # Make updated catalog's POT-Creation-Date equal to the template
        # used to update the catalog
        self.creation_date = template.creation_date

    def _key_for(self, id, context=None):
        """The key for a message is just the singular ID even for pluralizable
        messages, but is a ``(msgid, msgctxt)`` tuple for context-specific
        messages.
        """
        key = id
        if isinstance(key, (list, tuple)):
            key = id[0]
        if context is not None:
            key = (key, context)
        return key
852