1# -*- coding: utf-8 -*-
2# Part of Odoo. See LICENSE file for full copyright and licensing details.
3import codecs
4import fnmatch
5import functools
6import inspect
7import io
8import locale
9import logging
10import os
11import polib
12import re
13import tarfile
14import tempfile
15import threading
16from collections import defaultdict, namedtuple
17from datetime import datetime
18from os.path import join
19
20from pathlib import Path
21from babel.messages import extract
22from lxml import etree, html
23
24import odoo
25from . import config, pycompat
26from .misc import file_open, get_iso_codes, SKIPPED_ELEMENT_TYPES
27
28_logger = logging.getLogger(__name__)
29
# used to notify web client that these translations should be loaded in the UI
WEB_TRANSLATION_COMMENT = "openerp-web"

# tags of the elements that translate_xml_node() returns as is, without
# translating their content
SKIPPED_ELEMENTS = ('script', 'style', 'title')
34
# Mapping from locale codes to what appear to be Windows locale names (going
# by the name of the constant); the code consuming it is not in this part of
# the file.
# NOTE(review): several locales share one value (all fr_* -> French_France,
# all es_* -> Spanish_Spain), so the mapping is not invertible.
_LOCALE2WIN32 = {
    'af_ZA': 'Afrikaans_South Africa',
    'sq_AL': 'Albanian_Albania',
    'ar_SA': 'Arabic_Saudi Arabia',
    'eu_ES': 'Basque_Spain',
    'be_BY': 'Belarusian_Belarus',
    'bs_BA': 'Bosnian_Bosnia and Herzegovina',
    'bg_BG': 'Bulgarian_Bulgaria',
    'ca_ES': 'Catalan_Spain',
    'hr_HR': 'Croatian_Croatia',
    'zh_CN': 'Chinese_China',
    'zh_TW': 'Chinese_Taiwan',
    'cs_CZ': 'Czech_Czech Republic',
    'da_DK': 'Danish_Denmark',
    'nl_NL': 'Dutch_Netherlands',
    'et_EE': 'Estonian_Estonia',
    'fa_IR': 'Farsi_Iran',
    'ph_PH': 'Filipino_Philippines',
    'fi_FI': 'Finnish_Finland',
    'fr_FR': 'French_France',
    'fr_BE': 'French_France',
    'fr_CH': 'French_France',
    'fr_CA': 'French_France',
    # NOTE(review): 'ga' is usually the code for Irish, while Scottish Gaelic
    # is 'gd' -- confirm which language is meant here
    'ga': 'Scottish Gaelic',
    'gl_ES': 'Galician_Spain',
    'ka_GE': 'Georgian_Georgia',
    'de_DE': 'German_Germany',
    'el_GR': 'Greek_Greece',
    'gu': 'Gujarati_India',
    'he_IL': 'Hebrew_Israel',
    'hi_IN': 'Hindi',
    'hu': 'Hungarian_Hungary',
    'is_IS': 'Icelandic_Iceland',
    'id_ID': 'Indonesian_Indonesia',
    'it_IT': 'Italian_Italy',
    'ja_JP': 'Japanese_Japan',
    'kn_IN': 'Kannada',
    'km_KH': 'Khmer',
    'ko_KR': 'Korean_Korea',
    'lo_LA': 'Lao_Laos',
    'lt_LT': 'Lithuanian_Lithuania',
    # NOTE(review): 'lat' does not follow the xx_XX pattern of the other keys
    # (presumably 'lv_LV' was intended) -- confirm
    'lat': 'Latvian_Latvia',
    'ml_IN': 'Malayalam_India',
    'mi_NZ': 'Maori',
    'mn': 'Cyrillic_Mongolian',
    'no_NO': 'Norwegian_Norway',
    'nn_NO': 'Norwegian-Nynorsk_Norway',
    'pl': 'Polish_Poland',
    'pt_PT': 'Portuguese_Portugal',
    'pt_BR': 'Portuguese_Brazil',
    'ro_RO': 'Romanian_Romania',
    'ru_RU': 'Russian_Russia',
    'sr_CS': 'Serbian (Cyrillic)_Serbia and Montenegro',
    'sk_SK': 'Slovak_Slovakia',
    'sl_SI': 'Slovenian_Slovenia',
    # should find more specific locales for Spanish countries,
    # but better than nothing
    'es_AR': 'Spanish_Spain',
    'es_BO': 'Spanish_Spain',
    'es_CL': 'Spanish_Spain',
    'es_CO': 'Spanish_Spain',
    'es_CR': 'Spanish_Spain',
    'es_DO': 'Spanish_Spain',
    'es_EC': 'Spanish_Spain',
    'es_ES': 'Spanish_Spain',
    'es_GT': 'Spanish_Spain',
    'es_HN': 'Spanish_Spain',
    'es_MX': 'Spanish_Spain',
    'es_NI': 'Spanish_Spain',
    'es_PA': 'Spanish_Spain',
    'es_PE': 'Spanish_Spain',
    'es_PR': 'Spanish_Spain',
    'es_PY': 'Spanish_Spain',
    'es_SV': 'Spanish_Spain',
    'es_UY': 'Spanish_Spain',
    'es_VE': 'Spanish_Spain',
    'sv_SE': 'Swedish_Sweden',
    # NOTE(review): a Tamil locale mapped to an English one -- presumably a
    # deliberate fallback, confirm
    'ta_IN': 'English_Australia',
    'th_TH': 'Thai_Thailand',
    'tr_TR': 'Turkish_Turkey',
    'uk_UA': 'Ukrainian_Ukraine',
    'vi_VN': 'Vietnamese_Viet Nam',
    'tlh_TLH': 'Klingon',

}
120
# These are not all English small words, just those that could potentially be
# isolated within views. Built as a set of lowercase two-letter words.
ENGLISH_SMALL_WORDS = set("as at by do go if in me no of ok on or to up us we".split())
123
124
125# these direct uses of CSV are ok.
126import csv # pylint: disable=deprecated-module
class UNIX_LINE_TERMINATOR(csv.excel):
    """ CSV dialect derived from ``csv.excel`` whose rows end with a bare
        ``\\n`` line terminator.
    """
    lineterminator = '\n'

# register the dialect under the name "UNIX" so that it can be selected with
# ``dialect='UNIX'`` (as CSVFileWriter does)
csv.register_dialect("UNIX", UNIX_LINE_TERMINATOR)
131
132
# FIXME: this whole module needs a thorough cleanup
def encode(s):
    """ Return ``s`` unchanged, after asserting that it is a ``str``. """
    assert isinstance(s, str)
    return s
137
# which elements are translated inline
# (i.e. kept inside the term of their parent instead of being translated on
# their own, see translate_xml_node)
TRANSLATED_ELEMENTS = {
    'abbr', 'b', 'bdi', 'bdo', 'br', 'cite', 'code', 'data', 'del', 'dfn', 'em',
    'font', 'i', 'ins', 'kbd', 'keygen', 'mark', 'math', 'meter', 'output',
    'progress', 'q', 'ruby', 's', 'samp', 'small', 'span', 'strong', 'sub',
    'sup', 'time', 'u', 'var', 'wbr', 'text',
}

# which attributes must be translated
TRANSLATED_ATTRS = {
    'string', 'help', 'sum', 'avg', 'confirm', 'placeholder', 'alt', 'title', 'aria-label',
    'aria-keyshortcuts', 'aria-placeholder', 'aria-roledescription', 'aria-valuetext',
    'value_label',
}

# also translate the "t-attf-" prefixed variant of each attribute above
TRANSLATED_ATTRS = TRANSLATED_ATTRS | {'t-attf-' + attr for attr in TRANSLATED_ATTRS}

# matches a text that starts (after optional whitespace) with a DOCTYPE declaration
avoid_pattern = re.compile(r"\s*<!DOCTYPE", re.IGNORECASE | re.MULTILINE | re.UNICODE)
# matches a serialized element; group 1 is what lies between its first tag and
# its last closing tag
node_pattern = re.compile(r"<[^>]*>(.*)</[^<]*>", re.DOTALL | re.MULTILINE | re.UNICODE)
157
158
def translate_xml_node(node, callback, parse, serialize):
    """ Return the translation of the given XML/HTML node.

        The node is walked recursively. Runs of text and inline elements (see
        ``TRANSLATED_ELEMENTS``) are accumulated and translated as one term;
        the other elements are rebuilt one by one, with their translatable
        attributes (see ``TRANSLATED_ATTRS``) translated as well.

        :param node: the element to translate
        :param callback: callback(text) returns translated text or None
        :param parse: parse(text) returns a node (text is unicode)
        :param serialize: serialize(node) returns unicode text
        :return: the translated element
    """

    def nonspace(text):
        """ Return whether ``text`` contains at least two word characters. """
        return bool(text) and len(re.sub(r'\W+', '', text)) > 1

    def concat(text1, text2):
        """ Concatenate two texts, each of which may be ``None``. """
        return text2 if text1 is None else text1 + (text2 or "")

    def append_content(node, source):
        """ Append the content of ``source`` node to ``node``. """
        # the text of source goes after the last child of node if there is
        # one, otherwise it extends the text of node itself
        if len(node):
            node[-1].tail = concat(node[-1].tail, source.text)
        else:
            node.text = concat(node.text, source.text)
        for child in source:
            node.append(child)

    def translate_text(text):
        """ Return the translation of ``text`` (the term to translate is without
            surrounding spaces), or a falsy value if no translation applies.
        """
        term = text.strip()
        trans = term and callback(term)
        # put the translation back in place of the term, which keeps the
        # surrounding whitespace of text
        return trans and text.replace(term, trans)

    def translate_content(node):
        """ Return ``node`` with its content translated inline. """
        # serialize the node that contains the stuff to translate
        text = serialize(node)
        # retrieve the node's content and translate it
        match = node_pattern.match(text)
        trans = translate_text(match.group(1))
        if trans:
            # replace the content, and convert it back to an XML node
            text = text[:match.start(1)] + trans + text[match.end(1):]
            try:
                node = parse(text)
            except etree.ParseError:
                # fallback: escape the translation as text
                node = etree.Element(node.tag, node.attrib, node.nsmap)
                node.text = trans
        return node

    def process(node):
        """ If ``node`` can be translated inline, return ``(has_text, node)``,
            where ``has_text`` is a boolean that tells whether ``node`` contains
            some actual text to translate. Otherwise return ``(None, result)``,
            where ``result`` is the translation of ``node`` except for its tail.
        """
        # nodes that must be left alone are returned untouched: skipped node
        # types and tags, nodes flagged with t-translation="off", <attribute>
        # elements naming a non-translatable attribute, and a root node whose
        # text starts with a DOCTYPE declaration
        if (
            isinstance(node, SKIPPED_ELEMENT_TYPES) or
            node.tag in SKIPPED_ELEMENTS or
            node.get('t-translation', "").strip() == "off" or
            node.tag == 'attribute' and node.get('name') not in TRANSLATED_ATTRS or
            node.getparent() is None and avoid_pattern.match(node.text or "")
        ):
            return (None, node)

        # make an element like node that will contain the result
        result = etree.Element(node.tag, node.attrib, node.nsmap)

        # use a "todo" node to translate content by parts
        todo = etree.Element('div', nsmap=node.nsmap)
        if avoid_pattern.match(node.text or ""):
            # a DOCTYPE declaration is copied as is, never translated
            result.text = node.text
        else:
            todo.text = node.text
        todo_has_text = nonspace(todo.text)

        # process children recursively
        for child in node:
            child_has_text, child = process(child)
            if child_has_text is None:
                # translate the content of todo and append it to result
                append_content(result, translate_content(todo) if todo_has_text else todo)
                # add translated child to result
                result.append(child)
                # move child's untranslated tail to todo
                todo = etree.Element('div', nsmap=node.nsmap)
                todo.text, child.tail = child.tail, None
                todo_has_text = nonspace(todo.text)
            else:
                # child is translatable inline; add it to todo
                todo.append(child)
                todo_has_text = todo_has_text or child_has_text

        # determine whether node is translatable inline
        # (an inline tag, nothing flushed to result yet, and no "t-" attribute)
        if (
            node.tag in TRANSLATED_ELEMENTS and
            not (result.text or len(result)) and
            not any(name.startswith("t-") for name in node.attrib)
        ):
            # complete result and return it
            append_content(result, todo)
            result.tail = node.tail
            has_text = (
                todo_has_text or nonspace(result.text) or nonspace(result.tail)
                or any((key in TRANSLATED_ATTRS and val) for key, val in result.attrib.items())
            )
            return (has_text, result)

        # translate the content of todo and append it to result
        append_content(result, translate_content(todo) if todo_has_text else todo)

        # translate the required attributes
        for name, value in result.attrib.items():
            if name in TRANSLATED_ATTRS:
                # keep the original value when no translation applies
                result.set(name, translate_text(value) or value)

        # add the untranslated tail to result
        result.tail = node.tail

        return (None, result)

    has_text, node = process(node)
    if has_text is True:
        # translate the node as a whole
        # (wrap it into a <div> so that the node itself, tags included, is
        # part of the term; the translated node is the wrapper's first child)
        wrapped = etree.Element('div')
        wrapped.append(node)
        return translate_content(wrapped)[0]

    return node
287
288
def parse_xml(text):
    """ Parse ``text`` with ``etree.fromstring`` and return the result. """
    return etree.fromstring(text)
291
def serialize_xml(node):
    """ Serialize ``node`` with the 'xml' method, as unicode text. """
    return etree.tostring(node, method='xml', encoding='unicode')
294
# HTML parser shared by all the calls to parse_html()
_HTML_PARSER = etree.HTMLParser(encoding='utf8')

def parse_html(text):
    """ Parse the HTML fragment ``text`` with the shared HTML parser and return
        the result of ``html.fragment_fromstring``.
    """
    return html.fragment_fromstring(text, parser=_HTML_PARSER)
299
def serialize_html(node):
    """ Serialize ``node`` with the 'html' method, as unicode text. """
    return etree.tostring(node, method='html', encoding='unicode')
302
303
def xml_translate(callback, value):
    """ Return the XML string ``value`` where the terms have been translated
        with ``callback``.

        A falsy ``value`` is returned as is. When ``value`` cannot be parsed as
        XML, it is wrapped into a ``<div>`` and parsed as HTML instead; the
        wrapping tags are stripped from the returned text.
    """
    if value:
        try:
            tree = parse_xml(value)
            translated = translate_xml_node(tree, callback, parse_xml, serialize_xml)
            return serialize_xml(translated)
        except etree.ParseError:
            # fallback for translated terms: use an HTML parser and wrap the term
            tree = parse_html(u"<div>%s</div>" % value)
            translated = translate_xml_node(tree, callback, parse_xml, serialize_xml)
            serialized = serialize_xml(translated)
            # remove tags <div> and </div> from result
            return serialized[len("<div>"):-len("</div>")]
    return value
321
def html_translate(callback, value):
    """ Return the HTML string ``value`` where the terms have been translated
        with ``callback``.

        A falsy ``value`` is returned as is. If the translation raises a
        ``ValueError``, the error is logged and the source ``value`` is
        returned.
    """
    if not value:
        return value

    try:
        # value may be some HTML fragment, wrap it into a div
        wrapper = parse_html("<div>%s</div>" % value)
        translated = translate_xml_node(wrapper, callback, parse_html, serialize_html)
        serialized = serialize_html(translated)
    except ValueError:
        _logger.exception("Cannot translate malformed HTML, using source value instead")
        return value

    # remove tags <div> and </div> from result
    return serialized[5:-6]
339
340
341#
342# Warning: better use self.env['ir.translation']._get_source if you can
343#
def translate(cr, name, source_type, lang, source=None):
    """ Look up a translation directly in table ``ir_translation``.

        The lookup is made on ``lang`` and ``source_type``, and on ``name``
        and/or ``source`` depending on which ones are given.

        :param cr: database cursor (must provide ``execute`` and ``fetchone``)
        :param name: name of the translated item (converted with ``str``), or
            a falsy value to search by source only
        :param source_type: value of column ``type`` to match
        :param lang: language code of the translation
        :param source: source term, or a falsy value to search by name only
        :return: the translated value, or ``False`` if there is none (or if it
            is empty)
    """
    if source and name:
        query = 'select value from ir_translation where lang=%s and type=%s and name=%s and src=%s and md5(src)=md5(%s)'
        params = (lang, source_type, str(name), source, source)
    elif name:
        query = 'select value from ir_translation where lang=%s and type=%s and name=%s'
        params = (lang, source_type, str(name))
    elif source:
        query = 'select value from ir_translation where lang=%s and type=%s and src=%s and md5(src)=md5(%s)'
        params = (lang, source_type, source, source)
    else:
        # nothing to search for: no query would be executed, and fetching from
        # the cursor would then either fail or return a row that belongs to a
        # previous, unrelated query
        return False

    cr.execute(query, params)
    row = cr.fetchone()
    if not row:
        return False
    return row[0] or False
354
def translate_sql_constraint(cr, key, lang):
    """ Return the message of the SQL constraint named ``key`` (of type 'u'),
        translated in ``lang`` when a non-empty translation exists, and the
        message stored on the constraint otherwise.

        :param cr: database cursor
        :param key: name of the constraint in ``ir_model_constraint``
        :param lang: language code of the translation
    """
    query = """
        SELECT COALESCE(t.value, c.message) as message
        FROM ir_model_constraint c
        LEFT JOIN
        (SELECT res_id, value FROM ir_translation
         WHERE type='model'
           AND name='ir.model.constraint,message'
           AND lang=%s
           AND value!='') AS t
        ON c.id=t.res_id
        WHERE name=%s and type='u'
        """
    cr.execute(query, (lang, key))
    row = cr.fetchone()
    return row[0]
369
class GettextAlias(object):
    """ Callable that translates code terms, as in ``_("some term")``.

        The language and the database cursor used for the lookup are guessed
        from the local variables of the calling frame (``context``, ``kwargs``,
        ``self``, ``cr``, ...). When nothing usable is found, or when the
        lookup fails, the source term is returned untranslated.
    """

    def _get_db(self):
        """ Return a connection to the database named after attribute
            ``dbname`` of the current thread, or ``None`` without such a name.
        """
        # find current DB based on thread/worker db name (see netsvc)
        # (current_thread() replaces its deprecated alias currentThread())
        db_name = getattr(threading.current_thread(), 'dbname', None)
        if db_name:
            return odoo.sql_db.db_connect(db_name)
        return None

    def _get_cr(self, frame, allow_create=True):
        """ Return a pair ``(cr, is_new)`` for the given frame.

            :param frame: the frame whose local variables are inspected
            :param allow_create: whether a new cursor may be opened as a last
                resort
            :return: the cursor found (or ``None``) and whether it has been
                created here, in which case the caller has to close it
        """
        # try, in order: cr, cursor, self.env.cr, self.cr,
        # request.env.cr
        if 'cr' in frame.f_locals:
            return frame.f_locals['cr'], False
        if 'cursor' in frame.f_locals:
            return frame.f_locals['cursor'], False
        s = frame.f_locals.get('self')
        if hasattr(s, 'env'):
            return s.env.cr, False
        if hasattr(s, 'cr'):
            return s.cr, False
        try:
            from odoo.http import request
            return request.env.cr, False
        except RuntimeError:
            # request.env.cr is not usable here, try the next option
            pass
        if allow_create:
            # create a new cursor
            db = self._get_db()
            if db is not None:
                return db.cursor(), True
        return None, False

    def _get_uid(self, frame):
        """ Return the user id found in the local variables of ``frame``, or
            ``None`` if there is none.
        """
        # try, in order: uid, user, self.env.uid
        if 'uid' in frame.f_locals:
            return frame.f_locals['uid']
        if 'user' in frame.f_locals:
            return int(frame.f_locals['user'])      # user may be a record
        s = frame.f_locals.get('self')
        # the frame may have no ``self``, or a ``self`` without environment:
        # return None instead of failing, the caller checks the uid
        env = getattr(s, 'env', None)
        return getattr(env, 'uid', None)

    def _get_lang(self, frame):
        """ Return the language code to use for the given frame, or a falsy
            value if it cannot be determined.
        """
        # try, in order: context.get('lang'), kwargs['context'].get('lang'),
        # self.env.lang, self.localcontext.get('lang'), request.env.lang
        lang = None
        if frame.f_locals.get('context'):
            lang = frame.f_locals['context'].get('lang')
        if not lang:
            kwargs = frame.f_locals.get('kwargs', {})
            if kwargs.get('context'):
                lang = kwargs['context'].get('lang')
        if not lang:
            s = frame.f_locals.get('self')
            if hasattr(s, 'env'):
                lang = s.env.lang
            if not lang:
                if hasattr(s, 'localcontext'):
                    lang = s.localcontext.get('lang')
            if not lang:
                try:
                    from odoo.http import request
                    lang = request.env.lang
                except RuntimeError:
                    pass
            if not lang:
                # Last resort: attempt to guess the language of the user
                # Pitfall: some operations are performed in sudo mode, and we
                #          don't know the original uid, so the language may
                #          be wrong when the admin language differs.
                (cr, dummy) = self._get_cr(frame, allow_create=False)
                uid = self._get_uid(frame)
                if cr and uid:
                    env = odoo.api.Environment(cr, uid, {})
                    lang = env['res.users'].context_get()['lang']
        return lang

    def __call__(self, source, *args, **kwargs):
        """ Return the translation of ``source``, interpolated with either
            ``args`` or ``kwargs`` (not both) when given.

            If the translation cannot be interpolated, the error is logged and
            the interpolated source is returned instead.
        """
        translation = self._get_translation(source)
        assert not (args and kwargs)
        if args or kwargs:
            try:
                return translation % (args or kwargs)
            except (TypeError, ValueError, KeyError):
                bad = translation
                # fallback: apply to source before logging exception (in case source fails)
                translation = source % (args or kwargs)
                _logger.exception('Bad translation %r for string %r', bad, source)
        return translation

    def _get_translation(self, source):
        """ Return the translation of ``source`` in the language of the frame
            that called the caller of this method, or ``source`` itself when
            it cannot be translated.

            This method must be called directly by the function that is itself
            called from the code to translate (like ``__call__``), because the
            frame to inspect is found exactly two levels up the stack.
        """
        res = source
        cr = None
        is_new_cr = False
        try:
            frame = inspect.currentframe()
            if frame is None:
                return source
            # first step back: the direct caller (e.g. __call__)
            frame = frame.f_back
            if not frame:
                return source
            # second step back: the code that asked for the translation
            frame = frame.f_back
            if not frame:
                return source
            lang = self._get_lang(frame)
            if lang:
                cr, is_new_cr = self._get_cr(frame)
                if cr:
                    # Try to use ir.translation to benefit from global cache if possible
                    env = odoo.api.Environment(cr, odoo.SUPERUSER_ID, {})
                    res = env['ir.translation']._get_source(None, ('code',), lang, source)
                else:
                    _logger.debug('no context cursor detected, skipping translation for "%r"', source)
            else:
                _logger.debug('no translation language detected, skipping translation for "%r" ', source)
        except Exception:
            # translating is best effort: never let a failure reach the
            # caller, but keep the traceback in the debug log
            _logger.debug('translation went wrong for "%r", skipped', source, exc_info=True)
        finally:
            # only close the cursors that have been opened here
            if cr and is_new_cr:
                cr.close()
        return res or ''
491
492
493@functools.total_ordering
494class _lt:
495    """ Lazy code translation
496
497    Similar to GettextAlias but the translation lookup will be done only at
498    __str__ execution.
499
500    A code using translated global variables such as:
501
502    LABEL = _lt("User")
503
504    def _compute_label(self):
505        context = {'lang': self.partner_id.lang}
506        self.user_label = LABEL
507
508    works as expected (unlike the classic GettextAlias implementation).
509    """
510
511    __slots__ = ['_source', '_args']
512    def __init__(self, source, *args, **kwargs):
513        self._source = source
514        assert not (args and kwargs)
515        self._args = args or kwargs
516
517    def __str__(self):
518        # Call _._get_translation() like _() does, so that we have the same number
519        # of stack frames calling _get_translation()
520        translation = _._get_translation(self._source)
521        if self._args:
522            try:
523                return translation % self._args
524            except (TypeError, ValueError, KeyError):
525                bad = translation
526                # fallback: apply to source before logging exception (in case source fails)
527                translation = self._source % self._args
528                _logger.exception('Bad translation %r for string %r', bad, self._source)
529        return translation
530
531    def __eq__(self, other):
532        """ Prevent using equal operators
533
534        Prevent direct comparisons with ``self``.
535        One should compare the translation of ``self._source`` as ``str(self) == X``.
536        """
537        raise NotImplementedError()
538
539    def __lt__(self, other):
540        raise NotImplementedError()
541
542    def __add__(self, other):
543        # Call _._get_translation() like _() does, so that we have the same number
544        # of stack frames calling _get_translation()
545        if isinstance(other, str):
546            return _._get_translation(self._source) + other
547        elif isinstance(other, _lt):
548            return _._get_translation(self._source) + _._get_translation(other._source)
549        return NotImplemented
550
551    def __radd__(self, other):
552        # Call _._get_translation() like _() does, so that we have the same number
553        # of stack frames calling _get_translation()
554        if isinstance(other, str):
555            return other + _._get_translation(self._source)
556        return NotImplemented
557
# the translation function: ``_("term")`` or ``_("term %s", value)``
_ = GettextAlias()
559
560
def quote(s):
    """Returns quoted PO term string, with special PO characters escaped

    Backslashes and double quotes are escaped, and every literal newline
    becomes an escaped newline followed by a line break between two quoted
    chunks. Terms containing an already escaped newline are rejected.
    """
    assert r"\n" not in s, "Translation terms may not include escaped newlines ('\\n'), please use only literal newlines! (in '%s')" % s
    # backslashes must be escaped first, as the next steps introduce new ones
    escaped = s.replace('\\', '\\\\')
    escaped = escaped.replace('"', '\\"')
    escaped = escaped.replace('\n', '\\n"\n"')
    return '"%s"' % escaped
567
568re_escaped_char = re.compile(r"(\\.)")
569re_escaped_replacements = {'n': '\n', 't': '\t',}
570
571def _sub_replacement(match_obj):
572    return re_escaped_replacements.get(match_obj.group(1)[1], match_obj.group(1)[1])
573
574def unquote(str):
575    """Returns unquoted PO term string, with special PO characters unescaped"""
576    return re_escaped_char.sub(_sub_replacement, str[1:-1])
577
def TranslationFileReader(source, fileformat='po'):
    """ Return a reader that iterates over the translation file ``source`` and
        returns Odoo translation entries.

        :param source: the file to read
        :param fileformat: either 'csv' or 'po'
        :raise Exception: for any other file format
    """
    if fileformat == 'csv':
        reader = CSVFileReader(source)
    elif fileformat == 'po':
        reader = PoFileReader(source)
    else:
        _logger.info('Bad file format: %s', fileformat)
        raise Exception(_('Bad file format: %s', fileformat))
    return reader
586
class CSVFileReader:
    """ Iterate over a CSV translation file to return Odoo translation entries.

        ``source`` is a binary file object with UTF-8 encoded content; its
        first row gives the names of the columns. Every entry is returned as a
        dict, completed as follows:

        - a numeric ``res_id`` is converted to ``int``;
        - otherwise, unless the row provides an ``imd_name``, ``res_id`` is
          taken as an external id ``<module>.<name>``, which is split into
          ``module`` and ``imd_name`` (and ``res_id`` becomes ``None``);
        - entries of type 'model' and 'model_terms' get an ``imd_model``, the
          part of ``name`` before the comma.

        Consecutive entries of type 'code' with the same source are returned
        only once.
    """
    def __init__(self, source):
        _reader = codecs.getreader('utf-8')
        self.source = csv.DictReader(_reader(source), quotechar='"', delimiter=',')
        # source of the last entry of type 'code' that has been returned
        self.prev_code_src = ""

    def __iter__(self):
        for entry in self.source:

            # determine <module>.<imd_name> from res_id
            if entry["res_id"] and entry["res_id"].isnumeric():
                # res_id is an id or line number
                entry["res_id"] = int(entry["res_id"])
            elif not entry.get("imd_name"):
                # res_id is an external id and must follow <module>.<name>;
                # split on the first dot only, so that a name containing dots
                # (which PoFileReader accepts as well) does not abort the import
                entry["module"], entry["imd_name"] = entry["res_id"].split(".", 1)
                entry["res_id"] = None
            if entry["type"] in ("model", "model_terms"):
                entry["imd_model"] = entry["name"].partition(',')[0]

            if entry["type"] == "code":
                if entry["src"] == self.prev_code_src:
                    # skip entry due to unicity constrain on code translations
                    continue
                self.prev_code_src = entry["src"]

            yield entry
614
class PoFileReader:
    """ Iterate over po file to return Odoo translation entries

        ``source`` is either something that ``polib.pofile`` accepts as a
        string (a path or the content of a file), or a binary file object.
        When the name of the source ends with '.po' and the corresponding
        '.pot' file exists, the latter is merged into the former.

        Iterating returns one dict per supported occurrence ('model',
        'model_terms' and 'code') of every non-obsolete entry.
    """
    def __init__(self, source):

        def get_pot_path(source_name):
            # when fileobj is a TemporaryFile, its name is an integer in P3, a string in P2
            if isinstance(source_name, str) and source_name.endswith('.po'):
                # Normally the path looks like /path/to/xxx/i18n/lang.po
                # and we try to find the corresponding
                # /path/to/xxx/i18n/xxx.pot file.
                # (Sometimes we have 'i18n_extra' instead of just 'i18n')
                path = Path(source_name)
                filename = path.parent.parent.name + '.pot'
                pot_path = path.with_name(filename)
                return str(pot_path) if pot_path.exists() else False
            return False

        # polib accepts a path or the file content as a string, not a fileobj
        if isinstance(source, str):
            self.pofile = polib.pofile(source)
            pot_path = get_pot_path(source)
        else:
            # either a BufferedIOBase or result from NamedTemporaryFile
            self.pofile = polib.pofile(source.read().decode())
            pot_path = get_pot_path(source.name)

        if pot_path:
            # Make a reader for the POT file
            # (Because the POT comments are correct on GitHub but the
            # PO comments tends to be outdated. See LP bug 933496.)
            self.pofile.merge(polib.pofile(pot_path))

    def __iter__(self):
        for entry in self.pofile:
            if entry.obsolete:
                continue

            # in case of moduleS keep only the first
            match = re.match(r"(module[s]?): (\w+)", entry.comment)
            # an entry without module comment must not abort the whole file:
            # its code occurrences are then returned with module None
            entry_module = match.group(2) if match else None
            # the module line is not a comment, in its plural form either
            comments = "\n".join(
                line for line in entry.comment.split('\n')
                if not line.startswith(('module:', 'modules:'))
            )
            source = entry.msgid
            translation = entry.msgstr
            found_code_occurrence = False
            for occurrence, line_number in entry.occurrences:
                match = re.match(r'(model|model_terms):([\w.]+),([\w]+):(\w+)\.([^ ]+)', occurrence)
                if match:
                    # the module comes from the external id of the record; it
                    # has its own variable so as not to alter the module of
                    # the code occurrences that may follow
                    occ_type, model_name, field_name, xmlid_module, xmlid = match.groups()
                    yield {
                        'type': occ_type,
                        'imd_model': model_name,
                        'name': model_name+','+field_name,
                        'imd_name': xmlid,
                        'res_id': None,
                        'src': source,
                        'value': translation,
                        'comments': comments,
                        'module': xmlid_module,
                    }
                    continue

                match = re.match(r'(code):([\w/.]+)', occurrence)
                if match:
                    occ_type, name = match.groups()
                    if found_code_occurrence:
                        # unicity constrain on code translation
                        continue
                    found_code_occurrence = True
                    yield {
                        'type': occ_type,
                        'name': name,
                        'src': source,
                        'value': translation,
                        'comments': comments,
                        'res_id': int(line_number),
                        'module': entry_module,
                    }
                    continue

                match = re.match(r'(selection):([\w.]+),([\w]+)', occurrence)
                if match:
                    _logger.info("Skipped deprecated occurrence %s", occurrence)
                    continue

                match = re.match(r'(sql_constraint|constraint):([\w.]+)', occurrence)
                if match:
                    _logger.info("Skipped deprecated occurrence %s", occurrence)
                    continue
                _logger.error("malformed po file: unknown occurrence: %s", occurrence)
704
def TranslationFileWriter(target, fileformat='po', lang=None):
    """ Return a writer of translation rows for the file ``target``.

        :param target: the file to write to
        :param fileformat: either 'csv', 'po' or 'tgz'
        :param lang: language code passed to the 'po' and 'tgz' writers
        :raise Exception: for any other file format
    """
    if fileformat == 'csv':
        writer = CSVFileWriter(target)
    elif fileformat == 'po':
        writer = PoFileWriter(target, lang=lang)
    elif fileformat == 'tgz':
        writer = TarFileWriter(target, lang=lang)
    else:
        raise Exception(_('Unrecognized extension: must be one of '
                          '.csv, .po, or .tgz (received .%s).') % fileformat)
    return writer
718
719
class CSVFileWriter:
    """ Write translation rows to ``target`` in CSV, with the "UNIX" dialect.

        The header row is written as soon as the writer is created.
    """
    def __init__(self, target):
        header = ("module","type","name","res_id","src","value","comments")
        self.writer = pycompat.csv_writer(target, dialect='UNIX')
        # write header first
        self.writer.writerow(header)

    def write_rows(self, rows):
        """ Write the given rows, which are tuples ``(module, type, name,
            res_id, src, value, comments)`` where ``comments`` is an iterable
            of strings, written as a single value with one comment per line.
        """
        for row in rows:
            module, type_, name, res_id, src, trad, comments = row
            joined_comments = '\n'.join(comments)
            self.writer.writerow((module, type_, name, res_id, src, trad, joined_comments))
731
732
class PoFileWriter:
    """ Write translation rows to a PO file, or to a POT template when no
        language is given.

        The rows are grouped by source term: the file gets one entry per
        source, listing all the occurrences of the term. Modules and comments
        are written in sorted order, such that the same rows always produce
        the same content (apart from the dates of the header).
    """
    def __init__(self, target, lang):

        self.buffer = target
        self.lang = lang
        self.po = polib.POFile()

    def write_rows(self, rows):
        """ Add an entry per distinct source of ``rows``, then write the whole
            file, encoded as bytes, to the target buffer.

            :param rows: iterable of tuples ``(module, type, name, res_id,
                src, value, comments)`` where ``comments`` is an iterable of
                strings
        """
        # we now group the translations by source. That means one translation per source.
        grouped_rows = {}
        modules = set()
        for module, type_, name, res_id, src, trad, comments in rows:
            row = grouped_rows.setdefault(src, {})
            row.setdefault('modules', set()).add(module)
            # keep the first actual translation, one that differs from the source
            if not row.get('translation') and trad != src:
                row['translation'] = trad
            row.setdefault('tnrs', []).append((type_, name, res_id))
            row.setdefault('comments', set()).update(comments)
            modules.add(module)

        for src, row in sorted(grouped_rows.items()):
            # translation template (no lang), so no translation value
            translation = (row.get('translation') or '') if self.lang else ''
            # modules and comments are collected in sets: sort them, as the
            # iteration order of a set of strings changes from run to run
            self.add_entry(
                sorted(row['modules']), sorted(row['tnrs']), src, translation,
                sorted(row['comments']),
            )

        import odoo.release as release
        self.po.header = "Translation of %s.\n" \
                    "This file contains the translation of the following modules:\n" \
                    "%s" % (release.description, ''.join("\t* %s\n" % m for m in sorted(modules)))
        now = datetime.utcnow().strftime('%Y-%m-%d %H:%M+0000')
        self.po.metadata = {
            'Project-Id-Version': "%s %s" % (release.description, release.version),
            'Report-Msgid-Bugs-To': '',
            'POT-Creation-Date': now,
            'PO-Revision-Date': now,
            'Last-Translator': '',
            'Language-Team': '',
            'MIME-Version': '1.0',
            'Content-Type': 'text/plain; charset=UTF-8',
            'Content-Transfer-Encoding': '',
            'Plural-Forms': '',
        }

        # buffer expects bytes
        self.buffer.write(str(self.po).encode())

    def add_entry(self, modules, tnrs, source, trad, comments=None):
        """ Append an entry to the PO file.

            :param modules: names of the modules using the term; they are
                written in the given order
            :param tnrs: occurrences of the term, as tuples
                ``(type, name, res_id)``
            :param source: the source term (msgid)
            :param trad: the translation (msgstr)
            :param comments: optional comments, written one per line after the
                line of the modules
        """
        entry = polib.POEntry(
            msgid=source,
            msgstr=trad,
        )
        plural = 's' if len(modules) > 1 else ''
        entry.comment = "module%s: %s" % (plural, ', '.join(modules))
        if comments:
            entry.comment += "\n" + "\n".join(comments)

        code = False
        for typy, name, res_id in tnrs:
            if typy == 'code':
                code = True
                res_id = 0
            if isinstance(res_id, int) or res_id.isdigit():
                # second term of occurrence must be a digit
                # occurrence line at 0 are discarded when rendered to string
                entry.occurrences.append((u"%s:%s" % (typy, name), str(res_id)))
            else:
                entry.occurrences.append((u"%s:%s:%s" % (typy, name, res_id), ''))
        if code:
            # terms coming from the code may use % formatting
            entry.flags.append("python-format")
        self.po.append(entry)
807
808
class TarFileWriter:
    """ Write translation rows as a gzipped tar archive containing one PO
    file per module.

    Each file is stored as ``<module>/i18n/<lang>.po`` or, when no language
    is given, as ``<module>/i18n/<module>.pot``.

    :param target: binary file-like object receiving the archive
    :param lang: language code of the exported translations, falsy to
                 export translation templates
    """

    def __init__(self, target, lang):
        self.tar = tarfile.open(fileobj=target, mode='w|gz')
        self.lang = lang

    def write_rows(self, rows):
        """ Group ``rows`` by module and add one PO(T) file per module to the
        archive.

        The archive is closed when this method returns or raises, so it can
        only be called once per writer.

        :param rows: iterable of sequences whose first item is the module name
        """
        try:
            rows_by_module = defaultdict(list)
            for row in rows:
                rows_by_module[row[0]].append(row)

            for mod, modrows in rows_by_module.items():
                with io.BytesIO() as buf:
                    po = PoFileWriter(buf, lang=self.lang)
                    po.write_rows(modrows)
                    buf.seek(0)

                    # member names of an archive always use forward slashes,
                    # whatever the path separator of the platform is
                    info = tarfile.TarInfo('/'.join((
                        mod,
                        'i18n',
                        '{basename}.{ext}'.format(
                            basename=self.lang or mod,
                            ext='po' if self.lang else 'pot',
                        ),
                    )))
                    # addfile will read <size> bytes from the buffer so
                    # size *must* be set first
                    info.size = len(buf.getvalue())

                    self.tar.addfile(info, fileobj=buf)
        finally:
            # do not leave the archive open when a module fails to export
            self.tar.close()
839
840# Methods to export the translation file
def trans_export(lang, modules, buffer, format, cr):
    """ Export the terms of ``modules`` into ``buffer``.

    :param lang: language code of the translations to export, passed to both
                 the reader and the writer
    :param modules: modules whose terms are exported
    :param buffer: file-like object handed to the writer
    :param format: file format handed to the writer
    :param cr: database cursor used to read the terms
    """
    rows = TranslationModuleReader(cr, modules=modules, lang=lang)
    TranslationFileWriter(buffer, fileformat=format, lang=lang).write_rows(rows)
845
846
def trans_parse_rml(de):
    """ Collect the terms found in the texts of the descendants of ``de``.

    The text of every element located at least two levels below ``de`` is
    split around its ``[[ ... ]]`` sections; newlines of each remaining piece
    are replaced by spaces and the piece is stripped. Empty pieces are
    dropped.

    :param de: iterable of elements, each of them being iterable over its
               own children
    :return: list of the terms found, encoded in UTF-8
    :rtype: list[bytes]
    """
    res = []
    for n in de:
        for m in n:
            if isinstance(m, SKIPPED_ELEMENT_TYPES) or not m.text:
                continue
            # raw string: ``\[`` is not a valid escape in a regular literal
            for piece in re.split(r'\[\[.+?\]\]', m.text):
                term = piece.replace('\n', ' ').strip()
                if term:
                    res.append(term.encode("utf8"))
        # the children of `n` have been handled above, the recursive call
        # takes care of the levels below them
        res.extend(trans_parse_rml(n))
    return res
859
860
861def _push(callback, term, source_line):
862    """ Sanity check before pushing translation terms """
863    term = (term or "").strip()
864    # Avoid non-char tokens like ':' '...' '.00' etc.
865    if len(term) > 8 or any(x.isalpha() for x in term):
866        callback(term, source_line)
867
868
869# tests whether an object is in a list of modules
def in_modules(object_name, modules):
    """ Tell whether ``object_name`` belongs to one of ``modules``.

    The module of an object is the part of its name before the first dot,
    except for the ``ir`` and ``res`` prefixes which stand for ``base``.
    Everything matches when ``modules`` contains ``'all'``.
    """
    if 'all' in modules:
        return True

    prefix = object_name.split('.')[0]
    if prefix in ('ir', 'res'):
        prefix = 'base'
    return prefix in modules
881
882
883def _extract_translatable_qweb_terms(element, callback):
884    """ Helper method to walk an etree document representing
885        a QWeb template, and call ``callback(term)`` for each
886        translatable term that is found in the document.
887
888        :param etree._Element element: root of etree document to extract terms from
889        :param Callable callback: a callable in the form ``f(term, source_line)``,
890                                  that will be called for each extracted term.
891    """
892    # not using elementTree.iterparse because we need to skip sub-trees in case
893    # the ancestor element had a reason to be skipped
894    for el in element:
895        if isinstance(el, SKIPPED_ELEMENT_TYPES): continue
896        if (el.tag.lower() not in SKIPPED_ELEMENTS
897                and "t-js" not in el.attrib
898                and not ("t-jquery" in el.attrib and "t-operation" not in el.attrib)
899                and el.get("t-translation", '').strip() != "off"):
900
901            _push(callback, el.text, el.sourceline)
902            # Do not export terms contained on the Component directive of OWL
903            # attributes in this context are most of the time variables,
904            # not real HTML attributes.
905            # Node tags starting with a capital letter are considered OWL Components
906            # and a widespread convention and good practice for DOM tags is to write
907            # them all lower case.
908            # https://www.w3schools.com/html/html5_syntax.asp
909            # https://github.com/odoo/owl/blob/master/doc/reference/component.md#composition
910            if not el.tag[0].isupper() and 't-component' not in el.attrib:
911                for att in ('title', 'alt', 'label', 'placeholder', 'aria-label'):
912                    if att in el.attrib:
913                        _push(callback, el.attrib[att], el.sourceline)
914            _extract_translatable_qweb_terms(el, callback)
915        _push(callback, el.tail, el.sourceline)
916
917
def babel_extract_qweb(fileobj, keywords, comment_tags, options):
    """Babel message extractor for qweb template files.

    ``keywords``, ``comment_tags`` and ``options`` are part of the extractor
    signature but are not used here.

    :param fileobj: the file-like object the messages should be extracted from
    :param keywords: a list of keywords (i.e. function names) that should
                     be recognized as translation functions
    :param comment_tags: a list of translator tags to search for and
                         include in the results
    :param options: a dictionary of additional options (optional)
    :return: a list of ``(lineno, funcname, message, comments)`` tuples,
             where ``funcname`` is always None and ``comments`` always empty
    :rtype: list
    """
    messages = []

    def collect(term, source_line):
        messages.append((source_line, None, term, []))

    root = etree.parse(fileobj).getroot()
    _extract_translatable_qweb_terms(root, collect)
    return messages
937
# external id of a record: `name` and `module` of the ir_model_data row,
# `model` and `res_id` of the record it points to
ImdInfo = namedtuple('ExternalId', 'name model res_id module')
939
940
class TranslationModuleReader:
    """ Retrieve translated records per module

    The terms are collected when the reader is created; iterating over it
    yields one ``(module, type, name, res_id, source, translation, comments)``
    tuple per collected term.

    :param cr: cursor to database to export
    :param modules: list of modules to filter the exported terms, can be ['all']
                    records with no external id are always ignored
    :param lang: language code to retrieve the translations
                 retrieve source terms only if not set
    """

    def __init__(self, cr, modules=None, lang=None):
        self._cr = cr
        self._modules = modules or ['all']
        self._lang = lang
        self.env = odoo.api.Environment(cr, odoo.SUPERUSER_ID, {})
        # collected terms, stored as tuples
        # (module, source, name, res_id, ttype, comments, record_id)
        self._to_translate = []
        # (directory, recursive) pairs scanned for static terms
        self._path_list = [(path, True) for path in odoo.addons.__path__]
        self._installed_modules = [
            m['name']
            for m in self.env['ir.module.module'].search_read([('state', '=', 'installed')], fields=['name'])
        ]

        self._export_translatable_records()
        self._export_translatable_resources()

    def __iter__(self):
        """ Export ir.translation values for all retrieved records """

        IrTranslation = self.env['ir.translation']
        for module, source, name, res_id, ttype, comments, record_id in self._to_translate:
            trans = (
                # no name is given when looking for the translation of a
                # 'code' term: its name is the path of the file it comes from
                IrTranslation._get_source(name if ttype != "code" else None, ttype, self._lang, source, res_id=record_id)
                if self._lang
                else ""
            )
            yield (module, ttype, name, res_id, source, encode(trans) or '', comments)

    def _push_translation(self, module, ttype, name, res_id, source, comments=None, record_id=None):
        """ Insert a translation that will be used in the file generation
        In po file will create an entry
        #: <ttype>:<name>:<res_id>
        #, <comment>
        msgid "<source>"
        record_id is the database id of the record being translated
        """
        # empty and one-letter terms are ignored, they probably are not meant to be
        # translated, and would be very hard to translate anyway.
        sanitized_term = (source or '').strip()
        # remove non-alphanumeric chars
        sanitized_term = re.sub(r'\W+', '', sanitized_term)
        if not sanitized_term or len(sanitized_term) <= 1:
            return
        # the source is stored as given, not in its sanitized form
        self._to_translate.append((module, source, name, res_id, ttype, tuple(comments or ()), record_id))

    def _get_translatable_records(self, imd_records):
        """ Filter the records that are translatable

        A record is considered as untranslatable if:
        - it does not exist
        - the model is flagged with _translate=False
        - it is a field of a model flagged with _translate=False
        - it is a selection of a field of a model flagged with _translate=False
        - it is a field, or a selection of a field, whose model is unknown or
          does not hold that field

        :param imd_records: a non-empty collection of namedtuple ImdInfo
                            belonging to the same model
        :return: recordset of the translatable records
        """
        model = next(iter(imd_records)).model
        if model not in self.env:
            _logger.error("Unable to find object %r", model)
            return self.env["_unknown"].browse()

        if not self.env[model]._translate:
            return self.env[model].browse()

        res_ids = [r.res_id for r in imd_records]
        records = self.env[model].browse(res_ids).exists()
        if len(records) < len(res_ids):
            missing_ids = set(res_ids) - set(records.ids)
            missing_records = [f"{r.module}.{r.name}" for r in imd_records if r.res_id in missing_ids]
            _logger.warning("Unable to find records of type %r with external ids %s", model, ', '.join(missing_records))
            if not records:
                return records

        if model in ('ir.model.fields', 'ir.model.fields.selection'):
            # `records` is rebound inside the loop, the iteration goes on
            # over the initial recordset
            for record in records:
                # every selection is checked against its own field, so that
                # all the selections of an untranslatable field are removed
                field = record.field_id if model == 'ir.model.fields.selection' else record
                field_model = self.env.get(field.model)
                if (field_model is None or not field_model._translate or
                        field.name not in field_model._fields):
                    # linked to a model with _translate=False, remove it
                    records -= record

        return records

    def _export_translatable_records(self):
        """ Export translations of all translated records having an external id """

        query = """SELECT min(name), model, res_id, module
                     FROM ir_model_data
                    WHERE module = ANY(%s)
                 GROUP BY model, res_id, module
                 ORDER BY module, model, min(name)"""

        if 'all' not in self._modules:
            query_param = list(self._modules)
        else:
            query_param = self._installed_modules

        self._cr.execute(query, (query_param,))

        # {model: {res_id: ImdInfo}}
        records_per_model = defaultdict(dict)
        for (xml_name, model, res_id, module) in self._cr.fetchall():
            records_per_model[model][res_id] = ImdInfo(xml_name, model, res_id, module)

        for model, imd_per_id in records_per_model.items():
            records = self._get_translatable_records(imd_per_id.values())
            if not records:
                continue

            for record in records:
                module = imd_per_id[record.id].module
                xml_name = "%s.%s" % (module, imd_per_id[record.id].name)
                for field_name, field in record._fields.items():
                    if not field.translate:
                        continue
                    name = model + "," + field_name
                    try:
                        value = record[field_name] or ''
                    except Exception:
                        # best effort: a field that cannot be read is skipped
                        continue
                    trans_type = 'model_terms' if callable(field.translate) else 'model'
                    for term in set(field.get_trans_terms(value)):
                        self._push_translation(module, trans_type, name, xml_name, term, record_id=record.id)

    def _get_module_from_path(self, path):
        """ Return the name of the module holding the file at ``path``.

        The module is the first directory below one of the recursive entries
        of ``self._path_list``; files located directly inside such an entry,
        or outside of all of them, are considered as being in 'base'.
        """
        dirname = os.path.join(os.path.dirname(path), '')
        for (mp, rec) in self._path_list:
            # trailing separator, to compare complete directory names only
            mp = os.path.join(mp, '')
            if rec and path.startswith(mp) and dirname != mp:
                return path[len(mp):].split(os.path.sep)[0]
        return 'base' # files that are not in a module are considered as being in 'base' module

    def _verified_module_filepaths(self, fname, path, root):
        """ Compute the paths of the file ``fname`` found in directory ``root``
        while scanning ``path``.

        :return: a tuple ``(module, absolute path, path relative to `path`,
                 display path)``, or four ``None`` when the module of the file
                 is not both requested and installed. The display path always
                 uses forward slashes.
        """
        fabsolutepath = join(root, fname)
        frelativepath = fabsolutepath[len(path):]
        display_path = "addons%s" % frelativepath
        module = self._get_module_from_path(fabsolutepath)
        if ('all' in self._modules or module in self._modules) and module in self._installed_modules:
            if os.path.sep != '/':
                display_path = display_path.replace(os.path.sep, '/')
            return module, fabsolutepath, frelativepath, display_path
        return None, None, None, None

    def _babel_extract_terms(self, fname, path, root, extract_method="python", trans_type='code',
                               extra_comments=None, extract_keywords=None):
        """ Extract the terms of a file with babel and push them.

        Nothing is done when the module of the file is not both requested
        and installed. Errors raised during the extraction are logged, not
        propagated.

        :param fname: name of the file
        :param path: scanned path in which the file was found
        :param root: directory holding the file
        :param extract_method: babel extraction method
        :param trans_type: type given to the pushed terms
        :param extra_comments: comments added to the ones found by babel
        :param extract_keywords: keywords handed to babel, ``{'_': None}``
                                 when not given
        """
        module, fabsolutepath, _, display_path = self._verified_module_filepaths(fname, path, root)
        if not module:
            return
        extra_comments = extra_comments or []
        if extract_keywords is None:
            extract_keywords = {'_': None}
        options = {}
        if extract_method == 'python':
            options['encoding'] = 'UTF-8'
        with open(fabsolutepath, 'rb') as src_file:
            try:
                for extracted in extract.extract(extract_method, src_file, keywords=extract_keywords, options=options):
                    # Babel 0.9.6 yields lineno, message, comments
                    # Babel 1.3 yields lineno, message, comments, context
                    lineno, message, comments = extracted[:3]
                    self._push_translation(module, trans_type, display_path, lineno,
                                           encode(message), comments + extra_comments)
            except Exception:
                _logger.exception("Failed to extract terms from %s", fabsolutepath)

    def _export_translatable_resources(self):
        """ Export translations for static terms

        This will include:
        - the python strings marked with _() or _lt()
        - the javascript strings marked with _t() or _lt() in the files of
          the directories matching */static/src*
        - the strings inside the xml files (parsed as Qweb templates) of the
          same directories
        """

        # Also scan these non-addon paths
        for bin_path in ['osv', 'report', 'modules', 'service', 'tools']:
            self._path_list.append((os.path.join(config['root_path'], bin_path), True))
        # non-recursive scan for individual files in root directory but without
        # scanning subdirectories that may contain addons
        self._path_list.append((config['root_path'], False))
        _logger.debug("Scanning modules at paths: %s", self._path_list)

        for (path, recursive) in self._path_list:
            _logger.debug("Scanning files of modules at %s", path)
            for root, dummy, files in os.walk(path, followlinks=True):
                for fname in fnmatch.filter(files, '*.py'):
                    self._babel_extract_terms(fname, path, root,
                                              extract_keywords={'_': None, '_lt': None})
                if fnmatch.fnmatch(root, '*/static/src*'):
                    # Javascript source files
                    for fname in fnmatch.filter(files, '*.js'):
                        self._babel_extract_terms(fname, path, root, 'javascript',
                                                  extra_comments=[WEB_TRANSLATION_COMMENT],
                                                  extract_keywords={'_t': None, '_lt': None})
                    # QWeb template files
                    for fname in fnmatch.filter(files, '*.xml'):
                        self._babel_extract_terms(fname, path, root, 'odoo.tools.translate:babel_extract_qweb',
                                                  extra_comments=[WEB_TRANSLATION_COMMENT])

                if not recursive:
                    # due to topdown, first iteration is in first level
                    break
1166
1167
def trans_load(cr, filename, lang, verbose=True, create_empty_translation=False, overwrite=False):
    """ Load the translation file ``filename`` for the language ``lang``.

    The format of the file is its lowercased extension; the other arguments
    are forwarded to :func:`trans_load_data`.

    :return: the result of :func:`trans_load_data`, None when an ``IOError``
             occurred (the error being logged only if ``verbose`` is set)
    """
    try:
        with file_open(filename, mode='rb') as fileobj:
            _logger.info("loading %s", filename)
            _, extension = os.path.splitext(filename)
            return trans_load_data(
                cr, fileobj, extension[1:].lower(), lang,
                verbose=verbose,
                create_empty_translation=create_empty_translation,
                overwrite=overwrite,
            )
    except IOError:
        if verbose:
            _logger.error("couldn't read translation file %s", filename)
    return None
1181
1182
def trans_load_data(cr, fileobj, fileformat, lang,
                    verbose=True, create_empty_translation=False, overwrite=False):
    """Import the translations of a file into the ir_translation table.

    :param cr: database cursor
    :param fileobj: buffer open to a translation file
    :param fileformat: format of the `fileobj` file, one of 'po' or 'csv'
    :param lang: language code of the translations contained in `fileobj`
                 language must be present and activated in the database
    :param verbose: increase log output
    :param create_empty_translation: create an ir.translation record, even if no value
                                     is provided in the translation entry
    :param overwrite: if an ir.translation already exists for a term, replace it with
                      the one in `fileobj`
    """
    if verbose:
        _logger.info('loading translation file for language %s', lang)

    env = odoo.api.Environment(cr, odoo.SUPERUSER_ID, {})

    try:
        if not env['res.lang']._lang_get(lang):
            _logger.error("Couldn't read translation for lang '%s', language not found", lang)
            return None

        # the file is always read from its beginning
        fileobj.seek(0)
        reader = TranslationFileReader(fileobj, fileformat=fileformat)

        # cursor-like object used to insert the translations in batch
        Translation = env['ir.translation']
        irt_cursor = Translation._get_import_cursor(overwrite)

        # keys expected in every pushed entry, left to None when the file
        # does not provide them
        blank_entry = dict.fromkeys(('type', 'name', 'res_id', 'src', 'value',
                                     'comments', 'imd_model', 'imd_name', 'module'))
        for row in reader:
            entry = dict(blank_entry, lang=lang)
            entry.update(row)
            # do not import empty values, unless asked to
            if create_empty_translation or entry['value']:
                irt_cursor.push(entry)

        irt_cursor.finish()
        Translation.clear_caches()
        if verbose:
            _logger.info("translation file loaded successfully")

    except IOError:
        iso_lang = get_iso_codes(lang)
        filename = '[lang: %s][format: %s]' % (iso_lang or 'new', fileformat)
        _logger.exception("couldn't read translation file %s", filename)
1245
1246
def get_locales(lang=None):
    """ Generate candidate locale names for ``lang``.

    The names combine the language with, in that order, the ``utf8``
    encoding, the preferred encoding and a fallback of the preferred
    encoding when one is known; each name is followed by its normalized
    form when it differs. The bare language comes last.

    :param lang: language code, the default locale of the system when None
    """
    if lang is None:
        lang = locale.getdefaultlocale()[0]

    if os.name == 'nt':
        lang = _LOCALE2WIN32.get(lang, lang)

    def variants(encoding):
        raw_name = locale._build_localename((lang, encoding))
        yield raw_name
        normalized_name = locale.normalize(raw_name)
        if normalized_name != raw_name:
            yield normalized_name

    yield from variants('utf8')

    preferred = locale.getpreferredencoding()
    if preferred:
        yield from variants(preferred)

        fallbacks = {
            'latin1': 'latin9',
            'iso-8859-1': 'iso8859-15',
            'cp1252': '1252',
        }
        fallback = fallbacks.get(preferred.lower())
        if fallback:
            yield from variants(fallback)

    yield lang
1276
1277
def resetlocale():
    """ Set the ``LC_ALL`` locale to the first candidate of
    :func:`get_locales` accepted by the system.

    :return: the result of ``locale.setlocale`` for that candidate, None
             when every candidate is rejected
    """
    # locale.resetlocale is bugged with some locales.
    for candidate in get_locales():
        try:
            applied = locale.setlocale(locale.LC_ALL, candidate)
        except locale.Error:
            continue
        return applied
1285
1286
def load_language(cr, lang):
    """ Install a language through a ``base.language.install`` record.
    Used mainly to automate language loading at db initialization.

    :param cr: database cursor
    :param lang: language ISO code with optional _underscore_ and l10n flavor (ex: 'fr', 'fr_BE', but not 'fr-BE')
    :type lang: str
    """
    env = odoo.api.Environment(cr, odoo.SUPERUSER_ID, {})
    env['base.language.install'].create({'lang': lang}).lang_install()
1297