1#!/usr/local/bin/python3.8 -s
2#
3# Copyright (c) 2010-2018 Shaun McCance <shaunm@gnome.org>
4#
5# ITS Tool program is free software: you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by the
7# Free Software Foundation, either version 3 of the License, or (at your
8# option) any later version.
9#
10# ITS Tool is distributed in the hope that it will be useful, but WITHOUT
11# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13# for more details.
14#
15# You should have received a copy of the GNU General Public License along
16# with ITS Tool; if not, write to the Free Software Foundation, 59 Temple
# Place, Suite 330, Boston, MA  02111-1307  USA.
18#
19from __future__ import print_function
20from __future__ import unicode_literals
21
# Tool version string and data directory.
# NOTE(review): the "@...@" values look like placeholders substituted at
# build time — confirm against the build system.
# DATADIR is added to the built-in ITS rule search path by
# Document.apply_its_rules when XDG_DATA_DIRS is unset.
VERSION="@VERSION@"
DATADIR="@DATADIR@"
24
25import gettext
26import hashlib
27import libxml2
28import optparse
29import os
30import os.path
31import re
32import sys
33import time
34import io
35
# Python 2/3 compatibility shims: one vocabulary for "text" types
# (string_types, ustr_type), a text converter (ustr) and a helper that makes
# a value safe to hand to print() (pr_str).
PY3 = (sys.version_info[0] == 3)
if PY3:
    string_types = (str,)
    ustr_type = str

    def ustr(s, encoding=None):
        """Return s as text, decoding it with encoding when one is given."""
        if isinstance(s, str):
            return s
        if encoding:
            return str(s, encoding)
        return str(s)

    def pr_str(s):
        """Return a string that can be safely print()ed"""
        # print copes with both bytes and unicode here, so hand the
        # argument back untouched.
        return s
else:
    string_types = (basestring,)
    ustr = ustr_type = unicode

    def pr_str(s):
        """Return a string that can be safely print()ed"""
        if not isinstance(s, str):
            # print may not work on unicode if the output encoding cannot
            # be detected, so encode with UTF-8.
            return unicode.encode(s, 'utf-8')
        # print works on str as-is.
        return s
63
# XML namespace URIs used when matching elements and attributes.
# NS_ITS: ITS elements such as its:rules, its:translateRule, its:locNoteRule.
NS_ITS = 'http://www.w3.org/2005/11/its'
# NS_ITST: itstool extension elements such as dropRule, contextRule, credits.
NS_ITST = 'http://itstool.org/extensions/'
NS_BLANK = 'http://itstool.org/extensions/blank/'
# NS_XLINK: namespace of the href attribute that links its:rules to a file.
NS_XLINK = 'http://www.w3.org/1999/xlink'
NS_XML = 'http://www.w3.org/XML/1998/namespace'
69
class NoneTranslations:
    """Stand-in translation catalog in which every lookup yields None.

    The method names mirror the singular and plural lookup methods of
    gettext-style translation objects, so an instance can be used where
    "no translation available" has to be reported for every message.
    """

    def gettext(self, message):
        """Singular lookup: there is never a translation."""
        return None

    def ngettext(self, msgid1, msgid2, n):
        """Plural lookup: there is never a translation."""
        return None

    # The remaining lookup flavours share the two implementations above.
    lgettext = gettext
    ugettext = gettext
    lngettext = ngettext
    ungettext = ngettext
88
89
class MessageList (object):
    """Ordered collection of extracted messages that can be written as a PO template.

    Messages are kept in insertion order. A message added together with a
    node is also indexed by that node so it can be looked up again later.
    """

    def __init__ (self):
        self._messages = []
        self._by_node = {}
        self._has_credits = False

    def add_message (self, message, node):
        """Append message; when node is not None, also index the message by it."""
        self._messages.append (message)
        if node is not None:
            self._by_node[node] = message

    def add_credits(self):
        """Append the special 'translator-credits' message, at most once."""
        if self._has_credits:
            return
        msg = Message()
        msg.set_context('_')
        msg.add_text('translator-credits')
        msg.add_comment(Comment('Put one translator per line, in the form NAME <EMAIL>, YEAR1, YEAR2'))
        self._messages.append(msg)
        self._has_credits = True

    def get_message_by_node (self, node):
        """Return the message registered for node, or None if there is none."""
        return self._by_node.get(node, None)

    def get_nodes_with_messages (self):
        """Return a list of all nodes that have a message registered."""
        return list(self._by_node.keys())

    @staticmethod
    def _merge_locale_filters (current, extra):
        """Combine the locale filters of two duplicate messages.

        Message.format() reads a locale filter as a (locale list, filter type)
        pair, so the merged value has to stay a pair: formatting both pairs
        into one string would make format() print single characters of it.
        """
        if current is None:
            return extra
        if not (isinstance(current, tuple) and isinstance(extra, tuple)):
            # Legacy plain-string filters: keep the historical joined form.
            return '%s, %s' % (current, extra)
        if current == extra:
            return current
        if current[1] == extra[1]:
            return ('%s, %s' % (current[0], extra[0]), current[1])
        # Differing filter types cannot be expressed in a single pair; keep
        # the filter of the message that was seen first.
        return current

    def output (self, out):
        """Write all messages to out as a PO template.

        Messages with the same (context, string) key are folded into the
        first occurrence: sources, markers, comments, id values, the
        preserve-space flag and the locale filter of later duplicates are
        merged into it. out only needs a write() method accepting text.
        """
        msgs = []
        msgdict = {}
        for msg in self._messages:
            key = (msg.get_context(), msg.get_string())
            if key not in msgdict:
                msgs.append(msg)
                msgdict[key] = msg
                continue
            kept = msgdict[key]
            for source in msg.get_sources():
                kept.add_source(source)
            for marker in msg.get_markers():
                kept.add_marker(marker)
            for comment in msg.get_comments():
                kept.add_comment(comment)
            for idvalue in msg.get_id_values():
                kept.add_id_value(idvalue)
            if msg.get_preserve_space():
                kept.set_preserve_space()
            extra_filter = msg.get_locale_filter()
            if extra_filter is not None:
                kept.set_locale_filter(
                    self._merge_locale_filters(kept.get_locale_filter(), extra_filter))
        # PO header entry (empty msgid) followed by the header fields.
        out.write('msgid ""\n')
        out.write('msgstr ""\n')
        out.write('"Project-Id-Version: PACKAGE VERSION\\n"\n')
        out.write('"POT-Creation-Date: %s\\n"\n' % time.strftime("%Y-%m-%d %H:%M%z"))
        out.write('"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"\n')
        out.write('"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"\n')
        out.write('"Language-Team: LANGUAGE <LL@li.org>\\n"\n')
        out.write('"MIME-Version: 1.0\\n"\n')
        out.write('"Content-Type: text/plain; charset=UTF-8\\n"\n')
        out.write('"Content-Transfer-Encoding: 8bit\\n"\n')
        out.write('\n')
        for msg in msgs:
            out.write(msg.format())
            out.write('\n')
157
158
class Comment (object):
    """A translator comment plus the markers of the places it came from."""

    def __init__ (self, text):
        self._text = ustr(text)
        assert text is not None
        self._markers = []

    def add_marker (self, marker):
        """Record one more marker for this comment."""
        self._markers.append(marker)

    def get_markers (self):
        """Return the list of recorded markers (duplicates included)."""
        return self._markers

    def get_text (self):
        """Return the comment text."""
        return self._text

    def format (self):
        """Render the comment as '#.' lines for a PO file.

        Each distinct marker is listed once, in first-seen order. Text that
        contains newlines is emitted line by line after skipping leading
        blank lines; single-line text is wrapped at spaces so that lines
        stay under 72 characters where a space allows it.
        """
        chunks = []
        emitted = set()
        for marker in self._markers:
            if marker in emitted:
                continue
            chunks.append('#. (itstool) comment: ' + marker + '\n')
            emitted.add(marker)
        body = self._text
        if '\n' in body:
            rows = body.split('\n')
            first = 0
            # Leading empty rows are dropped; later ones are kept.
            while first < len(rows) and rows[first] == '':
                first += 1
            for row in rows[first:]:
                chunks.append('#. %s\n' % row)
        else:
            while len(body) > 72:
                cut = body.rfind(' ', 0, 72)
                if cut == -1:
                    # No space early enough: break at the first one, if any.
                    cut = body.find(' ')
                if cut == -1:
                    break
                chunks.append('#. %s\n' % body[:cut])
                body = body[cut + 1:]
            chunks.append('#. %s\n' % body)
        return ''.join(chunks)
201
202
class Placeholder (object):
    """Stands in for a node inside a message.

    Keeps the node itself and its name converted to text.
    """

    def __init__ (self, node):
        # Node names are decoded as UTF-8 when they are not text already.
        self.name = ustr(node.name, 'utf-8')
        self.node = node
207
208
class Message (object):
    """One translatable message built from text, inline markup and placeholders.

    The body is kept in ``self._message`` as a list of fragments: strings
    (XML-escaped text, serialized inline tags, entity references) and
    Placeholder objects, which get_string() renders as ``<_:NAME-N/>``.
    """

    def __init__ (self):
        self._message = []         # fragments: strings and Placeholder objects
        self._empty = True         # cleared once real text or an entity ref is added
        self._ctxt = None          # msgctxt value, or None
        self._placeholders = []    # Placeholder objects in order of appearance
        self._sources = []         # entries for '#:' lines
        self._markers = []         # entries for '(itstool) path:' lines
        self._id_values = []       # entries for '(itstool) id:' lines
        self._locale_filter = None # (locale list, filter type) pair, or None
        self._comments = []        # Comment objects
        self._preserve = False     # keep whitespace as-is when True

    def __repr__(self):
        if self._empty:
            return "Empty message"
        return self.get_string()

    def escape (self, text):
        """Escape backslash, double quote, newline and tab for a PO string."""
        return text.replace('\\','\\\\').replace('"', "\\\"").replace("\n","\\n").replace("\t","\\t")

    def _open_text_fragment (self):
        """Make sure the last fragment is a string that can be appended to."""
        if len(self._message) == 0 or not(isinstance(self._message[-1], string_types)):
            self._message.append('')

    def add_text (self, text):
        """Append text, escaping &, < and > as XML entities.

        The message stops counting as empty once any non-whitespace text
        has been added.
        """
        self._open_text_fragment()
        if not isinstance(text, ustr_type):
            text = ustr(text, 'utf-8')
        self._message[-1] += text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
        if re.sub(r'\s+', ' ', text).strip() != '':
            self._empty = False

    def add_entity_ref (self, name):
        """Append the entity reference &name; as its own fragment."""
        self._message.append('&' + name + ';')
        self._empty = False

    def add_placeholder (self, node):
        """Append a placeholder standing in for node."""
        holder = Placeholder(node)
        self._placeholders.append(holder)
        self._message.append(holder)

    def get_placeholder (self, name):
        """Return the placeholder rendered as NAME-N equal to name, or None."""
        for index, holder in enumerate(self._placeholders, 1):
            holdername = '%s-%i' % (holder.name, index)
            if holdername == ustr(name, 'utf-8'):
                return holder

    def add_start_tag (self, node):
        """Append the serialized start tag of node, including its attributes.

        A node without children is written as an empty-element tag.
        """
        self._open_text_fragment()
        if node.ns() is not None and node.ns().name is not None:
            self._message[-1] += ('<%s:%s' % (ustr(node.ns().name, 'utf-8'), ustr(node.name, 'utf-8')))
        else:
            self._message[-1] += ('<%s' % ustr(node.name, 'utf-8'))
        for prop in xml_attr_iter(node):
            name = prop.name
            if prop.ns() is not None:
                name = prop.ns().name + ':' + name
            atval = prop.content
            if not isinstance(atval, ustr_type):
                atval = ustr(atval, 'utf-8')
            atval = atval.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')
            # Append to the current string fragment. Using "+=" on the
            # fragment list itself would splice the attribute text in one
            # character per list item.
            self._message[-1] += " %s=\"%s\"" % (name, atval)
        if node.children is not None:
            self._message[-1] += '>'
        else:
            self._message[-1] += '/>'

    def add_end_tag (self, node):
        """Append the end tag of node; nothing is added for a childless node."""
        if node.children is not None:
            self._open_text_fragment()
            if node.ns() is not None and node.ns().name is not None:
                self._message[-1] += ('</%s:%s>' % (ustr(node.ns().name, 'utf-8'), ustr(node.name, 'utf-8')))
            else:
                self._message[-1] += ('</%s>' % ustr(node.name, 'utf-8'))

    def is_empty (self):
        return self._empty

    def get_context (self):
        return self._ctxt

    def set_context (self, ctxt):
        self._ctxt = ctxt

    def add_source (self, source):
        if not isinstance(source, ustr_type):
            source = ustr(source, 'utf-8')
        self._sources.append(source)

    def get_sources (self):
        return self._sources

    def add_marker (self, marker):
        if not isinstance(marker, ustr_type):
            marker = ustr(marker, 'utf-8')
        self._markers.append(marker)

    def get_markers (self):
        return self._markers

    def add_id_value(self, id_value):
        self._id_values.append(id_value)

    def get_id_values(self):
        return self._id_values

    def add_comment (self, comment):
        """Attach a comment; None is ignored."""
        if comment is not None:
            self._comments.append(comment)

    def get_comments (self):
        return self._comments

    def get_string (self):
        """Return the message text with placeholders rendered as <_:NAME-N/>.

        Unless whitespace is preserved, runs of whitespace are collapsed to
        single spaces and the result is stripped.
        """
        message = ''
        placeholder = 1
        for msg in self._message:
            if isinstance(msg, string_types):
                message += msg
            elif isinstance(msg, Placeholder):
                message += '<_:%s-%i/>' % (msg.name, placeholder)
                placeholder += 1
        if not self._preserve:
            message = re.sub(r'\s+', ' ', message).strip()
        return message

    def get_preserve_space (self):
        return self._preserve

    def set_preserve_space (self, preserve=True):
        self._preserve = preserve

    def get_locale_filter(self):
        return self._locale_filter

    def set_locale_filter(self, locale):
        self._locale_filter = locale

    def format (self):
        """Return this message as a PO entry with an empty msgstr.

        Extracted comments come first (paths, ids, locale filter, translator
        comments), then source references, the no-wrap flag, msgctxt and
        msgid.
        """
        ret = ''
        markers = {}
        for marker in self._markers:
            if marker not in markers:
                ret += '#. (itstool) path: ' + marker + '\n'
                markers[marker] = marker
        for idvalue in self._id_values:
            ret += '#. (itstool) id: ' + idvalue + '\n'
        if self._locale_filter is not None:
            ret += '#. (itstool) ' + self._locale_filter[1] + ' locale: ' + self._locale_filter[0] + '\n'
        # Comments with identical text are shown once; the markers of the
        # duplicates are folded into the first occurrence.
        comments = []
        commentsdict = {}
        for comment in self._comments:
            key = comment.get_text()
            if key in commentsdict:
                for marker in comment.get_markers():
                    commentsdict[key].add_marker(marker)
            else:
                comments.append(comment)
                commentsdict[key] = comment
        for i, comment in enumerate(comments):
            if i != 0:
                ret += '#.\n'
            ret += comment.format()
        for source in self._sources:
            ret += '#: %s\n' % source
        if self._preserve:
            ret += '#, no-wrap\n'
        if self._ctxt is not None:
            # The context is a quoted PO string like msgid, so it needs the
            # same escaping to keep the entry well-formed.
            ret += 'msgctxt "%s"\n' % self.escape(self._ctxt)
        message = self.get_string()
        if self._preserve:
            # One quoted string per source line, each but the last ending
            # in an escaped newline.
            ret += 'msgid ""\n'
            lines = message.split('\n')
            last = len(lines) - 1
            for no, line in enumerate(lines):
                if no == last:
                    ret += '"%s"\n' % self.escape(line)
                else:
                    ret += '"%s\\n"\n' % self.escape(line)
        else:
            ret += 'msgid "%s"\n' % self.escape(message)
        ret += 'msgstr ""\n'
        return ret
393
394
def xml_child_iter (node):
    """Yield the children of node one by one, following their ``next`` links."""
    current = node.children
    while True:
        if current is None:
            return
        yield current
        # Read the link only after the consumer is done with this child.
        current = current.next
400
def xml_attr_iter (node):
    """Yield the attributes of node, starting at node.get_properties()."""
    prop = node.get_properties()
    while True:
        if prop is None:
            break
        yield prop
        # Follow the sibling link once the consumer has handled this one.
        prop = prop.next
406
def xml_is_ns_name (node, ns, name):
    """Tell whether node is an element called name in the namespace URI ns.

    Non-element nodes and elements without a namespace never match.
    """
    if node.type == 'element' and node.name == name:
        node_ns = node.ns()
        if node_ns is not None:
            return node_ns.content == ns
    return False
411
def xml_get_node_path(node):
    """Return an XPath-like path to node with an index on every nested element.

    The built-in nodePath() method only adds numeric indexes when they are
    needed for disambiguation; here every element whose parent is an element
    gets one. Attribute steps are written as ``/@name``.
    """
    steps = []
    current = node
    while True:
        current_ns = current.ns()
        label = current.name
        if current_ns is not None and current_ns.name is not None:
            label = current_ns.name + ':' + label
        if current.type == 'attribute':
            label = '@' + label
        label = '/' + label
        nested = current.parent.type == 'element'
        if nested and current.type == 'element':
            # Position among preceding siblings with the same name and the
            # same namespace prefix (or equally no namespace at all).
            position = 1
            sibling = current.previousElementSibling()
            while sibling is not None:
                if sibling.name == current.name:
                    sibling_ns = sibling.ns()
                    if sibling_ns is None:
                        if current_ns is None:
                            position += 1
                    elif current_ns is not None and sibling_ns.name == current_ns.name:
                        position += 1
                sibling = sibling.previousElementSibling()
            label = '%s[%i]' % (label, position)
        steps.append(label)
        if not nested:
            break
        current = current.parent
    # Steps were collected from the node upwards.
    return ''.join(reversed(steps))
439
def xml_error_catcher(doc, error):
    """Error handler callback: append error to doc._xml_err after a space."""
    fragment = " %s" % error
    doc._xml_err = doc._xml_err + fragment
442
def fix_node_ns (node, nsdefs):
    """Strip namespace declarations that merely repeat an inherited one.

    nsdefs maps prefix to namespace URI for the declarations in effect above
    node and is not modified. A declaration on node that is identical to an
    inherited one is removed; any other declaration is recorded for the
    element children, which are processed recursively.
    """
    inherited = nsdefs.copy()
    decl = node.nsDefs()
    while decl is not None:
        # Fetch the successor first: the declaration may be removed below.
        following = decl.next
        redundant = decl.name in nsdefs and nsdefs[decl.name] == decl.content
        if not redundant:
            inherited[decl.name] = decl.content
        else:
            node.removeNsDef(decl.content)
        decl = following
    for kid in xml_child_iter(node):
        if kid.type != 'element':
            continue
        fix_node_ns(kid, inherited)
456
457
class LocNote (object):
    """A localization note: either inline text or a reference to a note.

    locnotetype is normalized so that anything other than 'alert' becomes
    'description'. repr() gives the text to show to translators.
    """

    def __init__(self, locnote=None, locnoteref=None, locnotetype=None, space=False):
        self.locnote = locnote
        self.locnoteref = locnoteref
        self.locnotetype = locnotetype
        if self.locnotetype != 'alert':
            self.locnotetype = 'description'
        # When true, the note text is returned without whitespace cleanup.
        self._preserve_space=space

    def __repr__(self):
        # Raw strings keep '\s' a regex escape rather than an invalid
        # string escape sequence.
        if self.locnote is not None:
            if self._preserve_space:
                return self.locnote
            else:
                return re.sub(r'\s+', ' ', self.locnote).strip()
        elif self.locnoteref is not None:
            return '(itstool) link: ' + re.sub(r'\s+', ' ', self.locnoteref).strip()
        return ''
476
477
478class Document (object):
479    def __init__ (self, filename, messages, load_dtd=False, keep_entities=False):
480        self._xml_err = ''
481        libxml2.registerErrorHandler(xml_error_catcher, self)
482        try:
483            ctxt = libxml2.createFileParserCtxt(filename)
484        except:
485            sys.stderr.write('Error: cannot open XML file %s\n' % filename)
486            sys.exit(1)
487        ctxt.lineNumbers(1)
488        self._load_dtd = load_dtd
489        self._keep_entities = keep_entities
490        if load_dtd:
491            ctxt.loadSubset(1)
492        if keep_entities:
493            ctxt.ctxtUseOptions(libxml2.XML_PARSE_DTDLOAD)
494            ctxt.replaceEntities(0)
495        else:
496            ctxt.replaceEntities(1)
497        ctxt.parseDocument()
498        self._filename = filename
499        self._doc = ctxt.doc()
500        self._localrules = []
501        def pre_process (node):
502            for child in xml_child_iter(node):
503                if xml_is_ns_name(child, 'http://www.w3.org/2001/XInclude', 'include'):
504                    if child.nsProp('parse', None) == 'text':
505                        child.xincludeProcessTree()
506                elif xml_is_ns_name(child, NS_ITS, 'rules'):
507                    if child.hasNsProp('href', NS_XLINK):
508                        href = child.nsProp('href', NS_XLINK)
509                        fileref = os.path.join(os.path.dirname(filename), href)
510                        if not os.path.exists(fileref):
511                            if opts.itspath is not None:
512                                for pathdir in opts.itspath:
513                                    fileref = os.path.join(pathdir, href)
514                                    if os.path.exists(fileref):
515                                        break
516                        if not os.path.exists(fileref):
517                            sys.stderr.write('Error: Could not locate ITS file %s\n' % href)
518                            sys.exit(1)
519                        hctxt = libxml2.createFileParserCtxt(fileref)
520                        hctxt.replaceEntities(1)
521                        hctxt.parseDocument()
522                        root = hctxt.doc().getRootElement()
523                        version = None
524                        if root.hasNsProp('version', None):
525                            version = root.nsProp('version', None)
526                        else:
527                            sys.stderr.write('Warning: ITS file %s missing version attribute\n' %
528                                             os.path.basename(href))
529                        if version is not None and version not in ('1.0', '2.0'):
530                            sys.stderr.write('Warning: Skipping ITS file %s with unknown version %s\n' %
531                                             (os.path.basename(href), root.nsProp('version', None)))
532                        else:
533                            self._localrules.append(root)
534                    version = None
535                    if child.hasNsProp('version', None):
536                        version = child.nsProp('version', None)
537                    else:
538                        root = child.doc.getRootElement()
539                        if root.hasNsProp('version', NS_ITS):
540                            version = root.nsProp('version', NS_ITS)
541                        else:
542                            sys.stderr.write('Warning: Local ITS rules missing version attribute\n')
543                    if version is not None and version not in ('1.0', '2.0'):
544                        sys.stderr.write('Warning: Skipping local ITS rules with unknown version %s\n' %
545                                         version)
546                    else:
547                        self._localrules.append(child)
548                pre_process(child)
549        pre_process(self._doc)
550        try:
551            self._check_errors()
552        except libxml2.parserError as e:
553            sys.stderr.write('Error: Could not parse document:\n%s\n' % ustr(e))
554            sys.exit(1)
555        self._msgs = messages
556        self._its_translate_nodes = {}
557        self._its_within_text_nodes = {}
558        self._its_locale_filters = {}
559        self._its_id_values = {}
560        self._its_loc_notes = {}
561        self._its_preserve_space_nodes = {}
562        self._itst_drop_nodes = {}
563        self._itst_contexts = {}
564        self._its_lang = {}
565        self._itst_lang_attr = {}
566        self._itst_credits = None
567        self._its_externals = {}
568
569    def __del__ (self):
570        self._doc.freeDoc()
571
572    def _check_errors(self):
573        if self._xml_err:
574            raise libxml2.parserError(self._xml_err)
575
576    def register_its_params(self, xpath, rules, params={}):
577        for child in xml_child_iter(rules):
578            if xml_is_ns_name(child, NS_ITS, 'param'):
579                name = child.nsProp('name', None)
580                if name in params:
581                    value = params[name]
582                else:
583                    value = child.getContent()
584                xpath.xpathRegisterVariable(name, None, value)
585
    def apply_its_rule(self, rule, xpath):
        """Apply one ITS rule element to the document.

        rule  -- child node of an its:rules element; non-element nodes and
                 unrecognized elements are ignored
        xpath -- XPath context used to evaluate the rule's selectors

        Every node matched by the rule's selector gets an entry in the
        per-node dictionary belonging to that rule type. A later rule that
        matches the same node overwrites the earlier entry, except for
        localization notes, which are appended to a list.
        NOTE(review): self._try_xpath_eval is defined outside this view; it
        is used here as returning either a string or a sequence of nodes —
        confirm.
        """
        if rule.type != 'element':
            return
        if xml_is_ns_name(rule, NS_ITS, 'translateRule'):
            if rule.nsProp('selector', None) is not None:
                for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                    self._its_translate_nodes[node] = rule.nsProp('translate', None)
        elif xml_is_ns_name(rule, NS_ITS, 'withinTextRule'):
            if rule.nsProp('selector', None) is not None:
                for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                    self._its_within_text_nodes[node] = rule.nsProp('withinText', None)
        elif xml_is_ns_name(rule, NS_ITST, 'preserveSpaceRule'):
            # itstool extension form: only preserveSpace="yes" has an effect.
            if rule.nsProp('selector', None) is not None:
                for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                    val = rule.nsProp('preserveSpace', None)
                    if val == 'yes':
                        self._its_preserve_space_nodes[node] = 'preserve'
        elif xml_is_ns_name(rule, NS_ITS, 'preserveSpaceRule'):
            # ITS form: the value of the space attribute is stored as given.
            if rule.nsProp('selector', None) is not None:
                for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                    self._its_preserve_space_nodes[node] = rule.nsProp('space', None)
        elif xml_is_ns_name(rule, NS_ITS, 'localeFilterRule'):
            if rule.nsProp('selector', None) is not None:
                # Missing attributes default to list '*' and type 'include'.
                if rule.hasNsProp('localeFilterList', None):
                    lst = rule.nsProp('localeFilterList', None)
                else:
                    lst = '*'
                if rule.hasNsProp('localeFilterType', None):
                    typ = rule.nsProp('localeFilterType', None)
                else:
                    typ = 'include'
                for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                    self._its_locale_filters[node] = (lst, typ)
        elif xml_is_ns_name(rule, NS_ITST, 'dropRule'):
            if rule.nsProp('selector', None) is not None:
                for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                    self._itst_drop_nodes[node] = rule.nsProp('drop', None)
        elif xml_is_ns_name(rule, NS_ITS, 'idValueRule'):
            sel = rule.nsProp('selector', None)
            idv = rule.nsProp('idValue', None)
            if sel is not None and idv is not None:
                for node in self._try_xpath_eval(xpath, sel):
                    # Evaluate idValue relative to the matched node, then
                    # restore the previous context node.
                    try:
                        oldnode = xpath.contextNode()
                    except:
                        oldnode = None
                    xpath.setContextNode(node)
                    idvalue = self._try_xpath_eval(xpath, idv)
                    if isinstance(idvalue, string_types):
                        self._its_id_values[node] = idvalue
                    else:
                        # Node result: use the content of the first node only.
                        for val in idvalue:
                            self._its_id_values[node] = val.content
                            break
                    xpath.setContextNode(oldnode)
            pass
        elif xml_is_ns_name(rule, NS_ITST, 'contextRule'):
            if rule.nsProp('selector', None) is not None:
                for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                    # A literal context attribute wins over contextPointer.
                    if rule.hasNsProp('context', None):
                        self._itst_contexts[node] = rule.nsProp('context', None)
                    elif rule.hasNsProp('contextPointer', None):
                        try:
                            oldnode = xpath.contextNode()
                        except:
                            oldnode = None
                        xpath.setContextNode(node)
                        ctxt = self._try_xpath_eval(xpath, rule.nsProp('contextPointer', None))
                        if isinstance(ctxt, string_types):
                            self._itst_contexts[node] = ctxt
                        else:
                            # Only the first node of the result is used; the
                            # loop variable reuses the name ctxt.
                            for ctxt in ctxt:
                                self._itst_contexts[node] = ctxt.content
                                break
                        xpath.setContextNode(oldnode)
        elif xml_is_ns_name(rule, NS_ITS, 'locNoteRule'):
            # Source of the note, in order of precedence: an its:locNote
            # child, the locNoteRef attribute, then per-node evaluation of
            # locNotePointer or locNoteRefPointer.
            locnote = None
            notetype = rule.nsProp('locNoteType', None)
            for child in xml_child_iter(rule):
                if xml_is_ns_name(child, NS_ITS, 'locNote'):
                    locnote = LocNote(locnote=child.content, locnotetype=notetype)
                    break
            if locnote is None:
                if rule.hasNsProp('locNoteRef', None):
                    locnote = LocNote(locnoteref=rule.nsProp('locNoteRef', None), locnotetype=notetype)
            if rule.nsProp('selector', None) is not None:
                for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                    if locnote is not None:
                        # The same LocNote object is shared by all matches.
                        self._its_loc_notes.setdefault(node, []).append(locnote)
                    else:
                        if rule.hasNsProp('locNotePointer', None):
                            sel = rule.nsProp('locNotePointer', None)
                            ref = False
                        elif rule.hasNsProp('locNoteRefPointer', None):
                            sel = rule.nsProp('locNoteRefPointer', None)
                            ref = True
                        else:
                            continue
                        try:
                            oldnode = xpath.contextNode()
                        except:
                            oldnode = None
                        xpath.setContextNode(node)
                        note = self._try_xpath_eval(xpath, sel)
                        if isinstance(note, string_types):
                            if ref:
                                nodenote = LocNote(locnoteref=note, locnotetype=notetype)
                            else:
                                nodenote = LocNote(locnote=note, locnotetype=notetype)
                            self._its_loc_notes.setdefault(node, []).append(nodenote)
                        else:
                            # Node result: only the first node contributes a
                            # note; the loop variable reuses the name note.
                            for note in note:
                                if ref:
                                    nodenote = LocNote(locnoteref=note.content, locnotetype=notetype)
                                else:
                                    nodenote = LocNote(locnote=note.content, locnotetype=notetype,
                                                       space=self.get_preserve_space(note))
                                self._its_loc_notes.setdefault(node, []).append(nodenote)
                                break
                        xpath.setContextNode(oldnode)
        elif xml_is_ns_name(rule, NS_ITS, 'langRule'):
            if rule.nsProp('selector', None) is not None and rule.nsProp('langPointer', None) is not None:
                for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)):
                    try:
                        oldnode = xpath.contextNode()
                    except:
                        oldnode = None
                    xpath.setContextNode(node)
                    res = self._try_xpath_eval(xpath, rule.nsProp('langPointer', None))
                    # NOTE(review): this indexes res[0].content, so it
                    # assumes a node result rather than a string — confirm.
                    if len(res) > 0:
                        self._its_lang[node] = res[0].content
                    # We need to construct language attributes, not just read
                    # language information. Technically, langPointer could be
                    # any XPath expression. But if it looks like an attribute
                    # accessor, just use the attribute name.
                    if rule.nsProp('langPointer', None)[0] == '@':
                        self._itst_lang_attr[node] = rule.nsProp('langPointer', None)[1:]
                    xpath.setContextNode(oldnode)
        elif xml_is_ns_name(rule, NS_ITST, 'credits'):
            # Only the first node matched by appendTo is remembered.
            if rule.nsProp('appendTo', None) is not None:
                for node in self._try_xpath_eval(xpath, rule.nsProp('appendTo', None)):
                    self._itst_credits = (node, rule)
                    break
        elif (xml_is_ns_name(rule, NS_ITS, 'externalResourceRefRule') or
              xml_is_ns_name(rule, NS_ITST, 'externalRefRule')):
            # Both spellings are handled alike; only the name of the pointer
            # attribute differs.
            sel = rule.nsProp('selector', None)
            if xml_is_ns_name(rule, NS_ITS, 'externalResourceRefRule'):
                ptr = rule.nsProp('externalResourceRefPointer', None)
            else:
                ptr = rule.nsProp('refPointer', None)
            if sel is not None and ptr is not None:
                for node in self._try_xpath_eval(xpath, sel):
                    try:
                        oldnode = xpath.contextNode()
                    except:
                        oldnode = None
                    xpath.setContextNode(node)
                    res = self._try_xpath_eval(xpath, ptr)
                    if len(res) > 0:
                        self._its_externals[node] = res[0].content
                    xpath.setContextNode(oldnode)
747
748    def apply_its_rules(self, builtins, params={}):
749        if builtins:
750            dirs = []
751            ddir = os.getenv('XDG_DATA_HOME', '')
752            if ddir == '':
753                ddir = os.path.join(os.path.expanduser('~'), '.local', 'share')
754            dirs.append(ddir)
755            ddir = os.getenv('XDG_DATA_DIRS', '')
756            if ddir == '':
757                if DATADIR not in ('/usr/local/share', '/usr/share'):
758                    ddir += DATADIR + ':'
759                ddir += '/usr/local/share:/usr/share'
760            dirs.extend(ddir.split(':'))
761            ddone = {}
762            for ddir in dirs:
763                itsdir = os.path.join(ddir, 'itstool', 'its')
764                if not os.path.exists(itsdir):
765                    continue
766                for dfile in os.listdir(itsdir):
767                    if dfile.endswith('.its'):
768                        if not ddone.get(dfile, False):
769                            self.apply_its_file(os.path.join(itsdir, dfile), params=params)
770                            ddone[dfile] = True
771        self.apply_local_its_rules(params=params)
772
773    def apply_its_file(self, filename, params={}):
774        doc = libxml2.parseFile(filename)
775        root = doc.getRootElement()
776        if not xml_is_ns_name(root, NS_ITS, 'rules'):
777            return
778        version = None
779        if root.hasNsProp('version', None):
780            version = root.nsProp('version', None)
781        else:
782            sys.stderr.write('Warning: ITS file %s missing version attribute\n' %
783                             os.path.basename(filename))
784        if version is not None and version not in ('1.0', '2.0'):
785            sys.stderr.write('Warning: Skipping ITS file %s with unknown version %s\n' %
786                             (os.path.basename(filename), root.nsProp('version', None)))
787            return
788        matched = True
789        for match in xml_child_iter(root):
790            if xml_is_ns_name(match, NS_ITST, 'match'):
791                matched = False
792                xpath = self._doc.xpathNewContext()
793                par = match
794                nss = {}
795                while par is not None:
796                    nsdef = par.nsDefs()
797                    while nsdef is not None:
798                        if nsdef.name is not None:
799                            if nsdef.name not in nss:
800                                nss[nsdef.name] = nsdef.content
801                                xpath.xpathRegisterNs(nsdef.name, nsdef.content)
802                        nsdef = nsdef.next
803                    par = par.parent
804                if match.hasNsProp('selector', None):
805                    if len(self._try_xpath_eval(xpath, match.nsProp('selector', None))) > 0:
806                        matched = True
807                        break
808        if matched == False:
809            return
810        for rule in xml_child_iter(root):
811            xpath = self._doc.xpathNewContext()
812            par = match
813            nss = {}
814            while par is not None:
815                nsdef = par.nsDefs()
816                while nsdef is not None:
817                    if nsdef.name is not None:
818                        if nsdef.name not in nss:
819                            nss[nsdef.name] = nsdef.content
820                            xpath.xpathRegisterNs(nsdef.name, nsdef.content)
821                    nsdef = nsdef.next
822                par = par.parent
823            self.register_its_params(xpath, root, params=params)
824            self.apply_its_rule(rule, xpath)
825
826    def apply_local_its_rules(self, params={}):
827        for rules in self._localrules:
828            def reg_ns(xpath, node):
829                if node.parent is not None:
830                    reg_ns(xpath, node.parent)
831                nsdef = node.nsDefs()
832                while nsdef is not None:
833                    if nsdef.name is not None:
834                        xpath.xpathRegisterNs(nsdef.name, nsdef.content)
835                    nsdef = nsdef.next
836            xpath = self._doc.xpathNewContext()
837            reg_ns(xpath, rules)
838            self.register_its_params(xpath, rules, params=params)
839            for rule in xml_child_iter(rules):
840                if rule.type != 'element':
841                    continue
842                if rule.nsDefs() is not None:
843                    rule_xpath = self._doc.xpathNewContext()
844                    reg_ns(rule_xpath, rule)
845                    self.register_its_params(rule_xpath, rules, params=params)
846                else:
847                    rule_xpath = xpath
848                self.apply_its_rule(rule, rule_xpath)
849
850    def _append_credits(self, parent, node, trdata):
851        if xml_is_ns_name(node, NS_ITST, 'for-each'):
852            select = node.nsProp('select', None)
853            if select == 'years':
854                for year in trdata[2].split(','):
855                    for child in xml_child_iter(node):
856                        self._append_credits(parent, child, trdata + (year.strip(),))
857        elif xml_is_ns_name(node, NS_ITST, 'value-of'):
858            select = node.nsProp('select', None)
859            val = None
860            if select == 'name':
861                val = trdata[0]
862            elif select == 'email':
863                val = trdata[1]
864            elif select == 'years':
865                val = trdata[2]
866            elif select == 'year' and len(trdata) == 4:
867                val = trdata[3]
868            if val is not None:
869                if not PY3:
870                    val = val.encode('utf-8')
871                parent.addContent(val)
872        else:
873            newnode = node.copyNode(2)
874            parent.addChild(newnode)
875            for child in xml_child_iter(node):
876                self._append_credits(newnode, child, trdata)
877
878    def merge_credits(self, translations, language, node):
879        if self._itst_credits is None:
880            return
881        # Dear Python, please implement pgettext.
882        # http://bugs.python.org/issue2504
883        # Sincerely, Shaun
884        trans = translations.ugettext('_\x04translator-credits')
885        if trans is None or trans == 'translator-credits':
886            return
887        regex = re.compile('(.*) \<(.*)\>, (.*)')
888        for credit in trans.split('\n'):
889            match = regex.match(credit)
890            if not match:
891                continue
892            trdata = match.groups()
893            for node in xml_child_iter(self._itst_credits[1]):
894                self._append_credits(self._itst_credits[0], node, trdata)
895
896    def join_translations(self, translations, node=None, strict=False):
897        is_root = False
898        if node is None:
899            is_root = True
900            self.generate_messages(comments=False)
901            node = self._doc.getRootElement()
902        if node is None or node.type != 'element':
903            return
904        if self.get_itst_drop(node) == 'yes':
905            prev = node.prev
906            node.unlinkNode()
907            node.freeNode()
908            if prev is not None and prev.isBlankNode():
909                prev.unlinkNode()
910                prev.freeNode()
911            return
912        msg = self._msgs.get_message_by_node(node)
913        if msg is None:
914            self.translate_attrs(node, node)
915            children = [child for child in xml_child_iter(node)]
916            for child in children:
917                self.join_translations(translations, node=child, strict=strict)
918        else:
919            prevnode = None
920            if node.prev is not None and node.prev.type == 'text':
921                prevtext = node.prev.content
922                if re.sub('\s+', '', prevtext) == '':
923                    prevnode = node.prev
924            for lang in sorted(list(translations.keys()), reverse=True):
925                locale = self.get_its_locale_filter(node)
926                lmatch = match_locale_list(locale[0], lang)
927                if (locale[1] == 'include' and not lmatch) or (locale[1] == 'exclude' and lmatch):
928                    continue
929                newnode = self.get_translated(node, translations[lang], strict=strict, lang=lang)
930                if newnode != node:
931                    newnode.setProp('xml:lang', lang)
932                    node.addNextSibling(newnode)
933                    if prevnode is not None:
934                        node.addNextSibling(prevnode.copyNode(0))
935        if is_root:
936            # Because of the way we create nodes and rewrite the document,
937            # we end up with lots of redundant namespace definitions. We
938            # kill them off in one fell swoop at the end.
939            fix_node_ns(node, {})
940            self._check_errors()
941
942    def merge_translations(self, translations, language, node=None, strict=False):
943        is_root = False
944        if node is None:
945            is_root = True
946            self.generate_messages(comments=False)
947            node = self._doc.getRootElement()
948        if node is None or node.type != 'element':
949            return
950        drop = False
951        locale = self.get_its_locale_filter(node)
952        if locale[1] == 'include':
953            if locale[0] != '*':
954                if not match_locale_list(locale[0], language):
955                    drop = True
956        elif locale[1] == 'exclude':
957            if match_locale_list(locale[0], language):
958                drop = True
959        if self.get_itst_drop(node) == 'yes' or drop:
960            prev = node.prev
961            node.unlinkNode()
962            node.freeNode()
963            if prev is not None and prev.isBlankNode():
964                prev.unlinkNode()
965                prev.freeNode()
966            return
967        if is_root:
968            self.merge_credits(translations, language, node)
969        msg = self._msgs.get_message_by_node(node)
970        if msg is None:
971            self.translate_attrs(node, node)
972            children = [child for child in xml_child_iter(node)]
973            for child in children:
974                self.merge_translations(translations, language, node=child, strict=strict)
975        else:
976            newnode = self.get_translated(node, translations, strict=strict, lang=language)
977            if newnode != node:
978                self.translate_attrs(node, newnode)
979                node.replaceNode(newnode)
980        if is_root:
981            # Apply language attributes to untranslated nodes. We don't do
982            # this before processing, because then these attributes would
983            # be copied into the new nodes. We apply the attribute without
984            # checking whether it was translated, because any that were will
985            # just be floating around, unattached to a document.
986            for lcnode in self._msgs.get_nodes_with_messages():
987                attr = self._itst_lang_attr.get(lcnode)
988                if attr is None:
989                    continue
990                origlang = None
991                lcpar = lcnode
992                while lcpar is not None:
993                    origlang = self._its_lang.get(lcpar)
994                    if origlang is not None:
995                        break
996                    lcpar = lcpar.parent
997                if origlang is not None:
998                    lcnode.setProp(attr, origlang)
999            # And then set the language attribute on the root node.
1000            if language is not None:
1001                attr = self._itst_lang_attr.get(node)
1002                if attr is not None:
1003                    node.setProp(attr, language)
1004            # Because of the way we create nodes and rewrite the document,
1005            # we end up with lots of redundant namespace definitions. We
1006            # kill them off in one fell swoop at the end.
1007            fix_node_ns(node, {})
1008            self._check_errors()
1009
1010    def translate_attrs(self, oldnode, newnode):
1011        trans_attrs = [attr for attr in xml_attr_iter(oldnode) if self._its_translate_nodes.get(attr, 'no') == 'yes']
1012        for attr in trans_attrs:
1013            srccontent = attr.get_content()
1014            if not PY3:
1015                srccontent = srccontent.decode('utf-8')
1016            newcontent = translations.ugettext(srccontent)
1017            if newcontent:
1018                if not PY3:
1019                    newcontent = newcontent.encode('utf-8')
1020                newnode.setProp(attr.name, newcontent)
1021
1022    def get_translated (self, node, translations, strict=False, lang=None):
1023        msg = self._msgs.get_message_by_node(node)
1024        if msg is None:
1025            return node
1026        msgstr = msg.get_string()
1027        # Dear Python, please implement pgettext.
1028        # http://bugs.python.org/issue2504
1029        # Sincerely, Shaun
1030        if msg.get_context() is not None:
1031            msgstr = msg.get_context() + '\x04' + msgstr
1032        trans = translations.ugettext(msgstr)
1033        if trans is None:
1034            return node
1035        nss = {}
1036        def reg_ns(node, nss):
1037            if node.parent is not None:
1038                reg_ns(node.parent, nss)
1039            nsdef = node.nsDefs()
1040            while nsdef is not None:
1041                nss[nsdef.name] = nsdef.content
1042                nsdef = nsdef.next
1043        reg_ns(node, nss)
1044        nss['_'] = NS_BLANK
1045        try:
1046            blurb = node.doc.intSubset().serialize('utf-8')
1047        except Exception:
1048            blurb = ''
1049        blurb += '<' + ustr(node.name, 'utf-8')
1050        for nsname in list(nss.keys()):
1051            if nsname is None:
1052                blurb += ' xmlns="%s"' % nss[nsname]
1053            else:
1054                blurb += ' xmlns:%s="%s"' % (nsname, nss[nsname])
1055        blurb += '>%s</%s>' % (trans, ustr(node.name, 'utf-8'))
1056        if not PY3:
1057            blurb = blurb.encode('utf-8')
1058        ctxt = libxml2.createDocParserCtxt(blurb)
1059        if self._load_dtd:
1060            ctxt.loadSubset(1)
1061        if self._keep_entities:
1062            ctxt.ctxtUseOptions(libxml2.XML_PARSE_DTDLOAD)
1063            ctxt.replaceEntities(0)
1064        else:
1065            ctxt.replaceEntities(1)
1066        ctxt.parseDocument()
1067        trnode = ctxt.doc().getRootElement().copyNode(1)
1068        try:
1069            self._check_errors()
1070        except libxml2.parserError:
1071            if strict:
1072                raise
1073            else:
1074                sys.stderr.write(pr_str('Warning: Could not merge %stranslation for msgid:\n%s\n' % (
1075                        (lang + ' ') if lang is not None else '',
1076                        msgstr)))
1077                self._xml_err = ''
1078                return node
1079        def scan_node(node):
1080            children = [child for child in xml_child_iter(node)]
1081            for child in children:
1082                if child.type != 'element':
1083                    continue
1084                if child.ns() is not None and child.ns().content == NS_BLANK:
1085                    ph_node = msg.get_placeholder(child.name).node
1086                    if self.has_child_elements(ph_node):
1087                        self.merge_translations(translations, None, ph_node, strict=strict)
1088                        child.replaceNode(ph_node)
1089                    else:
1090                        repl = self.get_translated(ph_node, translations, strict=strict, lang=lang)
1091                        child.replaceNode(repl)
1092                scan_node(child)
1093        try:
1094            scan_node(trnode)
1095        except:
1096            if strict:
1097                raise
1098            else:
1099                sys.stderr.write(pr_str('Warning: Could not merge %stranslation for msgid:\n%s\n' % (
1100                    (lang + ' ') if lang is not None else '',
1101                    msgstr)))
1102                self._xml_err = ''
1103                return node
1104        retnode = node.copyNode(2)
1105        for child in xml_child_iter(trnode):
1106            retnode.addChild(child.copyNode(1))
1107        ctxt.doc().freeDoc()
1108        return retnode
1109
1110    def generate_messages(self, comments=True):
1111        if self._itst_credits is not None:
1112            self._msgs.add_credits()
1113        for child in xml_child_iter(self._doc):
1114            if child.type == 'element':
1115                self.generate_message(child, None, comments=comments)
1116                break
1117
1118    def generate_message (self, node, msg, comments=True, path=None):
1119        if node.type in ('text', 'cdata') and msg is not None:
1120            msg.add_text(node.content)
1121            return
1122        if node.type == 'entity_ref':
1123            msg.add_entity_ref(node.name);
1124        if node.type != 'element':
1125            return
1126        if node.hasNsProp('drop', NS_ITST) and node.nsProp('drop', NS_ITST) == 'yes':
1127            return
1128        if self._itst_drop_nodes.get(node, 'no') == 'yes':
1129            return
1130        locfil = self.get_its_locale_filter(node)
1131        if locfil == ('', 'include') or locfil == ('*', 'exclude'):
1132            return
1133        if path is None:
1134            path = ''
1135        translate = self.get_its_translate(node)
1136        withinText = False
1137        if translate == 'no':
1138            if msg is not None:
1139                msg.add_placeholder(node)
1140            is_unit = False
1141            msg = None
1142        else:
1143            is_unit = msg is None or self.is_translation_unit(node)
1144            if is_unit:
1145                if msg is not None:
1146                    msg.add_placeholder(node)
1147                msg = Message()
1148                ctxt = None
1149                if node.hasNsProp('context', NS_ITST):
1150                    ctxt = node.nsProp('context', NS_ITST)
1151                if ctxt is None:
1152                    ctxt = self._itst_contexts.get(node)
1153                if ctxt is not None:
1154                    msg.set_context(ctxt)
1155                idvalue = self.get_its_id_value(node)
1156                if idvalue is not None:
1157                    basename = os.path.basename(self._filename)
1158                    msg.add_id_value(basename + '#' + idvalue)
1159                if self.get_preserve_space(node):
1160                    msg.set_preserve_space()
1161                if self.get_its_locale_filter(node) != ('*', 'include'):
1162                    msg.set_locale_filter(self.get_its_locale_filter(node))
1163                msg.add_source('%s:%i' % (self._doc.name, node.lineNo()))
1164                msg.add_marker('%s/%s' % (ustr(node.parent.name, 'utf-8'), ustr(node.name, 'utf-8')))
1165            else:
1166                withinText = True
1167                msg.add_start_tag(node)
1168
1169        if not withinText:
1170            # Add msg for translatable node attributes
1171            for attr in xml_attr_iter(node):
1172                if self._its_translate_nodes.get(attr, 'no') == 'yes':
1173                    attr_msg = Message()
1174                    if self.get_preserve_space(attr):
1175                        attr_msg.set_preserve_space()
1176                    attr_msg.add_source('%s:%i' % (self._doc.name, node.lineNo()))
1177                    attr_msg.add_marker('%s/%s@%s' % (node.parent.name, node.name, attr.name))
1178                    attr_msg.add_text(attr.content)
1179                    if comments:
1180                        for locnote in self.get_its_loc_notes(attr):
1181                            comment = Comment(locnote)
1182                            comment.add_marker ('%s/%s@%s' % (
1183                                    node.parent.name, node.name, attr.name))
1184                            attr_msg.add_comment(comment)
1185                    self._msgs.add_message(attr_msg, attr)
1186
1187        if comments and msg is not None:
1188            cnode = node
1189            while cnode is not None:
1190                hasnote = False
1191                for locnote in self.get_its_loc_notes(cnode, inherit=(not withinText)):
1192                    comment = Comment(locnote)
1193                    if withinText:
1194                        comment.add_marker('.%s/%s' % (path, cnode.name))
1195                    msg.add_comment(comment)
1196                    hasnote = True
1197                if hasnote or not is_unit:
1198                    break
1199                cnode = cnode.parent
1200
1201        self.generate_external_resource_message(node)
1202        for attr in xml_attr_iter(node):
1203            self.generate_external_resource_message(attr)
1204            idvalue = self.get_its_id_value(attr)
1205            if idvalue is not None:
1206                basename = os.path.basename(self._filename)
1207                msg.add_id_value(basename + '#' + idvalue)
1208
1209        if withinText:
1210            path = path + '/' + node.name
1211        for child in xml_child_iter(node):
1212            self.generate_message(child, msg, comments=comments, path=path)
1213
1214        if translate:
1215            if is_unit and not msg.is_empty():
1216                self._msgs.add_message(msg, node)
1217            elif msg is not None:
1218                msg.add_end_tag(node)
1219
1220    def generate_external_resource_message(self, node):
1221        if node not in self._its_externals:
1222            return
1223        resref = self._its_externals[node]
1224        if node.type == 'element':
1225            translate = self.get_its_translate(node)
1226            marker = '%s/%s' % (node.parent.name, node.name)
1227        else:
1228            translate = self.get_its_translate(node.parent)
1229            marker = '%s/%s/@%s' % (node.parent.parent.name, node.parent.name, node.name)
1230        if translate == 'no':
1231            return
1232        msg = Message()
1233        try:
1234            fullfile = os.path.join(os.path.dirname(self._filename), resref)
1235            filefp = open(fullfile, 'rb')
1236            filemd5 = hashlib.md5(filefp.read()).hexdigest()
1237            filefp.close()
1238        except Exception:
1239            filemd5 = '__failed__'
1240        txt = "external ref='%s' md5='%s'" % (resref, filemd5)
1241        msg.set_context('_')
1242        msg.add_text(txt)
1243        msg.add_source('%s:%i' % (self._doc.name, node.lineNo()))
1244        msg.add_marker(marker)
1245        msg.add_comment(Comment('This is a reference to an external file such as an image or'
1246                                ' video. When the file changes, the md5 hash will change to'
1247                                ' let you know you need to update your localized copy. The'
1248                                ' msgstr is not used at all. Set it to whatever you like'
1249                                ' once you have updated your copy of the file.'))
1250        self._msgs.add_message(msg, None)
1251
1252    def is_translation_unit (self, node):
1253        return self.get_its_within_text(node) != 'yes'
1254
1255    def has_child_elements(self, node):
1256        return len([child for child in xml_child_iter(node) if child.type=='element'])
1257
1258    def get_preserve_space (self, node):
1259        while node.type in ('attribute', 'element'):
1260            if node.getSpacePreserve() == 1:
1261                return True
1262            if node in self._its_preserve_space_nodes:
1263                return (self._its_preserve_space_nodes[node] == 'preserve')
1264            node = node.parent
1265        return False
1266
1267    def get_its_translate(self, node):
1268        val = None
1269        if node.hasNsProp('translate', NS_ITS):
1270            val = node.nsProp('translate', NS_ITS)
1271        elif xml_is_ns_name(node, NS_ITS, 'span') and node.hasNsProp('translate', None):
1272            val = node.nsProp('translate', None)
1273        elif node in self._its_translate_nodes:
1274            val = self._its_translate_nodes[node]
1275        if val is not None:
1276            return val
1277        if node.type == 'attribute':
1278            return 'no'
1279        if node.parent.type == 'element':
1280            return self.get_its_translate(node.parent)
1281        return 'yes'
1282
1283    def get_its_within_text(self, node):
1284        if node.hasNsProp('withinText', NS_ITS):
1285            val = node.nsProp('withinText', NS_ITS)
1286        elif xml_is_ns_name(node, NS_ITS, 'span') and node.hasNsProp('withinText', None):
1287            val = node.nsProp('withinText', None)
1288        else:
1289            return self._its_within_text_nodes.get(node, 'no')
1290        if val in ('yes', 'nested'):
1291            return val
1292        return 'no'
1293
1294    def get_its_locale_filter(self, node):
1295        if node.hasNsProp('localeFilterList', NS_ITS) or node.hasNsProp('localeFilterType', NS_ITS):
1296            if node.hasNsProp('localeFilterList', NS_ITS):
1297                lst = node.nsProp('localeFilterList', NS_ITS)
1298            else:
1299                lst = '*'
1300            if node.hasNsProp('localeFilterType', NS_ITS):
1301                typ = node.nsProp('localeFilterType', NS_ITS)
1302            else:
1303                typ = 'include'
1304            return (lst, typ)
1305        if (xml_is_ns_name(node, NS_ITS, 'span') and
1306            (node.hasNsProp('localeFilterList', None) or node.hasNsProp('localeFilterType', None))):
1307            if node.hasNsProp('localeFilterList', None):
1308                lst = node.nsProp('localeFilterList', None)
1309            else:
1310                lst = '*'
1311            if node.hasNsProp('localeFilterType', None):
1312                typ = node.nsProp('localeFilterType', None)
1313            else:
1314                typ = 'include'
1315            return (lst, typ)
1316        if node in self._its_locale_filters:
1317            return self._its_locale_filters[node]
1318        if node.parent.type == 'element':
1319            return self.get_its_locale_filter(node.parent)
1320        return ('*', 'include')
1321
1322    def get_itst_drop(self, node):
1323        if node.hasNsProp('drop', NS_ITST) and node.nsProp('drop', NS_ITST) == 'yes':
1324            return 'yes'
1325        if self._itst_drop_nodes.get(node, 'no') == 'yes':
1326            return 'yes'
1327        return 'no'
1328
1329    def get_its_id_value(self, node):
1330        if node.hasNsProp('id', NS_XML):
1331            return node.nsProp('id', NS_XML)
1332        return self._its_id_values.get(node, None)
1333
1334    def get_its_loc_notes(self, node, inherit=True):
1335        ret = []
1336        if node.hasNsProp('locNote', NS_ITS) or node.hasNsProp('locNoteRef', NS_ITS) or node.hasNsProp('locNoteType', NS_ITS):
1337            notetype = node.nsProp('locNoteType', NS_ITS)
1338            if node.hasNsProp('locNote', NS_ITS):
1339                ret.append(LocNote(locnote=node.nsProp('locNote', NS_ITS), locnotetype=notetype))
1340            elif node.hasNsProp('locNoteRef', NS_ITS):
1341                ret.append(LocNote(locnoteref=node.nsProp('locNoteRef', NS_ITS), locnotetype=notetype))
1342        elif  xml_is_ns_name(node, NS_ITS, 'span'):
1343            if node.hasNsProp('locNote', None) or node.hasNsProp('locNoteRef', None) or node.hasNsProp('locNoteType', None):
1344                notetype = node.nsProp('locNoteType', None)
1345                if node.hasNsProp('locNote', None):
1346                    ret.append(LocNote(locnote=node.nsProp('locNote', None), locnotetype=notetype))
1347                elif node.hasNsProp('locNoteRef', None):
1348                    ret.append(LocNote(locnoteref=node.nsProp('locNoteRef', None), locnotetype=notetype))
1349        for locnote in reversed(self._its_loc_notes.get(node, [])):
1350            ret.append(locnote)
1351        if (len(ret) == 0 and inherit and
1352            node.type != 'attribute' and node.parent is not None and node.parent.type == 'element'):
1353            return self.get_its_loc_notes(node.parent)
1354        return ret
1355
1356    def output_test_data(self, category, out, node=None):
1357        if node is None:
1358            node = self._doc.getRootElement()
1359        compval = ''
1360        if category == 'translate':
1361            compval = 'translate="%s"' % self.get_its_translate(node)
1362        elif category == 'withinText':
1363            if node.type != 'attribute':
1364                compval = 'withinText="%s"' % self.get_its_within_text(node)
1365        elif category == 'localeFilter':
1366            compval = 'localeFilterList="%s"\tlocaleFilterType="%s"' % self.get_its_locale_filter(node)
1367        elif category == 'locNote':
1368            val = self.get_its_loc_notes(node)
1369            if len(val) > 0:
1370                if val[0].locnote is not None:
1371                    compval = 'locNote="%s"\tlocNoteType="%s"' % (ustr(val[0]), val[0].locnotetype)
1372                elif val[0].locnoteref is not None:
1373                    compval = 'locNoteRef="%s"\tlocNoteType="%s"' % (val[0].locnoteref, val[0].locnotetype)
1374        elif category == 'externalResourceRef':
1375            val = self._its_externals.get(node, '')
1376            if val != '':
1377                compval = 'externalResourceRef="%s"' % val
1378        elif category == 'idValue':
1379            val = self.get_its_id_value(node)
1380            if val is not None:
1381                compval = 'idValue="%s"' % val
1382        elif category == 'preserveSpace':
1383            if self.get_preserve_space(node):
1384                compval = 'space="preserve"'
1385            else:
1386                compval = 'space="default"'
1387        else:
1388            sys.stderr.write('Error: Unrecognized category %s\n' % category)
1389            sys.exit(1)
1390        if compval != '':
1391            out.write('%s\t%s\r\n' % (xml_get_node_path(node), compval))
1392        else:
1393            out.write('%s\r\n' % (xml_get_node_path(node)))
1394        for attr in sorted(xml_attr_iter(node), key=ustr):
1395            self.output_test_data(category, out, attr)
1396        for child in xml_child_iter(node):
1397            if child.type == 'element':
1398                self.output_test_data(category, out, child)
1399
1400    @staticmethod
1401    def _try_xpath_eval (xpath, expr):
1402        try:
1403            return xpath.xpathEval(expr)
1404        except:
1405            sys.stderr.write('Warning: Invalid XPath: %s\n' % expr)
1406            return []
1407
def match_locale_list(extranges, locale):
    """Return True if ``locale`` matches any range in ``extranges``.

    ``extranges`` is a comma-separated list of language ranges; each one
    is stripped of surrounding whitespace and tested with ``match_locale``.
    An empty or all-whitespace list matches nothing.
    """
    if extranges.strip() == '':
        return False
    return any(match_locale(part.strip(), locale) for part in extranges.split(','))
1415
def match_locale(extrange, locale):
    """Return True if ``locale`` matches the extended language range.

    Implements extended filtering for extended language ranges as defined
    by RFC 4647 (part of BCP 47), section 3.3.2.  The comparison is
    case-insensitive.
    http://tools.ietf.org/html/rfc4647#section-3.3.2
    """
    rangelist = [x.lower() for x in extrange.split('-')]
    localelist = [x.lower() for x in locale.split('-')]
    # Step 2: the first subtags must match (or the range starts with '*');
    # then move on to the second subtag in BOTH lists.  Restarting at
    # index 0 let a leading wildcard leave the tag's first subtag
    # available, so '*-CA' wrongly matched 'ca-ES'.
    if rangelist[0] not in ('*', localelist[0]):
        return False
    rangei = localei = 1
    # Step 3: consume the remaining range subtags.
    while rangei < len(rangelist):
        wanted = rangelist[rangei]
        if wanted == '*':
            rangei += 1
            continue
        if localei >= len(localelist):
            return False
        if wanted == localelist[localei]:
            rangei += 1
            localei += 1
            continue
        # A singleton (single letter or digit) in the tag cannot be skipped.
        if len(localelist[localei]) == 1:
            return False
        localei += 1
    return True
1439
# Groups: (1) leading name, (2) optional '_xx' suffix, (3) optional '@xx'
# suffix, (4) optional '.xx' suffix.  Written as a raw string so that '\.'
# reaches the regex engine unchanged instead of being an invalid string
# escape.
_locale_pattern = re.compile(r'([a-zA-Z0-9-]+)(_[A-Za-z0-9]+)?(@[A-Za-z0-9]+)?(\.[A-Za-z0-9]+)?')
# '@' suffixes that are rewritten to a script subtag.
_locale_scripts = {
    '@cyrillic': 'Cyrl',
    '@devanagari': 'Deva',
    '@latin': 'Latn',
    '@shaw': 'Shaw',
}
def convert_locale (locale):
    """Automatically convert a POSIX-style locale name to BCP47.

    The leading name is lowercased.  A recognized '@' suffix (see
    _locale_scripts) is appended as a script subtag, then the '_' suffix is
    appended uppercased, then any other '@' suffix except '@euro' is
    appended lowercased.  The '.' suffix is dropped.  If locale does not
    match the expected pattern it is returned unchanged.
    """
    match = _locale_pattern.match(locale)
    if match is None:
        return locale
    ret = match.group(1).lower()
    variant = match.group(3)
    script = _locale_scripts.get(variant)
    if script is not None:
        ret += '-' + script
        # Consumed as a script; do not append it again as a variant below.
        variant = None
    if match.group(2) is not None:
        ret += '-' + match.group(2)[1:].upper()
    if variant is not None and variant != '@euro':
        ret += '-' + variant[1:].lower()
    return ret
1465
1466
if __name__ == '__main__':
    # Command-line entry point.  Three modes, selected by the options:
    #   * neither -m nor -j: read the XML files and write messages (or, with
    #     -t, conformance test data) to the output;
    #   * -m FILE: merge the translations in FILE into each XML file;
    #   * -j FILE: join several MO files into the XML file FILE.
    options = optparse.OptionParser()
    options.set_usage('\n  itstool [OPTIONS] [XMLFILES]\n' +
                      '  itstool -m <MOFILE> [OPTIONS] [XMLFILES]\n' +
                      '  itstool -j <XMLFILE> [OPTIONS] [MOFILES]')
    options.add_option('-i', '--its',
                       action='append',
                       dest='itsfile',
                       metavar='ITS',
                       help='Load the ITS rules in the file ITS (can specify multiple times)')
    options.add_option('-l', '--lang',
                       dest='lang',
                       default=None,
                       metavar='LANGUAGE',
                       help='Explicitly set the language code for output file')
    options.add_option('-j', '--join',
                       dest='join',
                       metavar='FILE',
                       help='Join multiple MO files with the XML file FILE and output XML file')
    options.add_option('-m', '--merge',
                       dest='merge',
                       metavar='FILE',
                       help='Merge from a PO or MO file FILE and output XML files')
    options.add_option('-n', '--no-builtins',
                       action='store_true',
                       dest='nobuiltins',
                       default=False,
                       help='Do not apply the built-in ITS rules')
    options.add_option('-o', '--output',
                       dest='output',
                       default=None,
                       metavar='OUT',
                       help='Output PO files to file OUT or XML files in directory OUT')
    options.add_option('--path',
                       action='append',
                       dest='itspath',
                       default=None,
                       metavar='PATHS',
                       help='Extra path where ITS files may be found (can specify multiple times)')
    options.add_option('-s', '--strict',
                       action='store_true',
                       dest='strict',
                       default=False,
                       help='Exit with error when PO files contain broken XML')
    options.add_option('-d', '--load-dtd',
                       action='store_true',
                       dest='load_dtd',
                       default=False,
                       help='Load external DTDs used by input XML')
    options.add_option('-k', '--keep-entities',
                       action='store_true',
                       dest='keep_entities',
                       default=False,
                       help='Keep entity reference unexpanded')
    options.add_option('-p', '--param',
                       action='append',
                       dest='params',
                       default=[],
                       nargs=2,
                       metavar='NAME VALUE',
                       help='Define the ITS parameter NAME to the value VALUE (can specify multiple times)')
    options.add_option('-t', '--test',
                       dest='test',
                       default=None,
                       metavar='CATEGORY',
                       help='Generate conformance test output for CATEGORY')
    options.add_option('-v', '--version',
                       action='store_true',
                       dest='version',
                       default=False,
                       help='Print itstool version and exit')
    # sys.argv is passed whole, so args[0] is the program name and the
    # positional file arguments are args[1:].
    (opts, args) = options.parse_args(sys.argv)

    if opts.version:
        print('itstool %s' % VERSION)
        sys.exit(0)

    params = dict(opts.params)

    # The merge and join modes write serialized XML as bytes.  On Python 3
    # sys.stdout only accepts text, so use its underlying binary buffer
    # when it has one (Python 2's sys.stdout takes bytes directly).
    binary_stdout = getattr(sys.stdout, 'buffer', sys.stdout)

    if opts.merge is None and opts.join is None:
        messages = MessageList()
        doc = None
        for filename in args[1:]:
            doc = Document(filename, messages, load_dtd=opts.load_dtd, keep_entities=opts.keep_entities)
            doc.apply_its_rules(not(opts.nobuiltins), params=params)
            if opts.itsfile is not None:
                for itsfile in opts.itsfile:
                    doc.apply_its_file(itsfile, params=params)
            if opts.test is None:
                doc.generate_messages()
        if opts.test is not None and doc is None:
            # Test output is produced from the last document loaded; with
            # no input there is nothing to report on.
            sys.stderr.write('Error: No input files given\n')
            sys.exit(1)
        if opts.output is None or opts.output == '-':
            out = sys.stdout
        else:
            try:
                out = io.open(opts.output, 'wt', encoding='utf-8')
            except (IOError, OSError):
                sys.stderr.write('Error: Cannot write to file %s\n' % opts.output)
                sys.exit(1)
        if opts.test is not None:
            doc.output_test_data(opts.test, out)
        else:
            messages.output(out)
        out.flush()
    elif opts.merge is not None:
        try:
            # GNUTranslations reads the whole file in its constructor, so
            # the handle can be closed as soon as it returns.
            with open(opts.merge, 'rb') as mofile:
                translations = gettext.GNUTranslations(mofile)
        except Exception:
            sys.stderr.write('Error: cannot open mo file %s\n' % opts.merge)
            sys.exit(1)
        if PY3:
            translations.ugettext = translations.gettext
        translations.add_fallback(NoneTranslations())
        if opts.lang is None:
            opts.lang = convert_locale(os.path.splitext(os.path.basename(opts.merge))[0])
        # `out` is either a directory name (one output file per input is
        # created inside it) or an already-open binary stream.
        if opts.output is None:
            out = './'
        elif os.path.isdir(opts.output):
            out = opts.output
        elif len(args) == 2:
            if opts.output == '-':
                out = binary_stdout
            else:
                out = open(opts.output, 'wb')
        else:
            sys.stderr.write('Error: Non-directory output for multiple files\n')
            sys.exit(1)
        for filename in args[1:]:
            messages = MessageList()
            doc = Document(filename, messages, load_dtd=opts.load_dtd, keep_entities=opts.keep_entities)
            doc.apply_its_rules(not(opts.nobuiltins), params=params)
            if opts.itsfile is not None:
                for itsfile in opts.itsfile:
                    doc.apply_its_file(itsfile, params=params)
            try:
                doc.merge_translations(translations, opts.lang, strict=opts.strict)
            except Exception as e:
                # Report the failure and exit; a bare re-raise here used to
                # make this message unreachable.
                sys.stderr.write('Error: Could not merge translations:\n%s\n' % ustr(e))
                sys.exit(1)
            serialized = doc._doc.serialize('utf-8')
            if PY3:
                # Under py3 the serialized data is returned as a str, so
                # encode it to bytes.
                serialized = serialized.encode('utf-8')
            fout = out
            fout_is_str = isinstance(fout, string_types)
            if fout_is_str:
                fout = open(os.path.join(fout, os.path.basename(filename)), 'wb')
            fout.write(serialized)
            fout.flush()
            if fout_is_str:
                fout.close()
    elif opts.join is not None:
        translations = {}
        for filename in args[1:]:
            try:
                with open(filename, 'rb') as mofile:
                    thistr = gettext.GNUTranslations(mofile)
            except Exception:
                sys.stderr.write('Error: cannot open mo file %s\n' % filename)
                sys.exit(1)
            thistr.add_fallback(NoneTranslations())
            if PY3:
                thistr.ugettext = thistr.gettext
            # The language is derived from the MO file's base name.
            lang = convert_locale(os.path.splitext(os.path.basename(filename))[0])
            translations[lang] = thistr
        if opts.output is None or opts.output == '-':
            out = binary_stdout
        elif os.path.isdir(opts.output):
            # The joined document is named after the XML file being joined,
            # not after whichever MO file happened to be listed last.
            out = open(os.path.join(opts.output, os.path.basename(opts.join)), 'wb')
        else:
            out = open(opts.output, 'wb')
        messages = MessageList()
        doc = Document(opts.join, messages)
        doc.apply_its_rules(not(opts.nobuiltins), params=params)
        if opts.itsfile is not None:
            for itsfile in opts.itsfile:
                doc.apply_its_file(itsfile, params=params)
        doc.join_translations(translations, strict=opts.strict)
        serialized = doc._doc.serialize('utf-8')
        if PY3:
            # Under py3 the serialized data is returned as a str, so encode
            # it to bytes.
            serialized = serialized.encode('utf-8')
        out.write(serialized)
        out.flush()
1653