1#!/usr/local/bin/python3.8 -s 2# 3# Copyright (c) 2010-2018 Shaun McCance <shaunm@gnome.org> 4# 5# ITS Tool program is free software: you can redistribute it and/or modify 6# it under the terms of the GNU General Public License as published by the 7# Free Software Foundation, either version 3 of the License, or (at your 8# option) any later version. 9# 10# ITS Tool is distributed in the hope that it will be useful, but WITHOUT 11# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13# for more details. 14# 15# You should have received a copy of the GNU General Public License along 16# with ITS Tool; if not, write to the Free Software Foundation, 59 Temple 17# Place, Suite 330, Boston, MA 0211-1307 USA. 18# 19from __future__ import print_function 20from __future__ import unicode_literals 21 22VERSION="@VERSION@" 23DATADIR="@DATADIR@" 24 25import gettext 26import hashlib 27import libxml2 28import optparse 29import os 30import os.path 31import re 32import sys 33import time 34import io 35 36PY3 = sys.version_info[0] == 3 37if PY3: 38 string_types = str, 39 def ustr(s, encoding=None): 40 if isinstance(s, str): 41 return s 42 elif encoding: 43 return str(s, encoding) 44 else: 45 return str(s) 46 ustr_type = str 47 def pr_str(s): 48 """Return a string that can be safely print()ed""" 49 # Since print works on both bytes and unicode, just return the argument 50 return s 51else: 52 string_types = basestring, 53 ustr = ustr_type = unicode 54 def pr_str(s): 55 """Return a string that can be safely print()ed""" 56 if isinstance(s, str): 57 # Since print works on str, just return the argument 58 return s 59 else: 60 # print may not work on unicode if the output encoding cannot be 61 # detected, so just encode with UTF-8 62 return unicode.encode(s, 'utf-8') 63 64NS_ITS = 'http://www.w3.org/2005/11/its' 65NS_ITST = 'http://itstool.org/extensions/' 66NS_BLANK = 'http://itstool.org/extensions/blank/' 67NS_XLINK = 'http://www.w3.org/1999/xlink' 68NS_XML = 'http://www.w3.org/XML/1998/namespace' 69 70class NoneTranslations: 71 def gettext(self, message): 72 return None 73 74 def lgettext(self, message): 75 return None 76 77 def ngettext(self, msgid1, msgid2, n): 78 return None 79 80 def lngettext(self, msgid1, msgid2, n): 81 return None 82 83 def ugettext(self, message): 84 return None 85 86 def ungettext(self, msgid1, msgid2, n): 87 return None 88 89 90class MessageList (object): 91 def __init__ (self): 92 self._messages = [] 93 self._by_node = {} 94 self._has_credits = False 95 96 def add_message (self, message, node): 97 self._messages.append (message) 98 if node is not None: 99 self._by_node[node] = message 100 101 def add_credits(self): 102 if self._has_credits: 103 return 104 msg = Message() 105 msg.set_context('_') 106 msg.add_text('translator-credits') 107 msg.add_comment(Comment('Put one translator per line, in the form NAME <EMAIL>, YEAR1, YEAR2')) 108 self._messages.append(msg) 109 self._has_credits = True 110 111 def get_message_by_node (self, node): 112 return self._by_node.get(node, None) 113 114 def get_nodes_with_messages (self): 115 return list(self._by_node.keys()) 116 117 def output (self, out): 118 msgs = [] 119 msgdict = {} 120 for msg in self._messages: 121 key = (msg.get_context(), msg.get_string()) 122 if key in msgdict: 123 for source in msg.get_sources(): 124 msgdict[key].add_source(source) 125 for marker in msg.get_markers(): 126 msgdict[key].add_marker(marker) 127 for comment in msg.get_comments(): 128 msgdict[key].add_comment(comment) 129 for idvalue in msg.get_id_values(): 130 msgdict[key].add_id_value(idvalue) 131 if msg.get_preserve_space(): 132 msgdict[key].set_preserve_space() 133 if msg.get_locale_filter() is not None: 134 locale = msgdict[key].get_locale_filter() 135 if locale is not None: 136 msgdict[key].set_locale_filter('%s, %s' % (locale, msg.get_locale_filter())) 137 else: 138 msgdict[key].set_locale_filter(msg.get_locale_filter()) 139 140 else: 141 msgs.append(msg) 142 msgdict[key] = msg 143 out.write('msgid ""\n') 144 out.write('msgstr ""\n') 145 out.write('"Project-Id-Version: PACKAGE VERSION\\n"\n') 146 out.write('"POT-Creation-Date: %s\\n"\n' % time.strftime("%Y-%m-%d %H:%M%z")) 147 out.write('"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"\n') 148 out.write('"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"\n') 149 out.write('"Language-Team: LANGUAGE <LL@li.org>\\n"\n') 150 out.write('"MIME-Version: 1.0\\n"\n') 151 out.write('"Content-Type: text/plain; charset=UTF-8\\n"\n') 152 out.write('"Content-Transfer-Encoding: 8bit\\n"\n') 153 out.write('\n') 154 for msg in msgs: 155 out.write(msg.format()) 156 out.write('\n') 157 158 159class Comment (object): 160 def __init__ (self, text): 161 self._text = ustr(text) 162 assert(text is not None) 163 self._markers = [] 164 165 def add_marker (self, marker): 166 self._markers.append(marker) 167 168 def get_markers (self): 169 return self._markers 170 171 def get_text (self): 172 return self._text 173 174 def format (self): 175 ret = '' 176 markers = {} 177 for marker in self._markers: 178 if marker not in markers: 179 ret += '#. (itstool) comment: ' + marker + '\n' 180 markers[marker] = marker 181 if '\n' in self._text: 182 doadd = False 183 for line in self._text.split('\n'): 184 if line != '': 185 doadd = True 186 if not doadd: 187 continue 188 ret += '#. %s\n' % line 189 else: 190 text = self._text 191 while len(text) > 72: 192 j = text.rfind(' ', 0, 72) 193 if j == -1: 194 j = text.find(' ') 195 if j == -1: 196 break 197 ret += '#. %s\n' % text[:j] 198 text = text[j+1:] 199 ret += '#. %s\n' % text 200 return ret 201 202 203class Placeholder (object): 204 def __init__ (self, node): 205 self.node = node 206 self.name = ustr(node.name, 'utf-8') 207 208 209class Message (object): 210 def __init__ (self): 211 self._message = [] 212 self._empty = True 213 self._ctxt = None 214 self._placeholders = [] 215 self._sources = [] 216 self._markers = [] 217 self._id_values = [] 218 self._locale_filter = None 219 self._comments = [] 220 self._preserve = False 221 222 def __repr__(self): 223 if self._empty: 224 return "Empty message" 225 return self.get_string() 226 227 def escape (self, text): 228 return text.replace('\\','\\\\').replace('"', "\\\"").replace("\n","\\n").replace("\t","\\t") 229 230 def add_text (self, text): 231 if len(self._message) == 0 or not(isinstance(self._message[-1], string_types)): 232 self._message.append('') 233 if not isinstance(text, ustr_type): 234 text = ustr(text, 'utf-8') 235 self._message[-1] += text.replace('&', '&').replace('<', '<').replace('>', '>') 236 if re.sub('\s+', ' ', text).strip() != '': 237 self._empty = False 238 239 def add_entity_ref (self, name): 240 self._message.append('&' + name + ';') 241 self._empty = False 242 243 def add_placeholder (self, node): 244 holder = Placeholder(node) 245 self._placeholders.append(holder) 246 self._message.append(holder) 247 248 def get_placeholder (self, name): 249 placeholder = 1 250 for holder in self._placeholders: 251 holdername = '%s-%i' % (holder.name, placeholder) 252 if holdername == ustr(name, 'utf-8'): 253 return holder 254 placeholder += 1 255 256 def add_start_tag (self, node): 257 if len(self._message) == 0 or not(isinstance(self._message[-1], string_types)): 258 self._message.append('') 259 if node.ns() is not None and node.ns().name is not None: 260 self._message[-1] += ('<%s:%s' % (ustr(node.ns().name, 'utf-8'), ustr(node.name, 'utf-8'))) 261 else: 262 self._message[-1] += ('<%s' % ustr(node.name, 'utf-8')) 263 for prop in xml_attr_iter(node): 264 name = prop.name 265 if prop.ns() is not None: 266 name = prop.ns().name + ':' + name 267 atval = prop.content 268 if not isinstance(atval, ustr_type): 269 atval = ustr(atval, 'utf-8') 270 atval = atval.replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"') 271 self._message += " %s=\"%s\"" % (name, atval) 272 if node.children is not None: 273 self._message[-1] += '>' 274 else: 275 self._message[-1] += '/>' 276 277 def add_end_tag (self, node): 278 if node.children is not None: 279 if len(self._message) == 0 or not(isinstance(self._message[-1], string_types)): 280 self._message.append('') 281 if node.ns() is not None and node.ns().name is not None: 282 self._message[-1] += ('</%s:%s>' % (ustr(node.ns().name, 'utf-8'), ustr(node.name, 'utf-8'))) 283 else: 284 self._message[-1] += ('</%s>' % ustr(node.name, 'utf-8')) 285 286 def is_empty (self): 287 return self._empty 288 289 def get_context (self): 290 return self._ctxt 291 292 def set_context (self, ctxt): 293 self._ctxt = ctxt 294 295 def add_source (self, source): 296 if not isinstance(source, ustr_type): 297 source = ustr(source, 'utf-8') 298 self._sources.append(source) 299 300 def get_sources (self): 301 return self._sources 302 303 def add_marker (self, marker): 304 if not isinstance(marker, ustr_type): 305 marker = ustr(marker, 'utf-8') 306 self._markers.append(marker) 307 308 def get_markers (self): 309 return self._markers 310 311 def add_id_value(self, id_value): 312 self._id_values.append(id_value) 313 314 def get_id_values(self): 315 return self._id_values 316 317 def add_comment (self, comment): 318 if comment is not None: 319 self._comments.append(comment) 320 321 def get_comments (self): 322 return self._comments 323 324 def get_string (self): 325 message = '' 326 placeholder = 1 327 for msg in self._message: 328 if isinstance(msg, string_types): 329 message += msg 330 elif isinstance(msg, Placeholder): 331 message += '<_:%s-%i/>' % (msg.name, placeholder) 332 placeholder += 1 333 if not self._preserve: 334 message = re.sub('\s+', ' ', message).strip() 335 return message 336 337 def get_preserve_space (self): 338 return self._preserve 339 340 def set_preserve_space (self, preserve=True): 341 self._preserve = preserve 342 343 def get_locale_filter(self): 344 return self._locale_filter 345 346 def set_locale_filter(self, locale): 347 self._locale_filter = locale 348 349 def format (self): 350 ret = '' 351 markers = {} 352 for marker in self._markers: 353 if marker not in markers: 354 ret += '#. (itstool) path: ' + marker + '\n' 355 markers[marker] = marker 356 for idvalue in self._id_values: 357 ret += '#. (itstool) id: ' + idvalue + '\n' 358 if self._locale_filter is not None: 359 ret += '#. (itstool) ' + self._locale_filter[1] + ' locale: ' + self._locale_filter[0] + '\n' 360 comments = [] 361 commentsdict = {} 362 for comment in self._comments: 363 key = comment.get_text() 364 if key in commentsdict: 365 for marker in comment.get_markers(): 366 commentsdict[key].add_marker(marker) 367 else: 368 comments.append(comment) 369 commentsdict[key] = comment 370 for i in range(len(comments)): 371 if i != 0: 372 ret += '#.\n' 373 ret += comments[i].format() 374 for source in self._sources: 375 ret += '#: %s\n' % source 376 if self._preserve: 377 ret += '#, no-wrap\n' 378 if self._ctxt is not None: 379 ret += 'msgctxt "%s"\n' % self._ctxt 380 message = self.get_string() 381 if self._preserve: 382 ret += 'msgid ""\n' 383 lines = message.split('\n') 384 for line, no in zip(lines, list(range(len(lines)))): 385 if no == len(lines) - 1: 386 ret += '"%s"\n' % self.escape(line) 387 else: 388 ret += '"%s\\n"\n' % self.escape(line) 389 else: 390 ret += 'msgid "%s"\n' % self.escape(message) 391 ret += 'msgstr ""\n' 392 return ret 393 394 395def xml_child_iter (node): 396 child = node.children 397 while child is not None: 398 yield child 399 child = child.next 400 401def xml_attr_iter (node): 402 attr = node.get_properties() 403 while attr is not None: 404 yield attr 405 attr = attr.next 406 407def xml_is_ns_name (node, ns, name): 408 if node.type != 'element': 409 return False 410 return node.name == name and node.ns() is not None and node.ns().content == ns 411 412def xml_get_node_path(node): 413 # The built-in nodePath() method only does numeric indexes 414 # when necessary for disambiguation. For various reasons, 415 # we prefer always using indexes. 416 name = node.name 417 if node.ns() is not None and node.ns().name is not None: 418 name = node.ns().name + ':' + name 419 if node.type == 'attribute': 420 name = '@' + name 421 name = '/' + name 422 if node.type == 'element' and node.parent.type == 'element': 423 count = 1 424 prev = node.previousElementSibling() 425 while prev is not None: 426 if prev.name == node.name: 427 if prev.ns() is None: 428 if node.ns() is None: 429 count += 1 430 else: 431 if node.ns() is not None: 432 if prev.ns().name == node.ns().name: 433 count += 1 434 prev = prev.previousElementSibling() 435 name = '%s[%i]' % (name, count) 436 if node.parent.type == 'element': 437 name = xml_get_node_path(node.parent) + name 438 return name 439 440def xml_error_catcher(doc, error): 441 doc._xml_err += " %s" % error 442 443def fix_node_ns (node, nsdefs): 444 childnsdefs = nsdefs.copy() 445 nsdef = node.nsDefs() 446 while nsdef is not None: 447 nextnsdef = nsdef.next 448 if nsdef.name in nsdefs and nsdefs[nsdef.name] == nsdef.content: 449 node.removeNsDef(nsdef.content) 450 else: 451 childnsdefs[nsdef.name] = nsdef.content 452 nsdef = nextnsdef 453 for child in xml_child_iter(node): 454 if child.type == 'element': 455 fix_node_ns(child, childnsdefs) 456 457 458class LocNote (object): 459 def __init__(self, locnote=None, locnoteref=None, locnotetype=None, space=False): 460 self.locnote = locnote 461 self.locnoteref = locnoteref 462 self.locnotetype = locnotetype 463 if self.locnotetype != 'alert': 464 self.locnotetype = 'description' 465 self._preserve_space=space 466 467 def __repr__(self): 468 if self.locnote is not None: 469 if self._preserve_space: 470 return self.locnote 471 else: 472 return re.sub('\s+', ' ', self.locnote).strip() 473 elif self.locnoteref is not None: 474 return '(itstool) link: ' + re.sub('\s+', ' ', self.locnoteref).strip() 475 return '' 476 477 478class Document (object): 479 def __init__ (self, filename, messages, load_dtd=False, keep_entities=False): 480 self._xml_err = '' 481 libxml2.registerErrorHandler(xml_error_catcher, self) 482 try: 483 ctxt = libxml2.createFileParserCtxt(filename) 484 except: 485 sys.stderr.write('Error: cannot open XML file %s\n' % filename) 486 sys.exit(1) 487 ctxt.lineNumbers(1) 488 self._load_dtd = load_dtd 489 self._keep_entities = keep_entities 490 if load_dtd: 491 ctxt.loadSubset(1) 492 if keep_entities: 493 ctxt.ctxtUseOptions(libxml2.XML_PARSE_DTDLOAD) 494 ctxt.replaceEntities(0) 495 else: 496 ctxt.replaceEntities(1) 497 ctxt.parseDocument() 498 self._filename = filename 499 self._doc = ctxt.doc() 500 self._localrules = [] 501 def pre_process (node): 502 for child in xml_child_iter(node): 503 if xml_is_ns_name(child, 'http://www.w3.org/2001/XInclude', 'include'): 504 if child.nsProp('parse', None) == 'text': 505 child.xincludeProcessTree() 506 elif xml_is_ns_name(child, NS_ITS, 'rules'): 507 if child.hasNsProp('href', NS_XLINK): 508 href = child.nsProp('href', NS_XLINK) 509 fileref = os.path.join(os.path.dirname(filename), href) 510 if not os.path.exists(fileref): 511 if opts.itspath is not None: 512 for pathdir in opts.itspath: 513 fileref = os.path.join(pathdir, href) 514 if os.path.exists(fileref): 515 break 516 if not os.path.exists(fileref): 517 sys.stderr.write('Error: Could not locate ITS file %s\n' % href) 518 sys.exit(1) 519 hctxt = libxml2.createFileParserCtxt(fileref) 520 hctxt.replaceEntities(1) 521 hctxt.parseDocument() 522 root = hctxt.doc().getRootElement() 523 version = None 524 if root.hasNsProp('version', None): 525 version = root.nsProp('version', None) 526 else: 527 sys.stderr.write('Warning: ITS file %s missing version attribute\n' % 528 os.path.basename(href)) 529 if version is not None and version not in ('1.0', '2.0'): 530 sys.stderr.write('Warning: Skipping ITS file %s with unknown version %s\n' % 531 (os.path.basename(href), root.nsProp('version', None))) 532 else: 533 self._localrules.append(root) 534 version = None 535 if child.hasNsProp('version', None): 536 version = child.nsProp('version', None) 537 else: 538 root = child.doc.getRootElement() 539 if root.hasNsProp('version', NS_ITS): 540 version = root.nsProp('version', NS_ITS) 541 else: 542 sys.stderr.write('Warning: Local ITS rules missing version attribute\n') 543 if version is not None and version not in ('1.0', '2.0'): 544 sys.stderr.write('Warning: Skipping local ITS rules with unknown version %s\n' % 545 version) 546 else: 547 self._localrules.append(child) 548 pre_process(child) 549 pre_process(self._doc) 550 try: 551 self._check_errors() 552 except libxml2.parserError as e: 553 sys.stderr.write('Error: Could not parse document:\n%s\n' % ustr(e)) 554 sys.exit(1) 555 self._msgs = messages 556 self._its_translate_nodes = {} 557 self._its_within_text_nodes = {} 558 self._its_locale_filters = {} 559 self._its_id_values = {} 560 self._its_loc_notes = {} 561 self._its_preserve_space_nodes = {} 562 self._itst_drop_nodes = {} 563 self._itst_contexts = {} 564 self._its_lang = {} 565 self._itst_lang_attr = {} 566 self._itst_credits = None 567 self._its_externals = {} 568 569 def __del__ (self): 570 self._doc.freeDoc() 571 572 def _check_errors(self): 573 if self._xml_err: 574 raise libxml2.parserError(self._xml_err) 575 576 def register_its_params(self, xpath, rules, params={}): 577 for child in xml_child_iter(rules): 578 if xml_is_ns_name(child, NS_ITS, 'param'): 579 name = child.nsProp('name', None) 580 if name in params: 581 value = params[name] 582 else: 583 value = child.getContent() 584 xpath.xpathRegisterVariable(name, None, value) 585 586 def apply_its_rule(self, rule, xpath): 587 if rule.type != 'element': 588 return 589 if xml_is_ns_name(rule, NS_ITS, 'translateRule'): 590 if rule.nsProp('selector', None) is not None: 591 for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)): 592 self._its_translate_nodes[node] = rule.nsProp('translate', None) 593 elif xml_is_ns_name(rule, NS_ITS, 'withinTextRule'): 594 if rule.nsProp('selector', None) is not None: 595 for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)): 596 self._its_within_text_nodes[node] = rule.nsProp('withinText', None) 597 elif xml_is_ns_name(rule, NS_ITST, 'preserveSpaceRule'): 598 if rule.nsProp('selector', None) is not None: 599 for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)): 600 val = rule.nsProp('preserveSpace', None) 601 if val == 'yes': 602 self._its_preserve_space_nodes[node] = 'preserve' 603 elif xml_is_ns_name(rule, NS_ITS, 'preserveSpaceRule'): 604 if rule.nsProp('selector', None) is not None: 605 for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)): 606 self._its_preserve_space_nodes[node] = rule.nsProp('space', None) 607 elif xml_is_ns_name(rule, NS_ITS, 'localeFilterRule'): 608 if rule.nsProp('selector', None) is not None: 609 if rule.hasNsProp('localeFilterList', None): 610 lst = rule.nsProp('localeFilterList', None) 611 else: 612 lst = '*' 613 if rule.hasNsProp('localeFilterType', None): 614 typ = rule.nsProp('localeFilterType', None) 615 else: 616 typ = 'include' 617 for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)): 618 self._its_locale_filters[node] = (lst, typ) 619 elif xml_is_ns_name(rule, NS_ITST, 'dropRule'): 620 if rule.nsProp('selector', None) is not None: 621 for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)): 622 self._itst_drop_nodes[node] = rule.nsProp('drop', None) 623 elif xml_is_ns_name(rule, NS_ITS, 'idValueRule'): 624 sel = rule.nsProp('selector', None) 625 idv = rule.nsProp('idValue', None) 626 if sel is not None and idv is not None: 627 for node in self._try_xpath_eval(xpath, sel): 628 try: 629 oldnode = xpath.contextNode() 630 except: 631 oldnode = None 632 xpath.setContextNode(node) 633 idvalue = self._try_xpath_eval(xpath, idv) 634 if isinstance(idvalue, string_types): 635 self._its_id_values[node] = idvalue 636 else: 637 for val in idvalue: 638 self._its_id_values[node] = val.content 639 break 640 xpath.setContextNode(oldnode) 641 pass 642 elif xml_is_ns_name(rule, NS_ITST, 'contextRule'): 643 if rule.nsProp('selector', None) is not None: 644 for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)): 645 if rule.hasNsProp('context', None): 646 self._itst_contexts[node] = rule.nsProp('context', None) 647 elif rule.hasNsProp('contextPointer', None): 648 try: 649 oldnode = xpath.contextNode() 650 except: 651 oldnode = None 652 xpath.setContextNode(node) 653 ctxt = self._try_xpath_eval(xpath, rule.nsProp('contextPointer', None)) 654 if isinstance(ctxt, string_types): 655 self._itst_contexts[node] = ctxt 656 else: 657 for ctxt in ctxt: 658 self._itst_contexts[node] = ctxt.content 659 break 660 xpath.setContextNode(oldnode) 661 elif xml_is_ns_name(rule, NS_ITS, 'locNoteRule'): 662 locnote = None 663 notetype = rule.nsProp('locNoteType', None) 664 for child in xml_child_iter(rule): 665 if xml_is_ns_name(child, NS_ITS, 'locNote'): 666 locnote = LocNote(locnote=child.content, locnotetype=notetype) 667 break 668 if locnote is None: 669 if rule.hasNsProp('locNoteRef', None): 670 locnote = LocNote(locnoteref=rule.nsProp('locNoteRef', None), locnotetype=notetype) 671 if rule.nsProp('selector', None) is not None: 672 for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)): 673 if locnote is not None: 674 self._its_loc_notes.setdefault(node, []).append(locnote) 675 else: 676 if rule.hasNsProp('locNotePointer', None): 677 sel = rule.nsProp('locNotePointer', None) 678 ref = False 679 elif rule.hasNsProp('locNoteRefPointer', None): 680 sel = rule.nsProp('locNoteRefPointer', None) 681 ref = True 682 else: 683 continue 684 try: 685 oldnode = xpath.contextNode() 686 except: 687 oldnode = None 688 xpath.setContextNode(node) 689 note = self._try_xpath_eval(xpath, sel) 690 if isinstance(note, string_types): 691 if ref: 692 nodenote = LocNote(locnoteref=note, locnotetype=notetype) 693 else: 694 nodenote = LocNote(locnote=note, locnotetype=notetype) 695 self._its_loc_notes.setdefault(node, []).append(nodenote) 696 else: 697 for note in note: 698 if ref: 699 nodenote = LocNote(locnoteref=note.content, locnotetype=notetype) 700 else: 701 nodenote = LocNote(locnote=note.content, locnotetype=notetype, 702 space=self.get_preserve_space(note)) 703 self._its_loc_notes.setdefault(node, []).append(nodenote) 704 break 705 xpath.setContextNode(oldnode) 706 elif xml_is_ns_name(rule, NS_ITS, 'langRule'): 707 if rule.nsProp('selector', None) is not None and rule.nsProp('langPointer', None) is not None: 708 for node in self._try_xpath_eval(xpath, rule.nsProp('selector', None)): 709 try: 710 oldnode = xpath.contextNode() 711 except: 712 oldnode = None 713 xpath.setContextNode(node) 714 res = self._try_xpath_eval(xpath, rule.nsProp('langPointer', None)) 715 if len(res) > 0: 716 self._its_lang[node] = res[0].content 717 # We need to construct language attributes, not just read 718 # language information. Technically, langPointer could be 719 # any XPath expression. But if it looks like an attribute 720 # accessor, just use the attribute name. 721 if rule.nsProp('langPointer', None)[0] == '@': 722 self._itst_lang_attr[node] = rule.nsProp('langPointer', None)[1:] 723 xpath.setContextNode(oldnode) 724 elif xml_is_ns_name(rule, NS_ITST, 'credits'): 725 if rule.nsProp('appendTo', None) is not None: 726 for node in self._try_xpath_eval(xpath, rule.nsProp('appendTo', None)): 727 self._itst_credits = (node, rule) 728 break 729 elif (xml_is_ns_name(rule, NS_ITS, 'externalResourceRefRule') or 730 xml_is_ns_name(rule, NS_ITST, 'externalRefRule')): 731 sel = rule.nsProp('selector', None) 732 if xml_is_ns_name(rule, NS_ITS, 'externalResourceRefRule'): 733 ptr = rule.nsProp('externalResourceRefPointer', None) 734 else: 735 ptr = rule.nsProp('refPointer', None) 736 if sel is not None and ptr is not None: 737 for node in self._try_xpath_eval(xpath, sel): 738 try: 739 oldnode = xpath.contextNode() 740 except: 741 oldnode = None 742 xpath.setContextNode(node) 743 res = self._try_xpath_eval(xpath, ptr) 744 if len(res) > 0: 745 self._its_externals[node] = res[0].content 746 xpath.setContextNode(oldnode) 747 748 def apply_its_rules(self, builtins, params={}): 749 if builtins: 750 dirs = [] 751 ddir = os.getenv('XDG_DATA_HOME', '') 752 if ddir == '': 753 ddir = os.path.join(os.path.expanduser('~'), '.local', 'share') 754 dirs.append(ddir) 755 ddir = os.getenv('XDG_DATA_DIRS', '') 756 if ddir == '': 757 if DATADIR not in ('/usr/local/share', '/usr/share'): 758 ddir += DATADIR + ':' 759 ddir += '/usr/local/share:/usr/share' 760 dirs.extend(ddir.split(':')) 761 ddone = {} 762 for ddir in dirs: 763 itsdir = os.path.join(ddir, 'itstool', 'its') 764 if not os.path.exists(itsdir): 765 continue 766 for dfile in os.listdir(itsdir): 767 if dfile.endswith('.its'): 768 if not ddone.get(dfile, False): 769 self.apply_its_file(os.path.join(itsdir, dfile), params=params) 770 ddone[dfile] = True 771 self.apply_local_its_rules(params=params) 772 773 def apply_its_file(self, filename, params={}): 774 doc = libxml2.parseFile(filename) 775 root = doc.getRootElement() 776 if not xml_is_ns_name(root, NS_ITS, 'rules'): 777 return 778 version = None 779 if root.hasNsProp('version', None): 780 version = root.nsProp('version', None) 781 else: 782 sys.stderr.write('Warning: ITS file %s missing version attribute\n' % 783 os.path.basename(filename)) 784 if version is not None and version not in ('1.0', '2.0'): 785 sys.stderr.write('Warning: Skipping ITS file %s with unknown version %s\n' % 786 (os.path.basename(filename), root.nsProp('version', None))) 787 return 788 matched = True 789 for match in xml_child_iter(root): 790 if xml_is_ns_name(match, NS_ITST, 'match'): 791 matched = False 792 xpath = self._doc.xpathNewContext() 793 par = match 794 nss = {} 795 while par is not None: 796 nsdef = par.nsDefs() 797 while nsdef is not None: 798 if nsdef.name is not None: 799 if nsdef.name not in nss: 800 nss[nsdef.name] = nsdef.content 801 xpath.xpathRegisterNs(nsdef.name, nsdef.content) 802 nsdef = nsdef.next 803 par = par.parent 804 if match.hasNsProp('selector', None): 805 if len(self._try_xpath_eval(xpath, match.nsProp('selector', None))) > 0: 806 matched = True 807 break 808 if matched == False: 809 return 810 for rule in xml_child_iter(root): 811 xpath = self._doc.xpathNewContext() 812 par = match 813 nss = {} 814 while par is not None: 815 nsdef = par.nsDefs() 816 while nsdef is not None: 817 if nsdef.name is not None: 818 if nsdef.name not in nss: 819 nss[nsdef.name] = nsdef.content 820 xpath.xpathRegisterNs(nsdef.name, nsdef.content) 821 nsdef = nsdef.next 822 par = par.parent 823 self.register_its_params(xpath, root, params=params) 824 self.apply_its_rule(rule, xpath) 825 826 def apply_local_its_rules(self, params={}): 827 for rules in self._localrules: 828 def reg_ns(xpath, node): 829 if node.parent is not None: 830 reg_ns(xpath, node.parent) 831 nsdef = node.nsDefs() 832 while nsdef is not None: 833 if nsdef.name is not None: 834 xpath.xpathRegisterNs(nsdef.name, nsdef.content) 835 nsdef = nsdef.next 836 xpath = self._doc.xpathNewContext() 837 reg_ns(xpath, rules) 838 self.register_its_params(xpath, rules, params=params) 839 for rule in xml_child_iter(rules): 840 if rule.type != 'element': 841 continue 842 if rule.nsDefs() is not None: 843 rule_xpath = self._doc.xpathNewContext() 844 reg_ns(rule_xpath, rule) 845 self.register_its_params(rule_xpath, rules, params=params) 846 else: 847 rule_xpath = xpath 848 self.apply_its_rule(rule, rule_xpath) 849 850 def _append_credits(self, parent, node, trdata): 851 if xml_is_ns_name(node, NS_ITST, 'for-each'): 852 select = node.nsProp('select', None) 853 if select == 'years': 854 for year in trdata[2].split(','): 855 for child in xml_child_iter(node): 856 self._append_credits(parent, child, trdata + (year.strip(),)) 857 elif xml_is_ns_name(node, NS_ITST, 'value-of'): 858 select = node.nsProp('select', None) 859 val = None 860 if select == 'name': 861 val = trdata[0] 862 elif select == 'email': 863 val = trdata[1] 864 elif select == 'years': 865 val = trdata[2] 866 elif select == 'year' and len(trdata) == 4: 867 val = trdata[3] 868 if val is not None: 869 if not PY3: 870 val = val.encode('utf-8') 871 parent.addContent(val) 872 else: 873 newnode = node.copyNode(2) 874 parent.addChild(newnode) 875 for child in xml_child_iter(node): 876 self._append_credits(newnode, child, trdata) 877 878 def merge_credits(self, translations, language, node): 879 if self._itst_credits is None: 880 return 881 # Dear Python, please implement pgettext. 882 # http://bugs.python.org/issue2504 883 # Sincerely, Shaun 884 trans = translations.ugettext('_\x04translator-credits') 885 if trans is None or trans == 'translator-credits': 886 return 887 regex = re.compile('(.*) \<(.*)\>, (.*)') 888 for credit in trans.split('\n'): 889 match = regex.match(credit) 890 if not match: 891 continue 892 trdata = match.groups() 893 for node in xml_child_iter(self._itst_credits[1]): 894 self._append_credits(self._itst_credits[0], node, trdata) 895 896 def join_translations(self, translations, node=None, strict=False): 897 is_root = False 898 if node is None: 899 is_root = True 900 self.generate_messages(comments=False) 901 node = self._doc.getRootElement() 902 if node is None or node.type != 'element': 903 return 904 if self.get_itst_drop(node) == 'yes': 905 prev = node.prev 906 node.unlinkNode() 907 node.freeNode() 908 if prev is not None and prev.isBlankNode(): 909 prev.unlinkNode() 910 prev.freeNode() 911 return 912 msg = self._msgs.get_message_by_node(node) 913 if msg is None: 914 self.translate_attrs(node, node) 915 children = [child for child in xml_child_iter(node)] 916 for child in children: 917 self.join_translations(translations, node=child, strict=strict) 918 else: 919 prevnode = None 920 if node.prev is not None and node.prev.type == 'text': 921 prevtext = node.prev.content 922 if re.sub('\s+', '', prevtext) == '': 923 prevnode = node.prev 924 for lang in sorted(list(translations.keys()), reverse=True): 925 locale = self.get_its_locale_filter(node) 926 lmatch = match_locale_list(locale[0], lang) 927 if (locale[1] == 'include' and not lmatch) or (locale[1] == 'exclude' and lmatch): 928 continue 929 newnode = self.get_translated(node, translations[lang], strict=strict, lang=lang) 930 if newnode != node: 931 newnode.setProp('xml:lang', lang) 932 node.addNextSibling(newnode) 933 if prevnode is not None: 934 node.addNextSibling(prevnode.copyNode(0)) 935 if is_root: 936 # Because of the way we create nodes and rewrite the document, 937 # we end up with lots of redundant namespace definitions. We 938 # kill them off in one fell swoop at the end. 939 fix_node_ns(node, {}) 940 self._check_errors() 941 942 def merge_translations(self, translations, language, node=None, strict=False): 943 is_root = False 944 if node is None: 945 is_root = True 946 self.generate_messages(comments=False) 947 node = self._doc.getRootElement() 948 if node is None or node.type != 'element': 949 return 950 drop = False 951 locale = self.get_its_locale_filter(node) 952 if locale[1] == 'include': 953 if locale[0] != '*': 954 if not match_locale_list(locale[0], language): 955 drop = True 956 elif locale[1] == 'exclude': 957 if match_locale_list(locale[0], language): 958 drop = True 959 if self.get_itst_drop(node) == 'yes' or drop: 960 prev = node.prev 961 node.unlinkNode() 962 node.freeNode() 963 if prev is not None and prev.isBlankNode(): 964 prev.unlinkNode() 965 prev.freeNode() 966 return 967 if is_root: 968 self.merge_credits(translations, language, node) 969 msg = self._msgs.get_message_by_node(node) 970 if msg is None: 971 self.translate_attrs(node, node) 972 children = [child for child in xml_child_iter(node)] 973 for child in children: 974 self.merge_translations(translations, language, node=child, strict=strict) 975 else: 976 newnode = self.get_translated(node, translations, strict=strict, lang=language) 977 if newnode != node: 978 self.translate_attrs(node, newnode) 979 node.replaceNode(newnode) 980 if is_root: 981 # Apply language attributes to untranslated nodes. We don't do 982 # this before processing, because then these attributes would 983 # be copied into the new nodes. We apply the attribute without 984 # checking whether it was translated, because any that were will 985 # just be floating around, unattached to a document. 986 for lcnode in self._msgs.get_nodes_with_messages(): 987 attr = self._itst_lang_attr.get(lcnode) 988 if attr is None: 989 continue 990 origlang = None 991 lcpar = lcnode 992 while lcpar is not None: 993 origlang = self._its_lang.get(lcpar) 994 if origlang is not None: 995 break 996 lcpar = lcpar.parent 997 if origlang is not None: 998 lcnode.setProp(attr, origlang) 999 # And then set the language attribute on the root node. 1000 if language is not None: 1001 attr = self._itst_lang_attr.get(node) 1002 if attr is not None: 1003 node.setProp(attr, language) 1004 # Because of the way we create nodes and rewrite the document, 1005 # we end up with lots of redundant namespace definitions. We 1006 # kill them off in one fell swoop at the end. 1007 fix_node_ns(node, {}) 1008 self._check_errors() 1009 1010 def translate_attrs(self, oldnode, newnode): 1011 trans_attrs = [attr for attr in xml_attr_iter(oldnode) if self._its_translate_nodes.get(attr, 'no') == 'yes'] 1012 for attr in trans_attrs: 1013 srccontent = attr.get_content() 1014 if not PY3: 1015 srccontent = srccontent.decode('utf-8') 1016 newcontent = translations.ugettext(srccontent) 1017 if newcontent: 1018 if not PY3: 1019 newcontent = newcontent.encode('utf-8') 1020 newnode.setProp(attr.name, newcontent) 1021 1022 def get_translated (self, node, translations, strict=False, lang=None): 1023 msg = self._msgs.get_message_by_node(node) 1024 if msg is None: 1025 return node 1026 msgstr = msg.get_string() 1027 # Dear Python, please implement pgettext. 1028 # http://bugs.python.org/issue2504 1029 # Sincerely, Shaun 1030 if msg.get_context() is not None: 1031 msgstr = msg.get_context() + '\x04' + msgstr 1032 trans = translations.ugettext(msgstr) 1033 if trans is None: 1034 return node 1035 nss = {} 1036 def reg_ns(node, nss): 1037 if node.parent is not None: 1038 reg_ns(node.parent, nss) 1039 nsdef = node.nsDefs() 1040 while nsdef is not None: 1041 nss[nsdef.name] = nsdef.content 1042 nsdef = nsdef.next 1043 reg_ns(node, nss) 1044 nss['_'] = NS_BLANK 1045 try: 1046 blurb = node.doc.intSubset().serialize('utf-8') 1047 except Exception: 1048 blurb = '' 1049 blurb += '<' + ustr(node.name, 'utf-8') 1050 for nsname in list(nss.keys()): 1051 if nsname is None: 1052 blurb += ' xmlns="%s"' % nss[nsname] 1053 else: 1054 blurb += ' xmlns:%s="%s"' % (nsname, nss[nsname]) 1055 blurb += '>%s</%s>' % (trans, ustr(node.name, 'utf-8')) 1056 if not PY3: 1057 blurb = blurb.encode('utf-8') 1058 ctxt = libxml2.createDocParserCtxt(blurb) 1059 if self._load_dtd: 1060 ctxt.loadSubset(1) 1061 if self._keep_entities: 1062 ctxt.ctxtUseOptions(libxml2.XML_PARSE_DTDLOAD) 1063 ctxt.replaceEntities(0) 1064 else: 1065 ctxt.replaceEntities(1) 1066 ctxt.parseDocument() 1067 trnode = ctxt.doc().getRootElement().copyNode(1) 1068 try: 1069 self._check_errors() 1070 except libxml2.parserError: 1071 if strict: 1072 raise 1073 else: 1074 sys.stderr.write(pr_str('Warning: Could not merge %stranslation for msgid:\n%s\n' % ( 1075 (lang + ' ') if lang is not None else '', 1076 msgstr))) 1077 self._xml_err = '' 1078 return node 1079 def scan_node(node): 1080 children = [child for child in xml_child_iter(node)] 1081 for child in children: 1082 if child.type != 'element': 1083 continue 1084 if child.ns() is not None and child.ns().content == NS_BLANK: 1085 ph_node = msg.get_placeholder(child.name).node 1086 if self.has_child_elements(ph_node): 1087 self.merge_translations(translations, None, ph_node, strict=strict) 1088 child.replaceNode(ph_node) 1089 else: 1090 repl = self.get_translated(ph_node, translations, strict=strict, lang=lang) 1091 child.replaceNode(repl) 1092 scan_node(child) 1093 try: 1094 scan_node(trnode) 1095 except: 1096 if strict: 1097 raise 1098 else: 1099 sys.stderr.write(pr_str('Warning: Could not merge %stranslation for msgid:\n%s\n' % ( 1100 (lang + ' ') if lang is not None else '', 1101 msgstr))) 1102 self._xml_err = '' 1103 return node 1104 retnode = node.copyNode(2) 1105 for child in xml_child_iter(trnode): 1106 retnode.addChild(child.copyNode(1)) 1107 ctxt.doc().freeDoc() 1108 return retnode 1109 1110 def generate_messages(self, comments=True): 1111 if self._itst_credits is not None: 1112 self._msgs.add_credits() 1113 for child in xml_child_iter(self._doc): 1114 if child.type == 'element': 1115 self.generate_message(child, None, comments=comments) 1116 break 1117 1118 def generate_message (self, node, msg, comments=True, path=None): 1119 if node.type in ('text', 'cdata') and msg is not None: 1120 msg.add_text(node.content) 1121 return 1122 if node.type == 'entity_ref': 1123 msg.add_entity_ref(node.name); 1124 if node.type != 'element': 1125 return 1126 if node.hasNsProp('drop', NS_ITST) and node.nsProp('drop', NS_ITST) == 'yes': 1127 return 1128 if self._itst_drop_nodes.get(node, 'no') == 'yes': 1129 return 1130 locfil = self.get_its_locale_filter(node) 1131 if locfil == ('', 'include') or locfil == ('*', 'exclude'): 1132 return 1133 if path is None: 1134 path = '' 1135 translate = self.get_its_translate(node) 1136 withinText = False 1137 if translate == 'no': 1138 if msg is not None: 1139 msg.add_placeholder(node) 1140 is_unit = False 1141 msg = None 1142 else: 1143 is_unit = msg is None or self.is_translation_unit(node) 1144 if is_unit: 1145 if msg is not None: 1146 msg.add_placeholder(node) 1147 msg = Message() 1148 ctxt = None 1149 if node.hasNsProp('context', NS_ITST): 1150 ctxt = node.nsProp('context', NS_ITST) 1151 if ctxt is None: 1152 ctxt = self._itst_contexts.get(node) 1153 if ctxt is not None: 1154 msg.set_context(ctxt) 1155 idvalue = self.get_its_id_value(node) 1156 if idvalue is not None: 1157 basename = os.path.basename(self._filename) 1158 msg.add_id_value(basename + '#' + idvalue) 1159 if self.get_preserve_space(node): 1160 msg.set_preserve_space() 1161 if self.get_its_locale_filter(node) != ('*', 'include'): 1162 msg.set_locale_filter(self.get_its_locale_filter(node)) 1163 msg.add_source('%s:%i' % (self._doc.name, node.lineNo())) 1164 msg.add_marker('%s/%s' % (ustr(node.parent.name, 'utf-8'), ustr(node.name, 'utf-8'))) 1165 else: 1166 withinText = True 1167 msg.add_start_tag(node) 1168 1169 if not withinText: 1170 # Add msg for translatable node attributes 1171 for attr in xml_attr_iter(node): 1172 if self._its_translate_nodes.get(attr, 'no') == 'yes': 1173 attr_msg = Message() 1174 if self.get_preserve_space(attr): 1175 attr_msg.set_preserve_space() 1176 attr_msg.add_source('%s:%i' % (self._doc.name, node.lineNo())) 1177 attr_msg.add_marker('%s/%s@%s' % (node.parent.name, node.name, attr.name)) 1178 attr_msg.add_text(attr.content) 1179 if comments: 1180 for locnote in self.get_its_loc_notes(attr): 1181 comment = Comment(locnote) 1182 comment.add_marker ('%s/%s@%s' % ( 1183 node.parent.name, node.name, attr.name)) 1184 attr_msg.add_comment(comment) 1185 self._msgs.add_message(attr_msg, attr) 1186 1187 if comments and msg is not None: 1188 cnode = node 1189 while cnode is not None: 1190 hasnote = False 1191 for locnote in self.get_its_loc_notes(cnode, inherit=(not withinText)): 1192 comment = Comment(locnote) 1193 if withinText: 1194 comment.add_marker('.%s/%s' % (path, cnode.name)) 1195 msg.add_comment(comment) 1196 hasnote = True 1197 if hasnote or not is_unit: 1198 break 1199 cnode = cnode.parent 1200 1201 self.generate_external_resource_message(node) 1202 for attr in xml_attr_iter(node): 1203 self.generate_external_resource_message(attr) 1204 idvalue = self.get_its_id_value(attr) 1205 if idvalue is not None: 1206 basename = os.path.basename(self._filename) 1207 msg.add_id_value(basename + '#' + idvalue) 1208 1209 if withinText: 1210 path = path + '/' + node.name 1211 for child in xml_child_iter(node): 1212 self.generate_message(child, msg, comments=comments, path=path) 1213 1214 if translate: 1215 if is_unit and not msg.is_empty(): 1216 self._msgs.add_message(msg, node) 1217 elif msg is not None: 1218 msg.add_end_tag(node) 1219 1220 def generate_external_resource_message(self, node): 1221 if node not in self._its_externals: 1222 return 1223 resref = self._its_externals[node] 1224 if node.type == 'element': 1225 translate = self.get_its_translate(node) 1226 marker = '%s/%s' % (node.parent.name, node.name) 1227 else: 1228 translate = self.get_its_translate(node.parent) 1229 marker = '%s/%s/@%s' % (node.parent.parent.name, node.parent.name, node.name) 1230 if translate == 'no': 1231 return 1232 msg = Message() 1233 try: 1234 fullfile = os.path.join(os.path.dirname(self._filename), resref) 1235 filefp = open(fullfile, 'rb') 1236 filemd5 = hashlib.md5(filefp.read()).hexdigest() 1237 filefp.close() 1238 except Exception: 1239 filemd5 = '__failed__' 1240 txt = "external ref='%s' md5='%s'" % (resref, filemd5) 1241 msg.set_context('_') 1242 msg.add_text(txt) 1243 msg.add_source('%s:%i' % (self._doc.name, node.lineNo())) 1244 msg.add_marker(marker) 1245 msg.add_comment(Comment('This is a reference to an external file such as an image or' 1246 ' video. When the file changes, the md5 hash will change to' 1247 ' let you know you need to update your localized copy. The' 1248 ' msgstr is not used at all. Set it to whatever you like' 1249 ' once you have updated your copy of the file.')) 1250 self._msgs.add_message(msg, None) 1251 1252 def is_translation_unit (self, node): 1253 return self.get_its_within_text(node) != 'yes' 1254 1255 def has_child_elements(self, node): 1256 return len([child for child in xml_child_iter(node) if child.type=='element']) 1257 1258 def get_preserve_space (self, node): 1259 while node.type in ('attribute', 'element'): 1260 if node.getSpacePreserve() == 1: 1261 return True 1262 if node in self._its_preserve_space_nodes: 1263 return (self._its_preserve_space_nodes[node] == 'preserve') 1264 node = node.parent 1265 return False 1266 1267 def get_its_translate(self, node): 1268 val = None 1269 if node.hasNsProp('translate', NS_ITS): 1270 val = node.nsProp('translate', NS_ITS) 1271 elif xml_is_ns_name(node, NS_ITS, 'span') and node.hasNsProp('translate', None): 1272 val = node.nsProp('translate', None) 1273 elif node in self._its_translate_nodes: 1274 val = self._its_translate_nodes[node] 1275 if val is not None: 1276 return val 1277 if node.type == 'attribute': 1278 return 'no' 1279 if node.parent.type == 'element': 1280 return self.get_its_translate(node.parent) 1281 return 'yes' 1282 1283 def get_its_within_text(self, node): 1284 if node.hasNsProp('withinText', NS_ITS): 1285 val = node.nsProp('withinText', NS_ITS) 1286 elif xml_is_ns_name(node, NS_ITS, 'span') and node.hasNsProp('withinText', None): 1287 val = node.nsProp('withinText', None) 1288 else: 1289 return self._its_within_text_nodes.get(node, 'no') 1290 if val in ('yes', 'nested'): 1291 return val 1292 return 'no' 1293 1294 def get_its_locale_filter(self, node): 1295 if node.hasNsProp('localeFilterList', NS_ITS) or node.hasNsProp('localeFilterType', NS_ITS): 1296 if node.hasNsProp('localeFilterList', NS_ITS): 1297 lst = node.nsProp('localeFilterList', NS_ITS) 1298 else: 1299 lst = '*' 1300 if node.hasNsProp('localeFilterType', NS_ITS): 1301 typ = node.nsProp('localeFilterType', NS_ITS) 1302 else: 1303 typ = 'include' 1304 return (lst, typ) 1305 if (xml_is_ns_name(node, NS_ITS, 'span') and 1306 (node.hasNsProp('localeFilterList', None) or node.hasNsProp('localeFilterType', None))): 1307 if node.hasNsProp('localeFilterList', None): 1308 lst = node.nsProp('localeFilterList', None) 1309 else: 1310 lst = '*' 1311 if node.hasNsProp('localeFilterType', None): 1312 typ = node.nsProp('localeFilterType', None) 1313 else: 1314 typ = 'include' 1315 return (lst, typ) 1316 if node in self._its_locale_filters: 1317 return self._its_locale_filters[node] 1318 if node.parent.type == 'element': 1319 return self.get_its_locale_filter(node.parent) 1320 return ('*', 'include') 1321 1322 def get_itst_drop(self, node): 1323 if node.hasNsProp('drop', NS_ITST) and node.nsProp('drop', NS_ITST) == 'yes': 1324 return 'yes' 1325 if self._itst_drop_nodes.get(node, 'no') == 'yes': 1326 return 'yes' 1327 return 'no' 1328 1329 def get_its_id_value(self, node): 1330 if node.hasNsProp('id', NS_XML): 1331 return node.nsProp('id', NS_XML) 1332 return self._its_id_values.get(node, None) 1333 1334 def get_its_loc_notes(self, node, inherit=True): 1335 ret = [] 1336 if node.hasNsProp('locNote', NS_ITS) or node.hasNsProp('locNoteRef', NS_ITS) or node.hasNsProp('locNoteType', NS_ITS): 1337 notetype = node.nsProp('locNoteType', NS_ITS) 1338 if node.hasNsProp('locNote', NS_ITS): 1339 ret.append(LocNote(locnote=node.nsProp('locNote', NS_ITS), locnotetype=notetype)) 1340 elif node.hasNsProp('locNoteRef', NS_ITS): 1341 ret.append(LocNote(locnoteref=node.nsProp('locNoteRef', NS_ITS), locnotetype=notetype)) 1342 elif xml_is_ns_name(node, NS_ITS, 'span'): 1343 if node.hasNsProp('locNote', None) or node.hasNsProp('locNoteRef', None) or node.hasNsProp('locNoteType', None): 1344 notetype = node.nsProp('locNoteType', None) 1345 if node.hasNsProp('locNote', None): 1346 ret.append(LocNote(locnote=node.nsProp('locNote', None), locnotetype=notetype)) 1347 elif node.hasNsProp('locNoteRef', None): 1348 ret.append(LocNote(locnoteref=node.nsProp('locNoteRef', None), locnotetype=notetype)) 1349 for locnote in reversed(self._its_loc_notes.get(node, [])): 1350 ret.append(locnote) 1351 if (len(ret) == 0 and inherit and 1352 node.type != 'attribute' and node.parent is not None and node.parent.type == 'element'): 1353 return self.get_its_loc_notes(node.parent) 1354 return ret 1355 1356 def output_test_data(self, category, out, node=None): 1357 if node is None: 1358 node = self._doc.getRootElement() 1359 compval = '' 1360 if category == 'translate': 1361 compval = 'translate="%s"' % self.get_its_translate(node) 1362 elif category == 'withinText': 1363 if node.type != 'attribute': 1364 compval = 'withinText="%s"' % self.get_its_within_text(node) 1365 elif category == 'localeFilter': 1366 compval = 'localeFilterList="%s"\tlocaleFilterType="%s"' % self.get_its_locale_filter(node) 1367 elif category == 'locNote': 1368 val = self.get_its_loc_notes(node) 1369 if len(val) > 0: 1370 if val[0].locnote is not None: 1371 compval = 'locNote="%s"\tlocNoteType="%s"' % (ustr(val[0]), val[0].locnotetype) 1372 elif val[0].locnoteref is not None: 1373 compval = 'locNoteRef="%s"\tlocNoteType="%s"' % (val[0].locnoteref, val[0].locnotetype) 1374 elif category == 'externalResourceRef': 1375 val = self._its_externals.get(node, '') 1376 if val != '': 1377 compval = 'externalResourceRef="%s"' % val 1378 elif category == 'idValue': 1379 val = self.get_its_id_value(node) 1380 if val is not None: 1381 compval = 'idValue="%s"' % val 1382 elif category == 'preserveSpace': 1383 if self.get_preserve_space(node): 1384 compval = 'space="preserve"' 1385 else: 1386 compval = 'space="default"' 1387 else: 1388 sys.stderr.write('Error: Unrecognized category %s\n' % category) 1389 sys.exit(1) 1390 if compval != '': 1391 out.write('%s\t%s\r\n' % (xml_get_node_path(node), compval)) 1392 else: 1393 out.write('%s\r\n' % (xml_get_node_path(node))) 1394 for attr in sorted(xml_attr_iter(node), key=ustr): 1395 self.output_test_data(category, out, attr) 1396 for child in xml_child_iter(node): 1397 if child.type == 'element': 1398 self.output_test_data(category, out, child) 1399 1400 @staticmethod 1401 def _try_xpath_eval (xpath, expr): 1402 try: 1403 return xpath.xpathEval(expr) 1404 except: 1405 sys.stderr.write('Warning: Invalid XPath: %s\n' % expr) 1406 return [] 1407 1408def match_locale_list(extranges, locale): 1409 if extranges.strip() == '': 1410 return False 1411 for extrange in [extrange.strip() for extrange in extranges.split(',')]: 1412 if match_locale(extrange, locale): 1413 return True 1414 return False 1415 1416def match_locale(extrange, locale): 1417 # Extended filtering for extended language ranges as 1418 # defined by RFC4647, part of BCP47. 1419 # http://tools.ietf.org/html/rfc4647#section-3.3.2 1420 rangelist = [x.lower() for x in extrange.split('-')] 1421 localelist = [x.lower() for x in locale.split('-')] 1422 if rangelist[0] not in ('*', localelist[0]): 1423 return False 1424 rangei = localei = 0 1425 while rangei < len(rangelist): 1426 if rangelist[rangei] == '*': 1427 rangei += 1 1428 continue 1429 if localei >= len(localelist): 1430 return False 1431 if rangelist[rangei] in ('*', localelist[localei]): 1432 rangei += 1 1433 localei += 1 1434 continue 1435 if len(localelist[localei]) == 1: 1436 return False 1437 localei += 1 1438 return True 1439 1440_locale_pattern = re.compile('([a-zA-Z0-9-]+)(_[A-Za-z0-9]+)?(@[A-Za-z0-9]+)?(\.[A-Za-z0-9]+)?') 1441def convert_locale (locale): 1442 # Automatically convert POSIX-style locales to BCP47 1443 match = _locale_pattern.match(locale) 1444 if match is None: 1445 return locale 1446 ret = match.group(1).lower() 1447 variant = match.group(3) 1448 if variant == '@cyrillic': 1449 ret += '-Cyrl' 1450 variant = None 1451 if variant == '@devanagari': 1452 ret += '-Deva' 1453 variant = None 1454 elif variant == '@latin': 1455 ret += '-Latn' 1456 variant = None 1457 elif variant == '@shaw': 1458 ret += '-Shaw' 1459 variant = None 1460 if match.group(2) is not None: 1461 ret += '-' + match.group(2)[1:].upper() 1462 if variant is not None and variant != '@euro': 1463 ret += '-' + variant[1:].lower() 1464 return ret 1465 1466 1467if __name__ == '__main__': 1468 options = optparse.OptionParser() 1469 options.set_usage('\n itstool [OPTIONS] [XMLFILES]\n' + 1470 ' itstool -m <MOFILE> [OPTIONS] [XMLFILES]\n' + 1471 ' itstool -j <XMLFILE> [OPTIONS] [MOFILES]') 1472 options.add_option('-i', '--its', 1473 action='append', 1474 dest='itsfile', 1475 metavar='ITS', 1476 help='Load the ITS rules in the file ITS (can specify multiple times)') 1477 options.add_option('-l', '--lang', 1478 dest='lang', 1479 default=None, 1480 metavar='LANGUAGE', 1481 help='Explicitly set the language code for output file') 1482 options.add_option('-j', '--join', 1483 dest='join', 1484 metavar='FILE', 1485 help='Join multiple MO files with the XML file FILE and output XML file') 1486 options.add_option('-m', '--merge', 1487 dest='merge', 1488 metavar='FILE', 1489 help='Merge from a PO or MO file FILE and output XML files') 1490 options.add_option('-n', '--no-builtins', 1491 action='store_true', 1492 dest='nobuiltins', 1493 default=False, 1494 help='Do not apply the built-in ITS rules') 1495 options.add_option('-o', '--output', 1496 dest='output', 1497 default=None, 1498 metavar='OUT', 1499 help='Output PO files to file OUT or XML files in directory OUT') 1500 options.add_option('--path', 1501 action='append', 1502 dest='itspath', 1503 default=None, 1504 metavar='PATHS', 1505 help='Extra path where ITS files may be found (can specify multiple times)') 1506 options.add_option('-s', '--strict', 1507 action='store_true', 1508 dest='strict', 1509 default=False, 1510 help='Exit with error when PO files contain broken XML') 1511 options.add_option('-d', '--load-dtd', 1512 action='store_true', 1513 dest='load_dtd', 1514 default=False, 1515 help='Load external DTDs used by input XML') 1516 options.add_option('-k', '--keep-entities', 1517 action='store_true', 1518 dest='keep_entities', 1519 default=False, 1520 help='Keep entity reference unexpanded') 1521 options.add_option('-p', '--param', 1522 action='append', 1523 dest='params', 1524 default=[], 1525 nargs=2, 1526 metavar='NAME VALUE', 1527 help='Define the ITS parameter NAME to the value VALUE (can specify multiple times)') 1528 options.add_option('-t', '--test', 1529 dest='test', 1530 default=None, 1531 metavar='CATEGORY', 1532 help='Generate conformance test output for CATEGORY') 1533 options.add_option('-v', '--version', 1534 action='store_true', 1535 dest='version', 1536 default=False, 1537 help='Print itstool version and exit') 1538 (opts, args) = options.parse_args(sys.argv) 1539 1540 if opts.version: 1541 print('itstool %s' % VERSION) 1542 sys.exit(0) 1543 1544 params = {} 1545 for name, value in opts.params: 1546 params[name] = value 1547 1548 if opts.merge is None and opts.join is None: 1549 messages = MessageList() 1550 for filename in args[1:]: 1551 doc = Document(filename, messages, load_dtd=opts.load_dtd, keep_entities=opts.keep_entities) 1552 doc.apply_its_rules(not(opts.nobuiltins), params=params) 1553 if opts.itsfile is not None: 1554 for itsfile in opts.itsfile: 1555 doc.apply_its_file(itsfile, params=params) 1556 if opts.test is None: 1557 doc.generate_messages() 1558 if opts.output is None or opts.output == '-': 1559 out = sys.stdout 1560 else: 1561 try: 1562 out = io.open(opts.output, 'wt', encoding='utf-8') 1563 except: 1564 sys.stderr.write('Error: Cannot write to file %s\n' % opts.output) 1565 sys.exit(1) 1566 if opts.test is not None: 1567 doc.output_test_data(opts.test, out) 1568 else: 1569 messages.output(out) 1570 out.flush() 1571 elif opts.merge is not None: 1572 try: 1573 translations = gettext.GNUTranslations(open(opts.merge, 'rb')) 1574 except: 1575 sys.stderr.write('Error: cannot open mo file %s\n' % opts.merge) 1576 sys.exit(1) 1577 if PY3: 1578 translations.ugettext = translations.gettext 1579 translations.add_fallback(NoneTranslations()) 1580 if opts.lang is None: 1581 opts.lang = convert_locale(os.path.splitext(os.path.basename(opts.merge))[0]) 1582 if opts.output is None: 1583 out = './' 1584 elif os.path.isdir(opts.output): 1585 out = opts.output 1586 elif len(args) == 2: 1587 if opts.output == '-': 1588 out = sys.stdout 1589 else: 1590 out = open(opts.output, 'wb') 1591 else: 1592 sys.stderr.write('Error: Non-directory output for multiple files\n') 1593 sys.exit(1) 1594 for filename in args[1:]: 1595 messages = MessageList() 1596 doc = Document(filename, messages, load_dtd=opts.load_dtd, keep_entities=opts.keep_entities) 1597 doc.apply_its_rules(not(opts.nobuiltins), params=params) 1598 if opts.itsfile is not None: 1599 for itsfile in opts.itsfile: 1600 doc.apply_its_file(itsfile, params=params) 1601 try: 1602 doc.merge_translations(translations, opts.lang, strict=opts.strict) 1603 except Exception as e: 1604 raise 1605 sys.stderr.write('Error: Could not merge translations:\n%s\n' % ustr(e)) 1606 sys.exit(1) 1607 serialized = doc._doc.serialize('utf-8') 1608 if PY3: 1609 # For some reason, under py3, our serialized data is returns as a str. 1610 # Let's encode it to bytes 1611 serialized = serialized.encode('utf-8') 1612 fout = out 1613 fout_is_str = isinstance(fout, string_types) 1614 if fout_is_str: 1615 fout = open(os.path.join(fout, os.path.basename(filename)), 'wb') 1616 fout.write(serialized) 1617 fout.flush() 1618 if fout_is_str: 1619 fout.close() 1620 elif opts.join is not None: 1621 translations = {} 1622 for filename in args[1:]: 1623 try: 1624 thistr = gettext.GNUTranslations(open(filename, 'rb')) 1625 except: 1626 sys.stderr.write('Error: cannot open mo file %s\n' % filename) 1627 sys.exit(1) 1628 thistr.add_fallback(NoneTranslations()) 1629 if PY3: 1630 thistr.ugettext = thistr.gettext 1631 lang = convert_locale(os.path.splitext(os.path.basename(filename))[0]) 1632 translations[lang] = thistr 1633 if opts.output is None: 1634 out = sys.stdout 1635 elif os.path.isdir(opts.output): 1636 out = open(os.path.join(opts.output, os.path.basename(filename)), 'wb') 1637 else: 1638 out = open(opts.output, 'wb') 1639 messages = MessageList() 1640 doc = Document(opts.join, messages) 1641 doc.apply_its_rules(not(opts.nobuiltins), params=params) 1642 if opts.itsfile is not None: 1643 for itsfile in opts.itsfile: 1644 doc.apply_its_file(itsfile, params=params) 1645 doc.join_translations(translations, strict=opts.strict) 1646 serialized = doc._doc.serialize('utf-8') 1647 if PY3: 1648 # For some reason, under py3, our serialized data is returns as a str. 1649 # Let's encode it to bytes 1650 serialized = serialized.encode('utf-8') 1651 out.write(serialized) 1652 out.flush() 1653