#
# Copyright 2002-2007 Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

"""Classes that hold units of .po files (pounit) or entire files (pofile).

Gettext-style .po (or .pot) files are used in translations for KDE, GNOME and
many other projects.

This uses libgettextpo from the gettext package. Any version before 0.17 will
at least cause some subtle bugs or may not work at all. Developers might want
to have a look at gettext-tools/libgettextpo/gettext-po.h from the gettext
package for the public API of the library.
"""

import ctypes.util
import logging
import os
import re
import sys
import tempfile
import threading
from ctypes import (
    CFUNCTYPE,
    POINTER,
    Structure,
    c_char_p,
    c_int,
    c_long,
    c_size_t,
    c_uint,
    cdll,
)

from translate.misc.multistring import multistring
from translate.storage import base, pocommon, pypo


logger = logging.getLogger(__name__)

lsep = " "
"""Separator for #: entries"""

STRING = c_char_p


# Structures
class po_message(Structure):
    """Opaque libgettextpo message structure (only used through pointers)."""


class po_file(Structure):
    """Opaque libgettextpo file structure (only used through pointers)."""


class po_filepos(Structure):
    """Opaque libgettextpo file-position structure (only used through pointers)."""


class po_iterator(Structure):
    """Opaque libgettextpo message-iterator structure (only used through pointers)."""


po_message_t = POINTER(po_message)
"""A po_message_t represents a message in a PO file."""

po_file_t = POINTER(po_file)
"""A po_file_t represents a PO file."""

po_filepos_t = POINTER(po_filepos)
"""A po_filepos_t represents the position in a PO file."""

po_iterator_t = POINTER(po_iterator)
"""A po_iterator_t represents an iterator through a PO file."""


# Function prototypes
xerror_prototype = CFUNCTYPE(
    None, c_int, po_message_t, STRING, c_uint, c_uint, c_int, STRING
)
xerror2_prototype = CFUNCTYPE(
    None,
    c_int,
    po_message_t,
    STRING,
    c_uint,
    c_uint,
    c_int,
    STRING,
    po_message_t,
    STRING,
    c_uint,
    c_uint,
    c_int,
    STRING,
)


# Structures (error handler)
class po_xerror_handler(Structure):
    """Pair of callbacks handed to ``po_file_read_v3``/``po_file_write_v2``."""

    _fields_ = [("xerror", xerror_prototype), ("xerror2", xerror2_prototype)]


class po_error_handler(Structure):
    """Callback table with the older four-function error handler layout."""

    _fields_ = [
        ("error", CFUNCTYPE(None, c_int, c_int, STRING)),
        ("error_at_line", CFUNCTYPE(None, c_int, c_int, STRING, c_uint, STRING)),
        ("multiline_warning", CFUNCTYPE(None, STRING, STRING)),
        ("multiline_error", CFUNCTYPE(None, STRING, STRING)),
    ]


# Per-thread slot where the error callbacks park an exception; the Python
# code that triggered the library call raises it once the call has returned.
xerror_storage = threading.local()

ignored_errors = {
    # TODO: this is probably bug somewhere in cpo, but
    # it used to be silently ignored before the exceptions
    # were raised, so it is left to fixing separately
    "invalid multibyte sequence",
    # Duplicate messages are allowed
    "duplicate message definition",
}

# Misspelled historical name, kept so that existing importers keep working.
ignored_erorrs = ignored_errors


def trigger_exception(severity, filename, lineno, column, message_text):
    """Store a ``ValueError`` in ``xerror_storage`` for a reported error.

    Nothing is stored for warnings (severity 0) or for messages listed in
    ``ignored_errors``. The exception is only stored here, never raised.

    :param severity: 0 is warning, 1 error, 2 critical.
    :param filename: name of the offending file, may be empty or None.
    :param lineno: line number reported by the library.
    :param column: column number reported by the library.
    :param message_text: decoded error message.
    """
    if severity < 1 or message_text in ignored_errors:
        return
    if filename:
        detail = f"{filename}:{lineno}:{column}: {message_text}"
    else:
        detail = message_text
    xerror_storage.exception = ValueError(detail)


# Callback functions for po_xerror_handler
def xerror_cb(severity, message, filename, lineno, column, multiline_p, message_text):
    """Log a single-message error callback and record it for raising later."""
    message_text = message_text.decode()
    if filename:
        filename = filename.decode()
    logger.error(
        "xerror_cb %s %s %s %s %s %s %s",
        severity,
        message,
        filename,
        lineno,
        column,
        multiline_p,
        message_text,
    )
    trigger_exception(severity, filename, lineno, column, message_text)


def xerror2_cb(
    severity,
    message1,
    filename1,
    lineno1,
    column1,
    multiline_p1,
    message_text1,
    message2,
    filename2,
    lineno2,
    column2,
    multiline_p2,
    message_text2,
):
    """Log a two-message error callback and record it for raising later.

    Only the first message's position and text are used for the recorded
    exception; the second one is logged only.
    """
    message_text1 = message_text1.decode()
    message_text2 = message_text2.decode()
    if filename1:
        filename1 = filename1.decode()
    if filename2:
        filename2 = filename2.decode()
    logger.error(
        "xerror2_cb %s %s %s %s %s %s %s %s %s %s %s %s",
        severity,
        message1,
        filename1,
        lineno1,
        column1,
        multiline_p1,
        message_text1,
        filename2,
        lineno2,
        column2,
        multiline_p2,
        message_text2,
    )
    trigger_exception(severity, filename1, lineno1, column1, message_text1)


# Setup return and parameter types
# See also http://git.savannah.gnu.org/cgit/gettext.git/tree/gettext-tools/libgettextpo/gettext-po.in.h
def setup_call_types(gpo):
    """Declare ctypes argument and return types on the loaded library.

    :param gpo: the ctypes library object for libgettextpo.
    """
    # File access
    gpo.po_file_create.restype = po_file_t
    gpo.po_file_read_v3.argtypes = [STRING, POINTER(po_xerror_handler)]
    gpo.po_file_read_v3.restype = po_file_t
    gpo.po_file_write_v2.argtypes = [po_file_t, STRING, POINTER(po_xerror_handler)]
    gpo.po_file_write_v2.restype = po_file_t
    gpo.po_file_free.argtypes = [po_file_t]

    # Header
    gpo.po_file_domain_header.argtypes = [po_file_t, STRING]
    gpo.po_file_domain_header.restype = STRING
    gpo.po_header_field.argtypes = [STRING, STRING]
    gpo.po_header_field.restype = STRING
    gpo.po_header_set_field.argtypes = [STRING, STRING, STRING]
    gpo.po_header_set_field.restype = STRING

    # Locations (filepos)
    gpo.po_filepos_file.argtypes = [po_filepos_t]
    gpo.po_filepos_file.restype = STRING
    gpo.po_filepos_start_line.argtypes = [po_filepos_t]
    gpo.po_filepos_start_line.restype = c_int  # not strictly true casting
    gpo.po_message_filepos.argtypes = [po_message_t, c_int]
    gpo.po_message_filepos.restype = po_filepos_t
    gpo.po_message_add_filepos.argtypes = [po_message_t, STRING, c_size_t]
    gpo.po_message_remove_filepos.argtypes = [po_message_t, c_size_t]

    # Iterators
    gpo.po_message_iterator.argtypes = [po_file_t, STRING]
    gpo.po_message_iterator.restype = po_iterator_t
    gpo.po_message_iterator_free.argtypes = [po_iterator_t]
    gpo.po_next_message.argtypes = [po_iterator_t]
    gpo.po_next_message.restype = po_message_t
    gpo.po_message_insert.argtypes = [po_iterator_t, po_message_t]

    # Message (get methods)
    gpo.po_message_create.restype = po_message_t
    gpo.po_message_msgctxt.argtypes = [po_message_t]
    gpo.po_message_msgctxt.restype = STRING
    gpo.po_message_comments.argtypes = [po_message_t]
    gpo.po_message_comments.restype = STRING
    gpo.po_message_extracted_comments.argtypes = [po_message_t]
    gpo.po_message_extracted_comments.restype = STRING
    gpo.po_message_prev_msgctxt.argtypes = [po_message_t]
    gpo.po_message_prev_msgctxt.restype = STRING
    gpo.po_message_prev_msgid.argtypes = [po_message_t]
    gpo.po_message_prev_msgid.restype = STRING
    gpo.po_message_prev_msgid_plural.argtypes = [po_message_t]
    gpo.po_message_prev_msgid_plural.restype = STRING
    gpo.po_message_is_obsolete.argtypes = [po_message_t]
    gpo.po_message_is_obsolete.restype = c_int
    gpo.po_message_is_fuzzy.argtypes = [po_message_t]
    gpo.po_message_is_fuzzy.restype = c_int
    gpo.po_message_is_format.argtypes = [po_message_t, STRING]
    gpo.po_message_is_format.restype = c_int
    gpo.po_message_msgid.argtypes = [po_message_t]
    gpo.po_message_msgid.restype = STRING
    gpo.po_message_msgid_plural.argtypes = [po_message_t]
    gpo.po_message_msgid_plural.restype = STRING
    gpo.po_message_msgstr.argtypes = [po_message_t]
    gpo.po_message_msgstr.restype = STRING
    gpo.po_message_msgstr_plural.argtypes = [po_message_t, c_int]
    gpo.po_message_msgstr_plural.restype = STRING

    # Message (set methods)
    gpo.po_message_set_comments.argtypes = [po_message_t, STRING]
    gpo.po_message_set_extracted_comments.argtypes = [po_message_t, STRING]
    gpo.po_message_set_prev_msgctxt.argtypes = [po_message_t, STRING]
    gpo.po_message_set_prev_msgid.argtypes = [po_message_t, STRING]
    gpo.po_message_set_prev_msgid_plural.argtypes = [po_message_t, STRING]
    gpo.po_message_set_obsolete.argtypes = [po_message_t, c_int]
    gpo.po_message_set_fuzzy.argtypes = [po_message_t, c_int]
    gpo.po_message_set_format.argtypes = [po_message_t, STRING, c_int]
    gpo.po_message_set_msgctxt.argtypes = [po_message_t, STRING]
    gpo.po_message_set_msgid.argtypes = [po_message_t, STRING]
    # Declared like its siblings; this setter is called by pounit as well.
    gpo.po_message_set_msgid_plural.argtypes = [po_message_t, STRING]
    gpo.po_message_set_msgstr.argtypes = [po_message_t, STRING]
    gpo.po_message_set_msgstr_plural.argtypes = [po_message_t, c_int, STRING]
    gpo.po_message_set_range.argtypes = [po_message_t, c_int, c_int]


# Load libgettextpo
gpo = None
# 'gettextpo' is recognised on Unix, while only 'libgettextpo' is recognised on
# windows. Therefore we test both.
names = ["gettextpo", "libgettextpo"]
for name in names:
    lib_location = ctypes.util.find_library(name)
    if lib_location:
        gpo = cdll.LoadLibrary(lib_location)
        if gpo:
            break
else:
    # Don't raise exception in Sphinx autodoc [where xml is Mock()ed]. There is
    # nothing special about use of xml here - any of the Mock classes set up
    # in docs/conf.py would work as well, but xml is likely always to be there.
    gpo = None
    if "xml" not in sys.modules or sys.modules["xml"].__path__ != "/dev/null":

        # Now we are getting desperate, so let's guess a unix type DLL that
        # might be in LD_LIBRARY_PATH or loaded with LD_PRELOAD
        try:
            gpo = cdll.LoadLibrary("libgettextpo.so")
        except OSError as err:
            raise ImportError("gettext PO library not found") from err

if gpo:
    setup_call_types(gpo)

# Setup the po_xerror_handler
xerror_handler = po_xerror_handler()
xerror_handler.xerror = xerror_prototype(xerror_cb)
xerror_handler.xerror2 = xerror2_prototype(xerror2_cb)


def escapeforpo(text):
    """Delegate to :func:`pypo.escapeforpo`."""
    return pypo.escapeforpo(text)


def quoteforpo(text):
    """Delegate to :func:`pypo.quoteforpo`."""
    return pypo.quoteforpo(text)


def unquotefrompo(postr):
    """Delegate to :func:`pypo.unquotefrompo`."""
    return pypo.unquotefrompo(postr)


def get_libgettextpo_version():
    """Returns the libgettextpo version

    :rtype: three-value tuple
    :return: libgettextpo version in the following format::
        (major version, minor version, subminor version)
    """
    # gettext-po.h declares ``extern int libgettextpo_version`` encoded as
    # (major << 16) + (minor << 8) + subminor; read it as a C int so that no
    # bytes beyond the variable are picked up where ``long`` is wider.
    libversion = c_int.in_dll(gpo, "libgettextpo_version").value
    major = libversion >> 16
    minor = (libversion >> 8) & 0xFF
    subminor = libversion & 0xFF
    return major, minor, subminor


def gpo_encode(value):
    """Return ``value`` UTF-8 encoded if it is a str, otherwise unchanged."""
    return value.encode("utf-8") if isinstance(value, str) else value


def gpo_decode(value):
    """Return ``value`` UTF-8 decoded if it is bytes, otherwise unchanged."""
    if isinstance(value, bytes):
        return value.decode("utf-8")
    return value


class pounit(pocommon.pounit):
    """A PO unit backed by a libgettextpo ``po_message_t``."""

    #: fixed encoding that is always used for cPO structure (self._gpo_message)
    CPO_ENC = "utf-8"

    def __init__(self, source=None, encoding="utf-8", gpo_message=None):
        """Create a new message, or wrap an existing one.

        :param source: source text for a newly created unit.
        :param encoding: encoding of the strings held by ``gpo_message``;
            they are re-encoded in place to :attr:`CPO_ENC` if it differs.
        :param gpo_message: existing ``po_message_t`` to wrap, if any.
        """
        self._rich_source = None
        self._rich_target = None
        encoding = encoding or "utf-8"
        if not gpo_message:
            self._gpo_message = gpo.po_message_create()
        if source or source == "":
            self.source = source
            self.target = ""
        elif gpo_message:
            if encoding.lower() != self.CPO_ENC:
                features = ["msgctxt", "msgid", "msgid_plural"]
                features += ["prev_" + x for x in features]
                features += ["comments", "extracted_comments", "msgstr"]
                for feature in features:
                    text = getattr(gpo, "po_message_" + feature)(gpo_message)
                    if text:
                        getattr(gpo, "po_message_set_" + feature)(
                            gpo_message, text.decode(encoding).encode(self.CPO_ENC)
                        )
                # Also iterate through plural forms
                nplural = 0
                text = gpo.po_message_msgstr_plural(gpo_message, nplural)
                while text:
                    # The setter takes (message, index, msgstr); the index
                    # must come before the string.
                    gpo.po_message_set_msgstr_plural(
                        gpo_message,
                        nplural,
                        text.decode(encoding).encode(self.CPO_ENC),
                    )
                    nplural += 1
                    text = gpo.po_message_msgstr_plural(gpo_message, nplural)
            self._gpo_message = gpo_message
        self.infer_state()

    def infer_state(self):
        """Set the unit state from the obsolete/fuzzy flags and the target."""
        # FIXME: do obsolete
        if gpo.po_message_is_obsolete(self._gpo_message):
            if gpo.po_message_is_fuzzy(self._gpo_message):
                self.set_state_n(self.STATE[self.S_FUZZY_OBSOLETE][0])
            else:
                self.set_state_n(self.STATE[self.S_OBSOLETE][0])
        elif gpo.po_message_is_fuzzy(self._gpo_message):
            self.set_state_n(self.STATE[self.S_FUZZY][0])
        elif self.target:
            self.set_state_n(self.STATE[self.S_TRANSLATED][0])
        else:
            self.set_state_n(self.STATE[self.S_UNTRANSLATED][0])

    def setmsgid_plural(self, msgid_plural):
        """Set the plural msgid; a list of strings is joined first."""
        if isinstance(msgid_plural, list):
            msgid_plural = "".join(msgid_plural)
        gpo.po_message_set_msgid_plural(self._gpo_message, gpo_encode(msgid_plural))

    msgid_plural = property(None, setmsgid_plural)

    @property
    def source(self):
        """The msgid without KDE-style comment, as str or multistring."""

        def remove_msgid_comments(text):
            if not text:
                return text
            if text.startswith("_:"):
                remainder = re.search(r"_: .*\n(.*)", text)
                if remainder:
                    return remainder.group(1)
                return ""
            return text

        singular = remove_msgid_comments(
            gpo_decode(gpo.po_message_msgid(self._gpo_message)) or ""
        )
        if not singular:
            return ""
        if self.hasplural():
            multi = multistring(singular)
            pluralform = (
                gpo_decode(gpo.po_message_msgid_plural(self._gpo_message)) or ""
            )
            multi.strings.append(pluralform)
            return multi
        return singular

    @source.setter
    def source(self, source):
        if isinstance(source, multistring):
            source = source.strings
        if isinstance(source, list):
            gpo.po_message_set_msgid(self._gpo_message, gpo_encode(source[0]))
            if len(source) > 1:
                gpo.po_message_set_msgid_plural(
                    self._gpo_message, gpo_encode(source[1])
                )
        else:
            gpo.po_message_set_msgid(self._gpo_message, gpo_encode(source))
            gpo.po_message_set_msgid_plural(self._gpo_message, None)

    @property
    def target(self):
        """The msgstr; a multistring of all plural forms for plural units."""
        if self.hasplural():
            plurals = []
            nplural = 0
            plural = gpo.po_message_msgstr_plural(self._gpo_message, nplural)
            while plural:
                plurals.append(plural.decode(self.CPO_ENC))
                nplural += 1
                plural = gpo.po_message_msgstr_plural(self._gpo_message, nplural)
            if plurals:
                multi = multistring(plurals)
            else:
                multi = multistring("")
        else:
            multi = gpo_decode(gpo.po_message_msgstr(self._gpo_message)) or ""
        return multi

    @target.setter
    def target(self, target):
        # for plural strings: convert 'target' into a list
        if self.hasplural():
            if isinstance(target, multistring):
                target = target.strings
            elif isinstance(target, str):
                target = [target]
        # for non-plurals: check number of items in 'target'
        elif isinstance(target, (dict, list)):
            if len(target) == 1:
                # A dict contributes its values (as in the plural case
                # below), so do not index it with the literal key 0.
                if isinstance(target, dict):
                    target = next(iter(target.values()))
                else:
                    target = target[0]
            else:
                raise ValueError(
                    "po msgid element has no plural but msgstr has %d elements (%s)"
                    % (len(target), target)
                )
        # empty the previous list of messages
        # TODO: the "pypo" implementation does not remove the previous items of
        #   the target, if self.target == target (essentially: comparing only
        #   the first item of a plural string with the single new string)
        #   Maybe this behaviour should be unified.
        if isinstance(target, (dict, list)):
            i = 0
            message = gpo.po_message_msgstr_plural(self._gpo_message, i)
            while message is not None:
                gpo.po_message_set_msgstr_plural(self._gpo_message, i, None)
                i += 1
                message = gpo.po_message_msgstr_plural(self._gpo_message, i)
        # add the items of a list
        if isinstance(target, list):
            for i, targetstring in enumerate(target):
                gpo.po_message_set_msgstr_plural(
                    self._gpo_message, i, gpo_encode(targetstring)
                )
        # add the values of a dict
        elif isinstance(target, dict):
            for i, targetstring in enumerate(target.values()):
                gpo.po_message_set_msgstr_plural(
                    self._gpo_message, i, gpo_encode(targetstring)
                )
        # add a single string
        else:
            if target is None:
                gpo.po_message_set_msgstr(self._gpo_message, gpo_encode(""))
            else:
                gpo.po_message_set_msgstr(self._gpo_message, gpo_encode(target))

    def getid(self):
        """The unique identifier for this unit according to the conventions in
        .mo files.
        """
        unit_id = gpo_decode(gpo.po_message_msgid(self._gpo_message)) or ""
        # Gettext does not consider the plural to determine duplicates, only
        # the msgid. For generation of .mo files, we might want to use this
        # code to generate the entry for the hash table, but for now, it is
        # commented out for conformance to gettext.
        #        plural = gpo.po_message_msgid_plural(self._gpo_message)
        #        if not plural is None:
        #            id = '%s\0%s' % (id, plural)
        context = gpo.po_message_msgctxt(self._gpo_message)
        if context:
            unit_id = f"{gpo_decode(context)}\04{unit_id}"
        return unit_id

    def getnotes(self, origin=None):
        """Return the comments of the unit as a str.

        :param origin: None for translator plus extracted comments,
            ``"translator"``, or one of ``"programmer"``, ``"developer"``,
            ``"source code"`` for extracted comments.
        :raises ValueError: for any other ``origin``.
        """
        if origin is None:
            # Either getter yields None for a NULL pointer; treat as empty.
            comments = (gpo.po_message_comments(self._gpo_message) or b"") + (
                gpo.po_message_extracted_comments(self._gpo_message) or b""
            )
        elif origin == "translator":
            comments = gpo.po_message_comments(self._gpo_message) or b""
        elif origin in ["programmer", "developer", "source code"]:
            comments = gpo.po_message_extracted_comments(self._gpo_message) or b""
        else:
            raise ValueError("Comment type not valid")

        # The former pre-0.17 branch re-joined the lines unchanged and called
        # str.split() on bytes, so it could only fail; it has been dropped.
        # Let's drop the last newline
        return gpo_decode(comments[:-1])

    def addnote(self, text, origin=None, position="append"):
        """Add ``text`` to the comments of the unit.

        :param text: note text; empty or whitespace-only text is ignored.
        :param origin: extracted comments are written for ``"programmer"``,
            ``"developer"`` or ``"source code"``; translator comments
            otherwise.
        :param position: ``"append"``, ``"merge"`` (skip lines already
            present), or anything else to prepend.
        """
        # ignore empty strings and strings without non-space characters
        if not (text and text.strip()):
            return
        oldnotes = self.getnotes(origin)
        newnotes = None
        if oldnotes:
            if position == "append":
                newnotes = oldnotes + "\n" + text
            elif position == "merge":
                if oldnotes != text:
                    oldnoteslist = oldnotes.split("\n")
                    for newline in text.split("\n"):
                        newline = newline.rstrip("\r")
                        # avoid duplicate comment lines (this might cause some problems)
                        if newline not in oldnotes or len(newline) < 5:
                            oldnoteslist.append(newline)
                    newnotes = "\n".join(oldnoteslist)
            else:
                newnotes = text + "\n" + oldnotes
        else:
            newnotes = "\n".join(line.rstrip("\r") for line in text.split("\n"))

        if newnotes:
            needs_space = get_libgettextpo_version() < (0, 17, 0)
            newlines = [
                " " + line if line and needs_space else line
                for line in newnotes.split("\n")
            ]
            newnotes = gpo_encode("\n".join(newlines))
            if origin in ["programmer", "developer", "source code"]:
                gpo.po_message_set_extracted_comments(self._gpo_message, newnotes)
            else:
                gpo.po_message_set_comments(self._gpo_message, newnotes)

    def removenotes(self, origin=None):
        """Clear the translator comments (``origin`` is currently ignored)."""
        gpo.po_message_set_comments(self._gpo_message, b"")

    def copy(self):
        """Return a new unit object that refers to the same native message."""
        newpo = self.__class__()
        newpo._gpo_message = self._gpo_message
        return newpo

    def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
        """Merges the otherpo (with the same msgid) into this one.

        Overwrite non-blank self.msgstr only if overwrite is True
        merge comments only if comments is True
        """

        if not isinstance(otherpo, pounit):
            super().merge(otherpo, overwrite, comments)
            return
        if comments:
            self.addnote(
                otherpo.getnotes("translator"), origin="translator", position="merge"
            )
            # FIXME mergelists(self.typecomments, otherpo.typecomments)
            if not authoritative:
                # We don't bring across otherpo.automaticcomments as we consider ourself
                # to be the authority.  Same applies to otherpo.msgidcomments
                self.addnote(
                    otherpo.getnotes("developer"), origin="developer", position="merge"
                )
                self.msgidcomment = otherpo._extract_msgidcomments() or None
                self.addlocations(otherpo.getlocations())
        if not self.istranslated() or overwrite:
            # Remove kde-style comments from the translation (if any).
            if self._extract_msgidcomments(otherpo.target):
                otherpo.target = otherpo.target.replace(
                    "_: " + otherpo._extract_msgidcomments() + "\n", ""
                )
            self.target = otherpo.target
            if (
                self.source != otherpo.source
                or self.getcontext() != otherpo.getcontext()
            ):
                self.markfuzzy()
            else:
                self.markfuzzy(otherpo.isfuzzy())
        elif not otherpo.istranslated():
            if self.source != otherpo.source:
                self.markfuzzy()
        else:
            if self.target != otherpo.target:
                self.markfuzzy()

    def isheader(self):
        """Return True for a unit with an empty id and a non-empty target."""
        # return self.source == "" and self.target != ""
        # we really want to make sure that there is no msgidcomment or msgctxt
        return self.getid() == "" and len(self.target) > 0

    def isblank(self):
        """Return True if source, target and context are all empty."""
        return len(self.source) == len(self.target) == len(self.getcontext()) == 0

    def hastypecomment(self, typecomment):
        """Return the library's format flag for ``typecomment`` (e.g. c-format)."""
        return gpo.po_message_is_format(self._gpo_message, gpo_encode(typecomment))

    def settypecomment(self, typecomment, present=True):
        """Set or clear the format flag ``typecomment`` on the message."""
        gpo.po_message_set_format(self._gpo_message, gpo_encode(typecomment), present)

    def hasmarkedcomment(self, commentmarker):
        """Return True if a translator comment line starts with ``(marker)``."""
        commentmarker = "(%s)" % commentmarker
        return any(
            comment.startswith(commentmarker)
            for comment in self.getnotes("translator").split("\n")
        )

    def isfuzzy(self):
        """Return the library's fuzzy flag for the message."""
        return gpo.po_message_is_fuzzy(self._gpo_message)

    def _domarkfuzzy(self, present=True):
        gpo.po_message_set_fuzzy(self._gpo_message, present)

    def makeobsolete(self):
        """Flag the message as obsolete and refresh the unit state."""
        # FIXME: libgettexpo currently does not reset other data, we probably want to do that
        # but a better solution would be for libgettextpo to output correct data on serialisation
        gpo.po_message_set_obsolete(self._gpo_message, True)
        self.infer_state()

    def resurrect(self):
        """Clear the obsolete flag and refresh the unit state."""
        gpo.po_message_set_obsolete(self._gpo_message, False)
        self.infer_state()

    def hasplural(self):
        """Return True if the message has a msgid_plural."""
        return gpo.po_message_msgid_plural(self._gpo_message) is not None

    def _extract_msgidcomments(self, text=None):
        """Extract KDE style msgid comments from the unit.

        :rtype: String
        :return: Returns the extracted msgidcomments found in this unit's msgid.
        """
        if not text:
            text = gpo_decode(gpo.po_message_msgid(self._gpo_message)) or ""
        if text:
            return pocommon.extract_msgid_comment(text)
        return ""

    def setmsgidcomment(self, msgidcomment):
        """Prefix the source with a KDE-style ``_: comment`` line, if given."""
        if msgidcomment:
            self.source = f"_: {msgidcomment}\n{self.source}"

    msgidcomment = property(_extract_msgidcomments, setmsgidcomment)

    def __str__(self):
        """Return the unit serialised as PO text via a header-less pofile."""
        pf = pofile(noheader=True)
        pf.addunit(self)
        return bytes(pf).decode(self.CPO_ENC)

    def getlocations(self):
        """Return the message's file positions as ``file`` or ``file:line``."""
        locations = []
        i = 0
        location = gpo.po_message_filepos(self._gpo_message, i)
        while location:
            locname = gpo_decode(gpo.po_filepos_file(location))
            locline = gpo.po_filepos_start_line(location)
            if locline == -1:
                locstring = locname
            else:
                locstring = ":".join([locname, str(locline)])
            locations.append(pocommon.unquote_plus(locstring))
            i += 1
            location = gpo.po_message_filepos(self._gpo_message, i)
        return locations

    def addlocation(self, location):
        """Add a ``file`` or ``file:line`` location to the message.

        Locations containing a space are quoted first; a line of -1 is
        passed to the library when no numeric line part is present.
        """
        if location.find(" ") != -1:
            location = pocommon.quote_plus(location)
        parts = location.split(":")
        if len(parts) == 2 and parts[1].isdigit():
            file = parts[0]
            line = int(parts[1])
        else:
            file = location
            line = -1
        gpo.po_message_add_filepos(self._gpo_message, gpo_encode(file), line)

    def getcontext(self):
        """Return the msgctxt, or else the KDE-style msgid comment."""
        msgctxt = gpo.po_message_msgctxt(self._gpo_message)
        if msgctxt:
            return gpo_decode(msgctxt)
        return self._extract_msgidcomments()

    def setcontext(self, context):
        """Set the msgctxt of the message."""
        gpo.po_message_set_msgctxt(self._gpo_message, gpo_encode(context))

    @classmethod
    def buildfromunit(cls, unit, encoding=None):
        """Build a native unit from a foreign unit, preserving as much
        information as possible.
        """
        if type(unit) == cls and hasattr(unit, "copy") and callable(unit.copy):
            return unit.copy()
        elif isinstance(unit, pocommon.pounit):
            newunit = cls(unit.source, encoding)
            newunit.target = unit.target
            # context
            newunit.msgidcomment = unit._extract_msgidcomments()
            context = unit.getcontext()
            if not newunit.msgidcomment and context:
                newunit.setcontext(context)

            locations = unit.getlocations()
            if locations:
                newunit.addlocations(locations)
            notes = unit.getnotes("developer")
            if notes:
                newunit.addnote(notes, "developer")
            notes = unit.getnotes("translator")
            if notes:
                newunit.addnote(notes, "translator")
            if unit.isobsolete():
                newunit.makeobsolete()
            newunit.markfuzzy(unit.isfuzzy())
            for tc in ["python-format", "c-format", "php-format"]:
                if unit.hastypecomment(tc):
                    newunit.settypecomment(tc)
                    # We assume/guess/hope that there will only be one
                    break
            return newunit
        else:
            return base.TranslationUnit.buildfromunit(unit)


class pofile(pocommon.pofile):
    """A PO store backed by a libgettextpo ``po_file_t``."""

    UnitClass = pounit

    def __init__(self, inputfile=None, noheader=False, **kwargs):
        """Create the store; an empty native file is made if no input is given."""
        self._gpo_memory_file = None
        self._gpo_message_iterator = None
        self.sourcelanguage = None
        self.targetlanguage = None
        if inputfile is None:
            self.units = []
            self._encoding = kwargs.get("encoding")
            self._gpo_memory_file = gpo.po_file_create()
            self._gpo_message_iterator = gpo.po_message_iterator(
                self._gpo_memory_file, None
            )
        super().__init__(inputfile=inputfile, noheader=noheader, **kwargs)

    def addunit(self, unit, new=True):
        """Add ``unit``; it is also inserted into the native file if ``new``."""
        if new:
            gpo.po_message_insert(self._gpo_message_iterator, unit._gpo_message)
        super().addunit(unit)

    def removeunit(self, unit):
        """Always raises ValueError: removal is not supported."""
        # There seems to be no API to remove a message
        raise ValueError("Unit removal not supported by cpo")

    def _insert_header(self, header):
        """Insert ``header`` as the first unit and first native message."""
        header._store = self
        self.units.insert(0, header)
        # Restart iteration so that the insertion happens at the beginning.
        gpo.po_message_iterator_free(self._gpo_message_iterator)
        self._gpo_message_iterator = gpo.po_message_iterator(
            self._gpo_memory_file, None
        )
        gpo.po_message_insert(self._gpo_message_iterator, header._gpo_message)
        # Advance to the end again so that later inserts append.
        while gpo.po_next_message(self._gpo_message_iterator):
            pass

    def removeduplicates(self, duplicatestyle="merge"):
        """make sure each msgid is unique ; merge comments etc from duplicates into original"""
        # TODO: can we handle consecutive calls to removeduplicates()? What
        # about files already containing msgctxt? - test
        id_dict = {}
        uniqueunits = []
        # TODO: this is using a list as the pos aren't hashable, but this is slow.
        # probably not used frequently enough to worry about it, though.
        markedpos = []

        def addcomment(thepo):
            thepo.msgidcomment = " ".join(thepo.getlocations())
            markedpos.append(thepo)

        for thepo in self.units:
            unit_id = thepo.getid()
            if thepo.isheader() and not thepo.getlocations():
                # header msgids shouldn't be merged...
                uniqueunits.append(thepo)
            elif unit_id in id_dict:
                if duplicatestyle == "merge":
                    if unit_id:
                        id_dict[unit_id].merge(thepo)
                    else:
                        addcomment(thepo)
                        uniqueunits.append(thepo)
                elif duplicatestyle == "msgctxt":
                    origpo = id_dict[unit_id]
                    if origpo not in markedpos:
                        origpo.setcontext(" ".join(origpo.getlocations()))
                        markedpos.append(thepo)
                    thepo.setcontext(" ".join(thepo.getlocations()))
                    thepo_msgctxt = gpo.po_message_msgctxt(thepo._gpo_message)
                    idpo_msgctxt = gpo.po_message_msgctxt(
                        id_dict[unit_id]._gpo_message
                    )
                    if not thepo_msgctxt == idpo_msgctxt:
                        uniqueunits.append(thepo)
                    else:
                        logger.warning(
                            "Duplicate unit found with msgctx of '%s' and source '%s'",
                            thepo_msgctxt,
                            thepo.source,
                        )
            else:
                if not unit_id:
                    if duplicatestyle == "merge":
                        addcomment(thepo)
                    else:
                        thepo.setcontext(" ".join(thepo.getlocations()))
                id_dict[unit_id] = thepo
                uniqueunits.append(thepo)
        # Rebuild the native file from the surviving units only.
        new_gpo_memory_file = gpo.po_file_create()
        new_gpo_message_iterator = gpo.po_message_iterator(new_gpo_memory_file, None)
        for unit in uniqueunits:
            gpo.po_message_insert(new_gpo_message_iterator, unit._gpo_message)
        gpo.po_message_iterator_free(self._gpo_message_iterator)
        self._gpo_message_iterator = new_gpo_message_iterator
        self._gpo_memory_file = new_gpo_memory_file
        self.units = uniqueunits

    def serialize(self, out):
        """Write the store as PO data (bytes) to ``out``.

        The library writes to a temporary file which is then read back. If
        the content cannot be encoded in the store's encoding, the store
        switches to UTF-8, updates the header and writes again.

        :param out: binary file-like object with a ``write`` method.
        :raises ValueError: if the library reports an error while writing.
        """

        def obsolete_workaround():
            # Remove all items that are not output by msgmerge when a unit is obsolete.  This is a work
            # around for bug in libgettextpo
            # FIXME Do version test in case they fix this bug
            for unit in self.units:
                if unit.isobsolete():
                    gpo.po_message_set_extracted_comments(unit._gpo_message, b"")
                    location = gpo.po_message_filepos(unit._gpo_message, 0)
                    while location:
                        gpo.po_message_remove_filepos(unit._gpo_message, 0)
                        location = gpo.po_message_filepos(unit._gpo_message, 0)

        def writefile(filename):
            xerror_storage.exception = None
            self._gpo_memory_file = gpo.po_file_write_v2(
                self._gpo_memory_file, gpo_encode(filename), xerror_handler
            )
            if xerror_storage.exception is not None:
                raise xerror_storage.exception
            with open(filename, "rb") as tfile:
                return tfile.read()

        # ``out`` receives bytes in every other path, so the empty result
        # must be bytes as well.
        outputstring = b""
        if self._gpo_memory_file:
            obsolete_workaround()
            f, fname = tempfile.mkstemp(prefix="translate", suffix=".po")
            os.close(f)
            try:
                outputstring = writefile(fname)
                if self.encoding != pounit.CPO_ENC:
                    try:
                        outputstring = outputstring.decode(pounit.CPO_ENC).encode(
                            self.encoding
                        )
                    except UnicodeEncodeError:
                        self.encoding = pounit.CPO_ENC
                        self.updateheader(
                            content_type="text/plain; charset=UTF-8",
                            content_transfer_encoding="8bit",
                        )
                        outputstring = writefile(fname)
            finally:
                os.remove(fname)
        out.write(outputstring)

    def isempty(self):
        """Returns True if the object doesn't contain any translation units."""
        if len(self.units) == 0:
            return True
        # Skip the first unit if it is a header.
        if self.units[0].isheader():
            units = self.units[1:]
        else:
            units = self.units

        for unit in units:
            if not unit.isblank() and not unit.isobsolete():
                return False
        return True

    def parse(self, input):
        """Read PO data into the store.

        :param input: a file-like object (it is read and closed), the path
            of an existing file, or the PO content itself; content is
            written to a temporary file for the library to read.
        :raises ValueError: if the library reports an error or returns no
            file.
        """
        if hasattr(input, "name"):
            self.filename = input.name
        elif not getattr(self, "filename", ""):
            self.filename = ""

        if hasattr(input, "read"):
            posrc = input.read()
            input.close()
            input = posrc

        tmpname = None
        try:
            if not os.path.isfile(input):
                # This is not a file - we write the string to a temporary file
                fd, tmpname = tempfile.mkstemp(prefix="translate", suffix=".po")
                # fdopen closes the descriptor even if the write fails, and
                # gpo_encode lets str content through as UTF-8 bytes.
                with os.fdopen(fd, "wb") as tmpfile:
                    tmpfile.write(gpo_encode(input))
                input = tmpname

            xerror_storage.exception = None
            self._gpo_memory_file = gpo.po_file_read_v3(
                gpo_encode(input), xerror_handler
            )
            if xerror_storage.exception is not None:
                raise xerror_storage.exception
            # A NULL result is a falsy ctypes pointer, not None; stop here
            # rather than handing NULL to further library calls.
            if not self._gpo_memory_file:
                logger.error("Error: libgettextpo could not read %s", self.filename)
                raise ValueError("libgettextpo could not read the PO file")
        finally:
            if tmpname is not None:
                os.remove(tmpname)

        self.units = []
        # Handle xerrors here
        self._header = gpo.po_file_domain_header(self._gpo_memory_file, None)
        if self._header:
            charset = gpo_decode(
                gpo.po_header_field(self._header, gpo_encode("Content-Type"))
            )
            if charset:
                # A Content-Type without a charset= part leaves the encoding
                # unset instead of failing on a missing match.
                match = re.search("charset=([^\\s]+)", charset)
                charset = match.group(1) if match else None
            self.encoding = charset
        self._gpo_message_iterator = gpo.po_message_iterator(
            self._gpo_memory_file, None
        )
        newmessage = gpo.po_next_message(self._gpo_message_iterator)
        while newmessage:
            newunit = pounit(gpo_message=newmessage, encoding=self.encoding)
            self.addunit(newunit, new=False)
            newmessage = gpo.po_next_message(self._gpo_message_iterator)
        self._free_iterator()

    def __del__(self):
        # We currently disable this while we still get segmentation faults.
        # Note that this is definitely leaking memory because of this.
        return
        self._free_iterator()
        if self._gpo_memory_file is not None:
            gpo.po_file_free(self._gpo_memory_file)
            self._gpo_memory_file = None

    def _free_iterator(self):
        # We currently disable this while we still get segmentation faults.
        # Note that this is definitely leaking memory because of this.
        return
        if self._gpo_message_iterator is not None:
            gpo.po_message_iterator_free(self._gpo_message_iterator)
            self._gpo_message_iterator = None