1#
2# Copyright 2002-2007 Zuza Software Foundation
3#
4# This file is part of translate.
5#
6# translate is free software; you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation; either version 2 of the License, or
9# (at your option) any later version.
10#
11# translate is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program; if not, see <http://www.gnu.org/licenses/>.
18
19"""Classes that hold units of .po files (pounit) or entire files (pofile).
20
21Gettext-style .po (or .pot) files are used in translations for KDE, GNOME and
22many other projects.
23
24This uses libgettextpo from the gettext package. Any version before 0.17 will
25at least cause some subtle bugs or may not work at all. Developers might want
26to have a look at gettext-tools/libgettextpo/gettext-po.h from the gettext
27package for the public API of the library.
28"""
29
30import ctypes.util
31import logging
32import os
33import re
34import sys
35import tempfile
36import threading
37from ctypes import (
38    CFUNCTYPE,
39    POINTER,
40    Structure,
41    c_char_p,
42    c_int,
43    c_long,
44    c_size_t,
45    c_uint,
46    cdll,
47)
48
49from translate.misc.multistring import multistring
50from translate.storage import base, pocommon, pypo
51
52
53logger = logging.getLogger(__name__)
54
55lsep = " "
56"""Separator for #: entries"""
57
58STRING = c_char_p
59
60
61# Structures
class po_message(Structure):
    """Opaque structure with no declared fields; see ``po_message_t``."""
64
65
class po_file(Structure):
    """Opaque structure with no declared fields; see ``po_file_t``."""
68
69
class po_filepos(Structure):
    """Opaque structure with no declared fields; see ``po_filepos_t``."""
72
73
class po_iterator(Structure):
    """Opaque structure with no declared fields; see ``po_iterator_t``."""
76
77
# Pointer types to the field-less structures declared above. They are used
# as argument and return types in the callback prototypes and in
# setup_call_types().
po_message_t = POINTER(po_message)
"""A po_message_t represents a message in a PO file."""

po_file_t = POINTER(po_file)
"""A po_file_t represents a PO file."""

po_filepos_t = POINTER(po_filepos)
"""A po_filepos_t represents the position in a PO file."""

po_iterator_t = POINTER(po_iterator)
"""A po_iterator_t represents an iterator through a PO file."""
89
90
91# Function prototypes
# Callback prototype for po_xerror_handler.xerror. It returns nothing and
# takes, in order: severity, message, filename, lineno, column, multiline_p
# and message_text (the parameters of xerror_cb). gettext-po.h declares
# lineno and column as size_t, hence c_size_t rather than c_uint.
xerror_prototype = CFUNCTYPE(
    None, c_int, po_message_t, STRING, c_size_t, c_size_t, c_int, STRING
)
# Callback prototype for po_xerror_handler.xerror2: the severity followed by
# the same six arguments twice, once for each of the two messages involved
# (the parameters of xerror2_cb).
xerror2_prototype = CFUNCTYPE(
    None,
    c_int,
    po_message_t,
    STRING,
    c_size_t,
    c_size_t,
    c_int,
    STRING,
    po_message_t,
    STRING,
    c_size_t,
    c_size_t,
    c_int,
    STRING,
)
111
112
113# Structures (error handler)
class po_xerror_handler(Structure):
    """Pair of error callbacks: ``xerror`` and ``xerror2``, in that order."""

    _fields_ = [
        ("xerror", xerror_prototype),
        ("xerror2", xerror2_prototype),
    ]
116
117
class po_error_handler(Structure):
    """Structure of four callbacks, none of which returns a value.

    The fields are, in order: ``error``, ``error_at_line``,
    ``multiline_warning`` and ``multiline_error``.
    """

    _fields_ = [
        (
            "error",
            CFUNCTYPE(None, c_int, c_int, STRING),
        ),
        (
            "error_at_line",
            CFUNCTYPE(None, c_int, c_int, STRING, c_uint, STRING),
        ),
        (
            "multiline_warning",
            CFUNCTYPE(None, STRING, STRING),
        ),
        (
            "multiline_error",
            CFUNCTYPE(None, STRING, STRING),
        ),
    ]
125
126
# Thread-local storage; trigger_exception() stores the exception it builds on
# this object as the ``exception`` attribute.
xerror_storage = threading.local()

# Message texts for which trigger_exception() records no exception. The
# lookup is a set membership test, so the text has to match exactly.
ignored_erorrs = {
    # TODO: this is probably bug somewhere in cpo, but
    # it used to be silently ignored before the exceptions
    # were raised, so it is left to fixing separately
    "invalid multibyte sequence",
    # Duplicate messages are allowed
    "duplicate message definition",
}
137
138
def trigger_exception(severity, filename, lineno, column, message_text):
    """Record a :class:`ValueError` for a reported problem.

    The exception is stored as ``xerror_storage.exception``; it is not raised
    here. Nothing is stored for warnings or for messages listed in
    ``ignored_erorrs``.

    :param severity: 0 is warning, 1 error, 2 critical.
    :param filename: file the problem refers to; when falsy the location is
        left out of the exception text.
    :param lineno: line number used in the location prefix.
    :param column: column number used in the location prefix.
    :param message_text: description of the problem.
    """
    if severity < 1 or message_text in ignored_erorrs:
        return
    detail = (
        f"{filename}:{lineno}:{column}: {message_text}" if filename else message_text
    )
    xerror_storage.exception = ValueError(detail)
147
148
149# Callback functions for po_xerror_handler
def xerror_cb(severity, message, filename, lineno, column, multiline_p, message_text):
    """Callback for the ``xerror`` slot: log the problem and record it.

    The text arguments arrive as bytes and are decoded before they are
    logged and handed to :func:`trigger_exception`.

    :param severity: severity passed on to :func:`trigger_exception`.
    :param message: the message the problem refers to (only logged).
    :param filename: file name as bytes, or a falsy value.
    :param lineno: line number of the problem.
    :param column: column number of the problem.
    :param multiline_p: multi-line flag of the message text (only logged).
    :param message_text: description of the problem as bytes.
    """
    # An exception raised inside a ctypes callback cannot propagate to the
    # code that called into the library, so a strict decode failure here
    # would lose the reported error. Decode leniently instead.
    message_text = message_text.decode(errors="replace") if message_text else ""
    if filename:
        filename = filename.decode(errors="replace")
    logger.error(
        "xerror_cb %s %s %s %s %s %s %s",
        severity,
        message,
        filename,
        lineno,
        column,
        multiline_p,
        message_text,
    )
    trigger_exception(severity, filename, lineno, column, message_text)
165
166
def xerror2_cb(
    severity,
    message1,
    filename1,
    lineno1,
    column1,
    multiline_p1,
    message_text1,
    message2,
    filename2,
    lineno2,
    column2,
    multiline_p2,
    message_text2,
):
    """Callback for the ``xerror2`` slot: a problem involving two messages.

    Both sets of details are logged (``message2`` itself is not), but only
    the first message's location and text are passed to
    :func:`trigger_exception`.

    The ``*1`` parameters describe the first message and the ``*2``
    parameters the second one; text arguments arrive as bytes.
    """
    # An exception raised inside a ctypes callback cannot propagate to the
    # code that called into the library, so a strict decode failure here
    # would lose the reported error. Decode leniently instead.
    message_text1 = message_text1.decode(errors="replace") if message_text1 else ""
    message_text2 = message_text2.decode(errors="replace") if message_text2 else ""
    if filename1:
        filename1 = filename1.decode(errors="replace")
    if filename2:
        filename2 = filename2.decode(errors="replace")
    logger.error(
        "xerror2_cb %s %s %s %s %s %s %s %s %s %s %s %s",
        severity,
        message1,
        filename1,
        lineno1,
        column1,
        multiline_p1,
        message_text1,
        filename2,
        lineno2,
        column2,
        multiline_p2,
        message_text2,
    )
    trigger_exception(severity, filename1, lineno1, column1, message_text1)
204
205
206# Setup return and parameter types
207# See also http://git.savannah.gnu.org/cgit/gettext.git/tree/gettext-tools/libgettextpo/gettext-po.in.h
def setup_call_types(gpo):
    """Declare argument and return types of the libgettextpo functions used.

    :param gpo: the loaded library object; the ``argtypes`` and ``restype``
        attributes of its functions are set in place.
    """
    # File access
    gpo.po_file_create.restype = po_file_t
    gpo.po_file_read_v3.argtypes = [STRING, POINTER(po_xerror_handler)]
    gpo.po_file_read_v3.restype = po_file_t
    gpo.po_file_write_v2.argtypes = [po_file_t, STRING, POINTER(po_xerror_handler)]
    gpo.po_file_write_v2.restype = po_file_t
    gpo.po_file_free.argtypes = [po_file_t]

    # Header
    gpo.po_file_domain_header.argtypes = [po_file_t, STRING]
    gpo.po_file_domain_header.restype = STRING
    gpo.po_header_field.argtypes = [STRING, STRING]
    gpo.po_header_field.restype = STRING
    gpo.po_header_set_field.argtypes = [STRING, STRING, STRING]
    gpo.po_header_set_field.restype = STRING

    # Locations (filepos)
    gpo.po_filepos_file.argtypes = [po_filepos_t]
    gpo.po_filepos_file.restype = STRING
    gpo.po_filepos_start_line.argtypes = [po_filepos_t]
    gpo.po_filepos_start_line.restype = c_int  # not strictly true casting
    gpo.po_message_filepos.argtypes = [po_message_t, c_int]
    gpo.po_message_filepos.restype = po_filepos_t
    gpo.po_message_add_filepos.argtypes = [po_message_t, STRING, c_size_t]
    gpo.po_message_remove_filepos.argtypes = [po_message_t, c_size_t]

    # Iterators
    gpo.po_message_iterator.argtypes = [po_file_t, STRING]
    gpo.po_message_iterator.restype = po_iterator_t
    gpo.po_message_iterator_free.argtypes = [po_iterator_t]
    gpo.po_next_message.argtypes = [po_iterator_t]
    gpo.po_next_message.restype = po_message_t
    gpo.po_message_insert.argtypes = [po_iterator_t, po_message_t]

    # Message (get methods)
    gpo.po_message_create.restype = po_message_t
    gpo.po_message_msgctxt.argtypes = [po_message_t]
    gpo.po_message_msgctxt.restype = STRING
    gpo.po_message_comments.argtypes = [po_message_t]
    gpo.po_message_comments.restype = STRING
    gpo.po_message_extracted_comments.argtypes = [po_message_t]
    gpo.po_message_extracted_comments.restype = STRING
    gpo.po_message_prev_msgctxt.argtypes = [po_message_t]
    gpo.po_message_prev_msgctxt.restype = STRING
    gpo.po_message_prev_msgid.argtypes = [po_message_t]
    gpo.po_message_prev_msgid.restype = STRING
    gpo.po_message_prev_msgid_plural.argtypes = [po_message_t]
    gpo.po_message_prev_msgid_plural.restype = STRING
    gpo.po_message_is_obsolete.argtypes = [po_message_t]
    gpo.po_message_is_obsolete.restype = c_int
    gpo.po_message_is_fuzzy.argtypes = [po_message_t]
    gpo.po_message_is_fuzzy.restype = c_int
    gpo.po_message_is_format.argtypes = [po_message_t, STRING]
    gpo.po_message_is_format.restype = c_int
    gpo.po_message_msgid.argtypes = [po_message_t]
    gpo.po_message_msgid.restype = STRING
    gpo.po_message_msgid_plural.argtypes = [po_message_t]
    gpo.po_message_msgid_plural.restype = STRING
    gpo.po_message_msgstr.argtypes = [po_message_t]
    gpo.po_message_msgstr.restype = STRING
    gpo.po_message_msgstr_plural.argtypes = [po_message_t, c_int]
    gpo.po_message_msgstr_plural.restype = STRING

    # Message (set methods)
    gpo.po_message_set_comments.argtypes = [po_message_t, STRING]
    gpo.po_message_set_extracted_comments.argtypes = [po_message_t, STRING]
    gpo.po_message_set_prev_msgctxt.argtypes = [po_message_t, STRING]
    gpo.po_message_set_prev_msgid.argtypes = [po_message_t, STRING]
    gpo.po_message_set_prev_msgid_plural.argtypes = [po_message_t, STRING]
    gpo.po_message_set_obsolete.argtypes = [po_message_t, c_int]
    gpo.po_message_set_fuzzy.argtypes = [po_message_t, c_int]
    gpo.po_message_set_format.argtypes = [po_message_t, STRING, c_int]
    gpo.po_message_set_msgctxt.argtypes = [po_message_t, STRING]
    gpo.po_message_set_msgid.argtypes = [po_message_t, STRING]
    # Declared like the other setters so that the calls made by pounit are
    # type-checked in the same way.
    gpo.po_message_set_msgid_plural.argtypes = [po_message_t, STRING]
    gpo.po_message_set_msgstr.argtypes = [po_message_t, STRING]
    gpo.po_message_set_msgstr_plural.argtypes = [po_message_t, c_int, STRING]
    gpo.po_message_set_range.argtypes = [po_message_t, c_int, c_int]
287
288
# Load libgettextpo
# ``gpo`` ends up as the loaded library object, or stays None when the
# library was not loaded and the fallback below is skipped.
gpo = None
# 'gettextpo' is recognised on Unix, while only 'libgettextpo' is recognised on
# windows. Therefore we test both.
names = ["gettextpo", "libgettextpo"]
for name in names:
    lib_location = ctypes.util.find_library(name)
    if lib_location:
        gpo = cdll.LoadLibrary(lib_location)
        if gpo:
            break
else:
    # This ``else`` belongs to the ``for`` loop: it only runs when the loop
    # finished without ``break``, i.e. none of the names could be loaded.
    # Don't raise exception in Sphinx autodoc [where xml is Mock()ed]. There is
    # nothing special about use of xml here - any of the Mock classes set up
    # in docs/conf.py would work as well, but xml is likely always to be there.
    gpo = None
    if "xml" not in sys.modules or sys.modules["xml"].__path__ != "/dev/null":

        # Now we are getting desperate, so let's guess a unix type DLL that
        # might be in LD_LIBRARY_PATH or loaded with LD_PRELOAD
        try:
            gpo = cdll.LoadLibrary("libgettextpo.so")
        except OSError:
            raise ImportError("gettext PO library not found")

# The call types are only declared when a library object is available.
if gpo:
    setup_call_types(gpo)
316
# Setup the po_xerror_handler
# One module-level handler structure whose two slots hold the Python
# callbacks xerror_cb and xerror2_cb wrapped in their ctypes prototypes.
xerror_handler = po_xerror_handler()
xerror_handler.xerror = xerror_prototype(xerror_cb)
xerror_handler.xerror2 = xerror2_prototype(xerror2_cb)
321
322
def escapeforpo(text):
    """Return the result of :func:`pypo.escapeforpo` for *text*."""
    escaped = pypo.escapeforpo(text)
    return escaped
325
326
def quoteforpo(text):
    """Return the result of :func:`pypo.quoteforpo` for *text*."""
    quoted = pypo.quoteforpo(text)
    return quoted
329
330
def unquotefrompo(postr):
    """Return the result of :func:`pypo.unquotefrompo` for *postr*."""
    unquoted = pypo.unquotefrompo(postr)
    return unquoted
333
334
def get_libgettextpo_version():
    """Returns the libgettextpo version

    :rtype: three-value tuple
    :return: libgettextpo version in the following format::
        (major version, minor version, subminor version)
    """
    # gettext-po.h declares ``extern int libgettextpo_version``. Reading it
    # as a C long would also read the bytes that follow the variable on
    # platforms where long is wider than int, so it is read as a C int.
    version = c_int.in_dll(gpo, "libgettextpo_version").value
    # The value is packed as (major << 16) + (minor << 8) + subminor.
    major = version >> 16
    minor = (version >> 8) & 0xFF
    subminor = version & 0xFF
    return major, minor, subminor
347
348
def gpo_encode(value):
    """Encode a ``str`` as UTF-8 bytes; return any other value unchanged."""
    if isinstance(value, str):
        return value.encode("utf-8")
    return value
351
352
def gpo_decode(value):
    """Decode ``bytes`` as UTF-8; return any other value (str, None, ...) unchanged."""
    return value.decode("utf-8") if isinstance(value, bytes) else value
359
360
class pounit(pocommon.pounit):
    """A PO unit whose data is held in a libgettextpo message.

    The message is kept in ``self._gpo_message``. Text is passed to the
    library as :attr:`CPO_ENC` encoded bytes and decoded again on the way out.
    """

    #: fixed encoding that is always used for cPO structure (self._gpo_message)
    CPO_ENC = "utf-8"

    def __init__(self, source=None, encoding="utf-8", gpo_message=None):
        """Create a new unit or wrap an existing libgettextpo message.

        :param source: source text for a new unit. When given (the empty
            string included) the target is initialised to ``""``.
        :param encoding: encoding of the strings held in *gpo_message*; a
            falsy value means ``"utf-8"``.
        :param gpo_message: existing message to wrap. When *encoding* differs
            from :attr:`CPO_ENC` its strings are re-encoded in place.
        """
        self._rich_source = None
        self._rich_target = None
        encoding = encoding or "utf-8"
        if not gpo_message:
            self._gpo_message = gpo.po_message_create()
        if source or source == "":
            self.source = source
            self.target = ""
        elif gpo_message:
            if encoding.lower() != self.CPO_ENC:
                features = ["msgctxt", "msgid", "msgid_plural"]
                features += ["prev_" + x for x in features]
                features += ["comments", "extracted_comments", "msgstr"]
                for feature in features:
                    text = getattr(gpo, "po_message_" + feature)(gpo_message)
                    if text:
                        getattr(gpo, "po_message_set_" + feature)(
                            gpo_message, text.decode(encoding).encode(self.CPO_ENC)
                        )
                # Also iterate through plural forms
                nplural = 0
                text = gpo.po_message_msgstr_plural(gpo_message, nplural)
                while text:
                    # The setter takes (message, index, text), in that order.
                    gpo.po_message_set_msgstr_plural(
                        gpo_message,
                        nplural,
                        text.decode(encoding).encode(self.CPO_ENC),
                    )
                    nplural += 1
                    text = gpo.po_message_msgstr_plural(gpo_message, nplural)
            self._gpo_message = gpo_message
        self.infer_state()

    def infer_state(self):
        """Set the unit state from the obsolete and fuzzy flags and the target."""
        # FIXME: do obsolete
        if gpo.po_message_is_obsolete(self._gpo_message):
            if gpo.po_message_is_fuzzy(self._gpo_message):
                self.set_state_n(self.STATE[self.S_FUZZY_OBSOLETE][0])
            else:
                self.set_state_n(self.STATE[self.S_OBSOLETE][0])
        elif gpo.po_message_is_fuzzy(self._gpo_message):
            self.set_state_n(self.STATE[self.S_FUZZY][0])
        elif self.target:
            self.set_state_n(self.STATE[self.S_TRANSLATED][0])
        else:
            self.set_state_n(self.STATE[self.S_UNTRANSLATED][0])

    def setmsgid_plural(self, msgid_plural):
        """Set the plural msgid; a list of strings is joined into one string."""
        if isinstance(msgid_plural, list):
            msgid_plural = "".join(msgid_plural)
        gpo.po_message_set_msgid_plural(self._gpo_message, gpo_encode(msgid_plural))

    # Write-only property: there is no getter.
    msgid_plural = property(None, setmsgid_plural)

    @property
    def source(self):
        """The msgid without a leading ``_:`` comment line.

        A :class:`multistring` of the singular and plural msgid is returned
        when the unit has a plural; ``""`` is returned for an empty msgid.
        """

        def remove_msgid_comments(text):
            if not text:
                return text
            if not text.startswith("_:"):
                return text
            remainder = re.search(r"_: .*\n(.*)", text)
            return remainder.group(1) if remainder else ""

        singular = remove_msgid_comments(
            gpo_decode(gpo.po_message_msgid(self._gpo_message)) or ""
        )
        if not singular:
            return ""
        if not self.hasplural():
            return singular
        multi = multistring(singular)
        pluralform = gpo_decode(gpo.po_message_msgid_plural(self._gpo_message)) or ""
        multi.strings.append(pluralform)
        return multi

    @source.setter
    def source(self, source):
        if isinstance(source, multistring):
            source = source.strings
        if isinstance(source, list):
            gpo.po_message_set_msgid(self._gpo_message, gpo_encode(source[0]))
            if len(source) > 1:
                gpo.po_message_set_msgid_plural(
                    self._gpo_message, gpo_encode(source[1])
                )
        else:
            gpo.po_message_set_msgid(self._gpo_message, gpo_encode(source))
            # A plain string has no plural form, so the plural msgid is unset.
            gpo.po_message_set_msgid_plural(self._gpo_message, None)

    @property
    def target(self):
        """The translation: a :class:`multistring` for plural units, else a str."""
        if not self.hasplural():
            return gpo_decode(gpo.po_message_msgstr(self._gpo_message)) or ""
        plurals = []
        nplural = 0
        plural = gpo.po_message_msgstr_plural(self._gpo_message, nplural)
        while plural:
            plurals.append(plural.decode(self.CPO_ENC))
            nplural += 1
            plural = gpo.po_message_msgstr_plural(self._gpo_message, nplural)
        if plurals:
            return multistring(plurals)
        return multistring("")

    @target.setter
    def target(self, target):
        # for plural strings: convert 'target' into a list
        if self.hasplural():
            if isinstance(target, multistring):
                target = target.strings
            elif isinstance(target, str):
                target = [target]
        # for non-plurals: check number of items in 'target'
        elif isinstance(target, (dict, list)):
            if len(target) == 1:
                target = target[0]
            else:
                raise ValueError(
                    "po msgid element has no plural but msgstr has %d elements (%s)"
                    % (len(target), target)
                )
        # empty the previous list of messages
        # TODO: the "pypo" implementation does not remove the previous items of
        #   the target, if self.target == target (essentially: comparing only
        #   the first item of a plural string with the single new string)
        #   Maybe this behaviour should be unified.
        if isinstance(target, (dict, list)):
            i = 0
            message = gpo.po_message_msgstr_plural(self._gpo_message, i)
            while message is not None:
                gpo.po_message_set_msgstr_plural(self._gpo_message, i, None)
                i += 1
                message = gpo.po_message_msgstr_plural(self._gpo_message, i)
        # add the items of a list
        if isinstance(target, list):
            for i, targetstring in enumerate(target):
                gpo.po_message_set_msgstr_plural(
                    self._gpo_message, i, gpo_encode(targetstring)
                )
        # add the values of a dict
        elif isinstance(target, dict):
            for i, targetstring in enumerate(target.values()):
                gpo.po_message_set_msgstr_plural(
                    self._gpo_message, i, gpo_encode(targetstring)
                )
        # add a single string
        else:
            if target is None:
                gpo.po_message_set_msgstr(self._gpo_message, gpo_encode(""))
            else:
                gpo.po_message_set_msgstr(self._gpo_message, gpo_encode(target))

    def getid(self):
        """The unique identifier for this unit according to the conventions in
        .mo files.
        """
        unit_id = gpo_decode(gpo.po_message_msgid(self._gpo_message)) or ""
        # Gettext does not consider the plural to determine duplicates, only
        # the msgid. For generation of .mo files, we might want to use this
        # code to generate the entry for the hash table, but for now, it is
        # commented out for conformance to gettext.
        #        plural = gpo.po_message_msgid_plural(self._gpo_message)
        #        if not plural is None:
        #            id = '%s\0%s' % (id, plural)
        context = gpo.po_message_msgctxt(self._gpo_message)
        if context:
            unit_id = f"{gpo_decode(context)}\04{unit_id}"
        return unit_id

    def getnotes(self, origin=None):
        """Return the comments of the unit as one string.

        :param origin: ``None`` for translator comments followed by extracted
            comments, ``"translator"`` for translator comments only, or one of
            ``"programmer"``, ``"developer"``, ``"source code"`` for extracted
            comments only.
        :raises ValueError: for any other *origin*.
        :return: the decoded comments without their last character (the
            trailing newline).
        """
        if origin is None:
            comments = (gpo.po_message_comments(self._gpo_message) or b"") + (
                gpo.po_message_extracted_comments(self._gpo_message) or b""
            )
        elif origin == "translator":
            comments = gpo.po_message_comments(self._gpo_message)
        elif origin in ["programmer", "developer", "source code"]:
            comments = gpo.po_message_extracted_comments(self._gpo_message)
        else:
            raise ValueError("Comment type not valid")

        # The library hands back bytes (or nothing); decode once so that all
        # further handling is done on str.
        comments = gpo_decode(comments) or ""
        # Let's drop the last newline
        return comments[:-1]

    def addnote(self, text, origin=None, position="append"):
        """Add *text* to the comments of the unit.

        :param text: the note; empty or whitespace-only text is ignored.
        :param origin: ``"programmer"``, ``"developer"`` or ``"source code"``
            store the note as an extracted comment; anything else stores it
            as a translator comment.
        :param position: ``"append"`` adds after the existing notes,
            ``"merge"`` adds only lines not already present, any other value
            puts the note before the existing notes.
        """
        # ignore empty strings and strings without non-space characters
        if not (text and text.strip()):
            return
        # Notes without an origin are written to the translator comments
        # below, so only the translator comments count as the existing notes;
        # using the combined notes would copy extracted comments across.
        oldnotes = self.getnotes("translator" if origin is None else origin)
        newnotes = None
        if oldnotes:
            if position == "append":
                newnotes = oldnotes + "\n" + text
            elif position == "merge":
                if oldnotes != text:
                    oldnoteslist = oldnotes.split("\n")
                    for newline in text.split("\n"):
                        newline = newline.rstrip("\r")
                        # avoid duplicate comment lines (this might cause some problems)
                        if newline not in oldnotes or len(newline) < 5:
                            oldnoteslist.append(newline)
                    newnotes = "\n".join(oldnoteslist)
            else:
                newnotes = text + "\n" + oldnotes
        else:
            newnotes = "\n".join(line.rstrip("\r") for line in text.split("\n"))

        if newnotes:
            # Non-empty lines get a leading space for libgettextpo < 0.17.
            needs_space = get_libgettextpo_version() < (0, 17, 0)
            newlines = [
                " " + line if line and needs_space else line
                for line in newnotes.split("\n")
            ]
            newnotes = gpo_encode("\n".join(newlines))
            if origin in ["programmer", "developer", "source code"]:
                gpo.po_message_set_extracted_comments(self._gpo_message, newnotes)
            else:
                gpo.po_message_set_comments(self._gpo_message, newnotes)

    def removenotes(self, origin=None):
        """Clear the translator comments; *origin* is not used."""
        gpo.po_message_set_comments(self._gpo_message, b"")

    def copy(self):
        """Return a new unit object that refers to this unit's message.

        The underlying libgettextpo message is shared, not duplicated.
        """
        newpo = self.__class__()
        newpo._gpo_message = self._gpo_message
        return newpo

    def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
        """Merges the otherpo (with the same msgid) into this one.

        Overwrite non-blank self.msgstr only if overwrite is True
        merge comments only if comments is True
        """

        if not isinstance(otherpo, pounit):
            super().merge(otherpo, overwrite, comments)
            return
        if comments:
            self.addnote(
                otherpo.getnotes("translator"), origin="translator", position="merge"
            )
            # FIXME mergelists(self.typecomments, otherpo.typecomments)
            if not authoritative:
                # We don't bring across otherpo.automaticcomments as we consider ourself
                # to be the the authority.  Same applies to otherpo.msgidcomments
                self.addnote(
                    otherpo.getnotes("developer"), origin="developer", position="merge"
                )
                self.msgidcomment = otherpo._extract_msgidcomments() or None
                self.addlocations(otherpo.getlocations())
        if not self.istranslated() or overwrite:
            # Remove kde-style comments from the translation (if any).
            if self._extract_msgidcomments(otherpo.target):
                otherpo.target = otherpo.target.replace(
                    "_: " + otherpo._extract_msgidcomments() + "\n", ""
                )
            self.target = otherpo.target
            if (
                self.source != otherpo.source
                or self.getcontext() != otherpo.getcontext()
            ):
                self.markfuzzy()
            else:
                self.markfuzzy(otherpo.isfuzzy())
        elif not otherpo.istranslated():
            if self.source != otherpo.source:
                self.markfuzzy()
        else:
            if self.target != otherpo.target:
                self.markfuzzy()

    def isheader(self):
        """Whether the unit has an empty id and a non-empty target."""
        # return self.source == "" and self.target != ""
        # we really want to make sure that there is no msgidcomment or msgctxt
        return self.getid() == "" and len(self.target) > 0

    def isblank(self):
        """Whether source, target and context are all empty."""
        return len(self.source) == len(self.target) == len(self.getcontext()) == 0

    def hastypecomment(self, typecomment):
        """Return the library's format flag for *typecomment* (e.g. ``c-format``)."""
        return gpo.po_message_is_format(self._gpo_message, gpo_encode(typecomment))

    def settypecomment(self, typecomment, present=True):
        """Set or clear the format flag *typecomment* on the message."""
        gpo.po_message_set_format(self._gpo_message, gpo_encode(typecomment), present)

    def hasmarkedcomment(self, commentmarker):
        """Whether a translator comment line starts with ``(commentmarker)``."""
        commentmarker = f"({commentmarker})"
        return any(
            comment.startswith(commentmarker)
            for comment in self.getnotes("translator").split("\n")
        )

    def isfuzzy(self):
        """Return the library's fuzzy flag for the message."""
        return gpo.po_message_is_fuzzy(self._gpo_message)

    def _domarkfuzzy(self, present=True):
        gpo.po_message_set_fuzzy(self._gpo_message, present)

    def makeobsolete(self):
        """Flag the message as obsolete and update the unit state."""
        # FIXME: libgettexpo currently does not reset other data, we probably want to do that
        # but a better solution would be for libgettextpo to output correct data on serialisation
        gpo.po_message_set_obsolete(self._gpo_message, True)
        self.infer_state()

    def resurrect(self):
        """Clear the obsolete flag of the message and update the unit state."""
        gpo.po_message_set_obsolete(self._gpo_message, False)
        self.infer_state()

    def hasplural(self):
        """Whether the message has a plural msgid."""
        return gpo.po_message_msgid_plural(self._gpo_message) is not None

    def _extract_msgidcomments(self, text=None):
        """Extract KDE style msgid comments from the unit.

        :rtype: String
        :return: Returns the extracted msgidcomments found in this unit's msgid.
        """
        if not text:
            text = gpo_decode(gpo.po_message_msgid(self._gpo_message)) or ""
        if text:
            return pocommon.extract_msgid_comment(text)
        return ""

    def setmsgidcomment(self, msgidcomment):
        """Prefix the source with a ``_: <msgidcomment>`` line; falsy values are ignored."""
        if msgidcomment:
            self.source = f"_: {msgidcomment}\n{self.source}"

    msgidcomment = property(_extract_msgidcomments, setmsgidcomment)

    def __str__(self):
        """Serialise the unit by writing it through a header-less ``pofile``."""
        pf = pofile(noheader=True)
        pf.addunit(self)
        return bytes(pf).decode(self.CPO_ENC)

    def getlocations(self):
        """Return the locations of the message as ``file`` or ``file:line`` strings."""
        locations = []
        i = 0
        location = gpo.po_message_filepos(self._gpo_message, i)
        while location:
            locname = gpo_decode(gpo.po_filepos_file(location))
            locline = gpo.po_filepos_start_line(location)
            # A start line of -1 means the location has no line number.
            if locline == -1:
                locstring = locname
            else:
                locstring = ":".join([locname, str(locline)])
            locations.append(pocommon.unquote_plus(locstring))
            i += 1
            location = gpo.po_message_filepos(self._gpo_message, i)
        return locations

    def addlocation(self, location):
        """Add a location given as ``file`` or ``file:line``.

        Locations containing a space are quoted first. Anything that is not
        exactly ``file:<digits>`` is stored whole as the file name with the
        line number -1.
        """
        if " " in location:
            location = pocommon.quote_plus(location)
        parts = location.split(":")
        if len(parts) == 2 and parts[1].isdigit():
            file = parts[0]
            line = int(parts[1])
        else:
            file = location
            line = -1
        gpo.po_message_add_filepos(self._gpo_message, gpo_encode(file), line)

    def getcontext(self):
        """Return the msgctxt, or the KDE style msgid comment when there is none."""
        msgctxt = gpo.po_message_msgctxt(self._gpo_message)
        if msgctxt:
            return gpo_decode(msgctxt)
        return self._extract_msgidcomments()

    def setcontext(self, context):
        """Set the msgctxt of the message."""
        gpo.po_message_set_msgctxt(self._gpo_message, gpo_encode(context))

    @classmethod
    def buildfromunit(cls, unit, encoding=None):
        """Build a native unit from a foreign unit, preserving as much
        information as possible.
        """
        if type(unit) is cls and hasattr(unit, "copy") and callable(unit.copy):
            return unit.copy()
        if not isinstance(unit, pocommon.pounit):
            return base.TranslationUnit.buildfromunit(unit)

        newunit = cls(unit.source, encoding)
        newunit.target = unit.target
        # context
        newunit.msgidcomment = unit._extract_msgidcomments()
        context = unit.getcontext()
        if not newunit.msgidcomment and context:
            newunit.setcontext(context)

        locations = unit.getlocations()
        if locations:
            newunit.addlocations(locations)
        notes = unit.getnotes("developer")
        if notes:
            newunit.addnote(notes, "developer")
        notes = unit.getnotes("translator")
        if notes:
            newunit.addnote(notes, "translator")
        if unit.isobsolete():
            newunit.makeobsolete()
        newunit.markfuzzy(unit.isfuzzy())
        for tc in ["python-format", "c-format", "php-format"]:
            if unit.hastypecomment(tc):
                newunit.settypecomment(tc)
                # We assume/guess/hope that there will only be one
                break
        return newunit
791
792
793class pofile(pocommon.pofile):
794    UnitClass = pounit
795
796    def __init__(self, inputfile=None, noheader=False, **kwargs):
797        self._gpo_memory_file = None
798        self._gpo_message_iterator = None
799        self.sourcelanguage = None
800        self.targetlanguage = None
801        if inputfile is None:
802            self.units = []
803            self._encoding = kwargs.get("encoding")
804            self._gpo_memory_file = gpo.po_file_create()
805            self._gpo_message_iterator = gpo.po_message_iterator(
806                self._gpo_memory_file, None
807            )
808        super().__init__(inputfile=inputfile, noheader=noheader, **kwargs)
809
810    def addunit(self, unit, new=True):
811        if new:
812            gpo.po_message_insert(self._gpo_message_iterator, unit._gpo_message)
813        super().addunit(unit)
814
815    def removeunit(self, unit):
816        # There seems to be no API to remove a message
817        raise ValueError("Unit removal not supported by cpo")
818
819    def _insert_header(self, header):
820        header._store = self
821        self.units.insert(0, header)
822        gpo.po_message_iterator_free(self._gpo_message_iterator)
823        self._gpo_message_iterator = gpo.po_message_iterator(
824            self._gpo_memory_file, None
825        )
826        gpo.po_message_insert(self._gpo_message_iterator, header._gpo_message)
827        while gpo.po_next_message(self._gpo_message_iterator):
828            pass
829
830    def removeduplicates(self, duplicatestyle="merge"):
831        """make sure each msgid is unique ; merge comments etc from duplicates into original"""
832        # TODO: can we handle consecutive calls to removeduplicates()? What
833        # about files already containing msgctxt? - test
834        id_dict = {}
835        uniqueunits = []
836        # TODO: this is using a list as the pos aren't hashable, but this is slow.
837        # probably not used frequently enough to worry about it, though.
838        markedpos = []
839
840        def addcomment(thepo):
841            thepo.msgidcomment = " ".join(thepo.getlocations())
842            markedpos.append(thepo)
843
844        for thepo in self.units:
845            id = thepo.getid()
846            if thepo.isheader() and not thepo.getlocations():
847                # header msgids shouldn't be merged...
848                uniqueunits.append(thepo)
849            elif id in id_dict:
850                if duplicatestyle == "merge":
851                    if id:
852                        id_dict[id].merge(thepo)
853                    else:
854                        addcomment(thepo)
855                        uniqueunits.append(thepo)
856                elif duplicatestyle == "msgctxt":
857                    origpo = id_dict[id]
858                    if origpo not in markedpos:
859                        origpo.setcontext(" ".join(origpo.getlocations()))
860                        markedpos.append(thepo)
861                    thepo.setcontext(" ".join(thepo.getlocations()))
862                    thepo_msgctxt = gpo.po_message_msgctxt(thepo._gpo_message)
863                    idpo_msgctxt = gpo.po_message_msgctxt(id_dict[id]._gpo_message)
864                    if not thepo_msgctxt == idpo_msgctxt:
865                        uniqueunits.append(thepo)
866                    else:
867                        logger.warning(
868                            "Duplicate unit found with msgctx of '%s' and source '%s'",
869                            thepo_msgctxt,
870                            thepo.source,
871                        )
872            else:
873                if not id:
874                    if duplicatestyle == "merge":
875                        addcomment(thepo)
876                    else:
877                        thepo.setcontext(" ".join(thepo.getlocations()))
878                id_dict[id] = thepo
879                uniqueunits.append(thepo)
880        new_gpo_memory_file = gpo.po_file_create()
881        new_gpo_message_iterator = gpo.po_message_iterator(new_gpo_memory_file, None)
882        for unit in uniqueunits:
883            gpo.po_message_insert(new_gpo_message_iterator, unit._gpo_message)
884        gpo.po_message_iterator_free(self._gpo_message_iterator)
885        self._gpo_message_iterator = new_gpo_message_iterator
886        self._gpo_memory_file = new_gpo_memory_file
887        self.units = uniqueunits
888
889    def serialize(self, out):
890        def obsolete_workaround():
891            # Remove all items that are not output by msgmerge when a unit is obsolete.  This is a work
892            # around for bug in libgettextpo
893            # FIXME Do version test in case they fix this bug
894            for unit in self.units:
895                if unit.isobsolete():
896                    gpo.po_message_set_extracted_comments(unit._gpo_message, b"")
897                    location = gpo.po_message_filepos(unit._gpo_message, 0)
898                    while location:
899                        gpo.po_message_remove_filepos(unit._gpo_message, 0)
900                        location = gpo.po_message_filepos(unit._gpo_message, 0)
901
902        def writefile(filename):
903            xerror_storage.exception = None
904            self._gpo_memory_file = gpo.po_file_write_v2(
905                self._gpo_memory_file, gpo_encode(filename), xerror_handler
906            )
907            if xerror_storage.exception is not None:
908                raise xerror_storage.exception
909            with open(filename, "rb") as tfile:
910                return tfile.read()
911
912        outputstring = ""
913        if self._gpo_memory_file:
914            obsolete_workaround()
915            f, fname = tempfile.mkstemp(prefix="translate", suffix=".po")
916            os.close(f)
917            try:
918                outputstring = writefile(fname)
919                if self.encoding != pounit.CPO_ENC:
920                    try:
921                        outputstring = outputstring.decode(pounit.CPO_ENC).encode(
922                            self.encoding
923                        )
924                    except UnicodeEncodeError:
925                        self.encoding = pounit.CPO_ENC
926                        self.updateheader(
927                            content_type="text/plain; charset=UTF-8",
928                            content_transfer_encoding="8bit",
929                        )
930                        outputstring = writefile(fname)
931            finally:
932                os.remove(fname)
933        out.write(outputstring)
934
935    def isempty(self):
936        """Returns True if the object doesn't contain any translation units."""
937        if len(self.units) == 0:
938            return True
939        # Skip the first unit if it is a header.
940        if self.units[0].isheader():
941            units = self.units[1:]
942        else:
943            units = self.units
944
945        for unit in units:
946            if not unit.isblank() and not unit.isobsolete():
947                return False
948        return True
949
950    def parse(self, input):
951        if hasattr(input, "name"):
952            self.filename = input.name
953        elif not getattr(self, "filename", ""):
954            self.filename = ""
955
956        if hasattr(input, "read"):
957            posrc = input.read()
958            input.close()
959            input = posrc
960
961        needtmpfile = not os.path.isfile(input)
962        if needtmpfile:
963            # This is not a file - we write the string to a temporary file
964            fd, fname = tempfile.mkstemp(prefix="translate", suffix=".po")
965            os.write(fd, input)
966            input = fname
967            os.close(fd)
968
969        try:
970            xerror_storage.exception = None
971            self._gpo_memory_file = gpo.po_file_read_v3(
972                gpo_encode(input), xerror_handler
973            )
974            if xerror_storage.exception is not None:
975                raise xerror_storage.exception
976            if self._gpo_memory_file is None:
977                logger.error("Error:")
978        finally:
979            if needtmpfile:
980                os.remove(input)
981
982        self.units = []
983        # Handle xerrors here
984        self._header = gpo.po_file_domain_header(self._gpo_memory_file, None)
985        if self._header:
986            charset = gpo_decode(
987                gpo.po_header_field(self._header, gpo_encode("Content-Type"))
988            )
989            if charset:
990                charset = re.search("charset=([^\\s]+)", charset).group(1)
991            self.encoding = charset
992        self._gpo_message_iterator = gpo.po_message_iterator(
993            self._gpo_memory_file, None
994        )
995        newmessage = gpo.po_next_message(self._gpo_message_iterator)
996        while newmessage:
997            newunit = pounit(gpo_message=newmessage, encoding=self.encoding)
998            self.addunit(newunit, new=False)
999            newmessage = gpo.po_next_message(self._gpo_message_iterator)
1000        self._free_iterator()
1001
    def __del__(self):
        """Finalizer; currently returns immediately without freeing anything.

        The statements after the early ``return`` are unreachable on purpose:
        they would free the message iterator and the gettext file, and are
        kept for when the cleanup can be enabled again.
        """
        # We currently disable this while we still get segmentation faults.
        # Note that this is definitely leaking memory because of this.
        return
        self._free_iterator()
        if self._gpo_memory_file is not None:
            gpo.po_file_free(self._gpo_memory_file)
            self._gpo_memory_file = None
1010
    def _free_iterator(self):
        """Release the message iterator; currently a no-op.

        The statements after the early ``return`` are unreachable on purpose:
        they would free the iterator and reset the attribute to ``None``, and
        are kept for when the cleanup can be enabled again.
        """
        # We currently disable this while we still get segmentation faults.
        # Note that this is definitely leaking memory because of this.
        return
        if self._gpo_message_iterator is not None:
            gpo.po_message_iterator_free(self._gpo_message_iterator)
            self._gpo_message_iterator = None
1018