1#
2# Copyright 2018 BhaaL
3#
4# This file is part of translate.
5#
6# translate is free software; you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation; either version 2 of the License, or
9# (at your option) any later version.
10#
11# translate is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program; if not, see <http://www.gnu.org/licenses/>.
18
19"""Module for handling flat XML files."""
20
21from lxml import etree
22
23from translate.misc.xml_helpers import getText, namespaced, reindent
24from translate.storage import base
25
26
class FlatXMLUnit(base.TranslationUnit):
    """A translation unit backed by a single element of a flat XML file.

    The unit identifier (``source``) lives in an attribute of the backing
    element, while the translation (``target``) is the element's text.
    """

    def __init__(
        self,
        source=None,
        namespace=None,
        element_name="str",
        attribute_name="key",
        **kwargs
    ):
        """Creates a unit together with a fresh backing element.

        ``namespace`` and ``element_name`` determine the tag of the new
        element, ``attribute_name`` is the attribute that holds the
        identifier.  ``source`` and any further keyword arguments are
        handed to the base class initializer.
        """
        self.namespace = namespace
        self.element_name = element_name
        self.attribute_name = attribute_name
        # Build the backing element up front: the source/target properties
        # below all operate on it.
        tag = self.namespaced(self.element_name)
        self.xmlelement = etree.Element(tag)
        super().__init__(source, **kwargs)

    def __str__(self):
        # Serializing with the "unicode" encoding yields text, not bytes.
        serialized = etree.tostring(self.xmlelement, encoding="unicode")
        return serialized

    @property
    def source(self):
        """The identifier of this unit, read from the key attribute."""
        element = self.xmlelement
        return element.get(self.attribute_name)

    @source.setter
    def source(self, source):
        """Stores the identifier of this unit in the key attribute."""
        element = self.xmlelement
        element.set(self.attribute_name, source)

    @property
    def target(self):
        """The translated string of this unit."""
        return self.node_text

    @target.setter
    def target(self, target):
        """Replaces the translated string unless it is already current."""
        unchanged = self.target == target
        if not unchanged:
            self.xmlelement.text = target

    def namespaced(self, name):
        """Qualifies name with this unit's namespace (Clark notation)."""
        return namespaced(self.namespace, name)

    @property
    def node_text(self):
        """The text of the backing element, or None without an element."""
        element = self.xmlelement
        return None if element is None else getText(element)

    @classmethod
    def createfromxmlElement(
        cls, element, namespace=None, element_name="str", attribute_name="key"
    ):
        """Wraps an existing element in a new unit, if it qualifies.

        Returns None when element is None or when its tag differs from the
        namespaced element name; otherwise returns a unit whose backing
        element is the passed element itself.
        """
        if element is None or element.tag != namespaced(namespace, element_name):
            return None

        wrapped = cls(
            source=None,
            namespace=namespace,
            element_name=element_name,
            attribute_name=attribute_name,
        )
        # Swap the freshly created placeholder element for the real one.
        wrapped.xmlelement = element
        return wrapped
103
104
class FlatXMLFile(base.TranslationStore):
    """Class representing a flat XML file store.

    The backing document is a single root element; each child element
    whose tag matches the configured value name is exposed as a unit.
    """

    UnitClass = FlatXMLUnit
    _name = "Flat XML File"
    Mimetypes = ["text/xml"]
    Extensions = ["xml"]

    def __init__(
        self,
        inputfile=None,
        sourcelanguage="en",
        targetlanguage=None,
        root_name="root",
        value_name="str",
        key_name="key",
        namespace=None,
        indent_chars="  ",
        trailing_eol=True,
        **kwargs
    ):
        """Creates a store, either parsed from inputfile or empty.

        :param inputfile: XML content or file-like object to parse; when
            None an empty document is created instead.
        :param sourcelanguage: source language, only applied when no
            inputfile is given.
        :param targetlanguage: target language, only applied when no
            inputfile is given.
        :param root_name: name of the root element.
        :param value_name: name of the elements that represent units.
        :param key_name: name of the attribute holding a unit's identifier.
        :param namespace: namespace used to qualify element names.
        :param indent_chars: indentation string used by :meth:`reindent`;
            None linearizes the document.
        :param trailing_eol: whether :meth:`reindent` sets a trailing
            newline after the root element.
        :param kwargs: passed on to the base class initializer.
        """
        self.root_name = root_name
        self.value_name = value_name
        self.key_name = key_name
        self.namespace = namespace
        self.indent_chars = indent_chars
        self.trailing_eol = trailing_eol

        super().__init__(**kwargs)
        if inputfile is not None:
            self.parse(inputfile)
        else:
            self.make_empty_file()
            self.setsourcelanguage(sourcelanguage)
            self.settargetlanguage(targetlanguage)

    def addunit(self, unit, new=True):
        """Adds unit to the store.

        The unit's namespace is set to the store's namespace.  When new is
        true the unit's element is also appended to the root element; pass
        new=False for units whose element is already part of the document.
        """
        unit.namespace = self.namespace
        super().addunit(unit)
        if new:
            self.root.append(unit.xmlelement)

    def removeunit(self, unit):
        """Removes unit from the store and its element from the root."""
        super().removeunit(unit)
        self.root.remove(unit.xmlelement)

    def reindent(self):
        """Reindents the backing document to be consistent.

        With indent_chars set to None the document is linearized (the
        whitespace between the root's children is dropped); otherwise the
        reindent helper is applied with indent_chars.  If trailing_eol is
        set, the root element's tail becomes a single newline.
        """
        # A root without children takes the same path as a populated one,
        # so the trailing EOL handling below always applies.
        if self.indent_chars is None:
            # indent None means: linearize
            self.root.text = None
            for child in self.root:
                child.tail = None
        else:
            reindent(self.root, indent=self.indent_chars)

        if self.trailing_eol:
            # ensure trailing EOL for VCS
            self.root.tail = "\n"

    def serialize(self, out=None):
        """Reindents the document and writes it to out.

        The output includes an XML declaration and uses the store's
        encoding.
        """
        self.reindent()
        self.document.write(out, xml_declaration=True, encoding=self.encoding)

    def make_empty_file(self):
        """Initializes the backing document to be an empty root element."""
        self.root = etree.Element(self.namespaced(self.root_name))
        self.document = self.root.getroottree()

    def parse(self, xml):
        """Parses the passed xml file into the backing document.

        :param xml: the XML content, or a file-like object; file-like
            objects are rewound and read completely.
        :raises AssertionError: if the root element does not have the
            expected name, or if the root has children but none carries
            the expected value name, or the first matching child has no
            (non-empty) key attribute.

        Child elements that do not match the value name are left in the
        document but are not exposed as units.
        """
        if not hasattr(self, "filename"):
            self.filename = getattr(xml, "name", "")
        if hasattr(xml, "read"):
            xml.seek(0)
            xml = xml.read()

        parser = etree.XMLParser(strip_cdata=False, resolve_entities=False)
        self.root = etree.fromstring(xml, parser)
        self.document = self.root.getroottree()
        self.encoding = self.document.docinfo.encoding

        # The checks below raise AssertionError explicitly instead of using
        # assert statements, so they stay active when Python runs with -O.
        root_name = self.namespaced(self.root_name)
        if self.root.tag != root_name:
            raise AssertionError(
                "expected root name to be {} but got {}".format(
                    root_name,
                    self.root.tag,
                )
            )
        if len(self.root):
            # we'd expect at least one child element to have the correct
            # name and attributes; otherwise the name parameters might've
            # been wrong/typo'd and need to be addressed in order to avoid
            # coming up empty when the file actually contains entries.
            value_name = self.namespaced(self.value_name)
            matching_nodes = list(self.root.iterchildren(value_name))
            if not matching_nodes:
                raise AssertionError(
                    "expected value name to be {} but first node is {}".format(
                        value_name,
                        self.root[0].tag,
                    )
                )

            if not matching_nodes[0].get(self.key_name):
                raise AssertionError(
                    "expected key attribute to be {}, found attribute(s): {}".format(
                        self.key_name,
                        ",".join(matching_nodes[0].attrib),
                    )
                )

        for entry in self.root:
            unit = self.UnitClass.createfromxmlElement(
                entry,
                namespace=self.namespace,
                element_name=self.value_name,
                attribute_name=self.key_name,
            )
            # createfromxmlElement returns None for non-matching elements
            if unit is not None:
                self.addunit(unit, new=False)

    def namespaced(self, name):
        """Returns name in Clark notation."""
        return namespaced(self.namespace, name)
233