#
# Copyright 2018 BhaaL
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

"""Module for handling flat XML files."""

from lxml import etree

from translate.misc.xml_helpers import getText, namespaced, reindent
from translate.storage import base


class FlatXMLUnit(base.TranslationUnit):
    """A single term in the XML file.

    The unit is backed by one XML element: the unit's source is stored in
    the attribute named ``attribute_name`` and the unit's target is the
    text content of the element.
    """

    def __init__(
        self,
        source=None,
        namespace=None,
        element_name="str",
        attribute_name="key",
        **kwargs
    ):
        """Creates a unit backed by a freshly created XML element.

        :param source: identifier of the unit, passed on to the base class.
        :param namespace: XML namespace used to qualify ``element_name``.
        :param element_name: local name of the backing element.
        :param attribute_name: attribute of the element holding the source.
        :param kwargs: passed through to the base class initializer.
        """
        self.namespace = namespace
        self.element_name = element_name
        self.attribute_name = attribute_name
        # The element is created before the base initializer runs so that
        # the source/target properties below always have a backing node.
        self.xmlelement = etree.Element(self.namespaced(self.element_name))
        super().__init__(source, **kwargs)

    def __str__(self):
        """Returns the backing element serialized as a text string."""
        # "unicode" encoding keeps the unicode status of the output
        return etree.tostring(self.xmlelement, encoding="unicode")

    @property
    def source(self):
        """Returns the unique identifier of this unit."""
        return self.xmlelement.get(self.attribute_name)

    @source.setter
    def source(self, source):
        """Updates the unique identifier of this unit."""
        self.xmlelement.set(self.attribute_name, source)

    @property
    def target(self):
        """Returns the translated string of this unit."""
        return self.node_text

    @target.setter
    def target(self, target):
        """Updates the translated string of this unit."""
        # Leave the element untouched when the value does not change.
        if self.target == target:
            return
        self.xmlelement.text = target

    def namespaced(self, name):
        """Returns name in Clark notation."""
        return namespaced(self.namespace, name)

    @property
    def node_text(self):
        """Returns the text content of the XML element.

        Returns None when the unit has no backing element.
        """
        if self.xmlelement is None:
            return None

        return getText(self.xmlelement)

    @classmethod
    def createfromxmlElement(
        cls, element, namespace=None, element_name="str", attribute_name="key"
    ):
        """Attempts to create a unit from the passed element.

        element must not be None and must match the given element name
        (including namespace); otherwise None will be returned.
        """
        if element is None:
            return None
        if element.tag != namespaced(namespace, element_name):
            return None
        unit = cls(
            source=None,
            namespace=namespace,
            element_name=element_name,
            attribute_name=attribute_name,
        )
        # Swap the element created by the initializer for the parsed one,
        # so that the unit reads from and writes to the existing node.
        unit.xmlelement = element
        return unit


class FlatXMLFile(base.TranslationStore):
    """Class representing a flat XML file store"""

    UnitClass = FlatXMLUnit
    _name = "Flat XML File"
    Mimetypes = ["text/xml"]
    Extensions = ["xml"]

    def __init__(
        self,
        inputfile=None,
        sourcelanguage="en",
        targetlanguage=None,
        root_name="root",
        value_name="str",
        key_name="key",
        namespace=None,
        # NOTE(review): default reproduced as it appears in the reviewed
        # source; confirm the intended indent width against the repository.
        indent_chars=" ",
        trailing_eol=True,
        **kwargs
    ):
        """Creates a store, parsing ``inputfile`` if one is given.

        :param inputfile: XML content or file-like object to parse; when
            None an empty document is created instead.
        :param sourcelanguage: source language set on the store.
        :param targetlanguage: target language set on the store.
        :param root_name: local name of the document's root element.
        :param value_name: local name of the elements holding the units.
        :param key_name: attribute of a value element holding the source.
        :param namespace: XML namespace qualifying root and value names.
        :param indent_chars: indent string used when serializing; None
            linearizes the document.
        :param trailing_eol: whether serializing ensures a trailing EOL.
        :param kwargs: passed through to the base class initializer.
        """
        self.root_name = root_name
        self.value_name = value_name
        self.key_name = key_name
        self.namespace = namespace
        self.indent_chars = indent_chars
        self.trailing_eol = trailing_eol

        super().__init__(**kwargs)
        if inputfile is not None:
            self.parse(inputfile)
        else:
            self.make_empty_file()
        self.setsourcelanguage(sourcelanguage)
        self.settargetlanguage(targetlanguage)

    def addunit(self, unit, new=True):
        """Adds the unit to the store.

        :param unit: the unit to add; its namespace is set to the store's.
        :param new: when True the unit's element is appended to the root
            element; pass False for units whose element is already part
            of the document (as done while parsing).
        """
        unit.namespace = self.namespace
        super().addunit(unit)
        if new:
            self.root.append(unit.xmlelement)

    def removeunit(self, unit):
        """Removes the unit from the store and its element from the root."""
        super().removeunit(unit)
        self.root.remove(unit.xmlelement)

    def reindent(self):
        """Reindents the backing document to be consistent.

        A root element without children is left untouched.
        """
        # no elements? nothing to do.
        # (explicit len(): truth-testing an lxml element is deprecated)
        if not len(self.root):
            return

        if self.indent_chars is None:
            # indent None means: linearize
            self.root.text = None
            for child in self.root:
                child.tail = None
        else:
            reindent(self.root, indent=self.indent_chars)

        if self.trailing_eol:
            # ensure trailing EOL for VCS
            self.root.tail = "\n"

    def serialize(self, out=None):
        """Reindents the document and writes it, with XML declaration, to out."""
        self.reindent()
        self.document.write(out, xml_declaration=True, encoding=self.encoding)

    def make_empty_file(self):
        """Initializes the backing document to be an empty root element."""
        self.root = etree.Element(self.namespaced(self.root_name))
        self.document = self.root.getroottree()

    def parse(self, xml):
        """Parses the passed xml file into the backing document.

        :param xml: XML content, or a file-like object which is rewound
            and read in full.
        :raises AssertionError: if the root element does not have the
            configured root name, or if the root has children but none
            carries the configured value name, or the first such child
            has no (non-empty) key attribute.
        """
        if not hasattr(self, "filename"):
            self.filename = getattr(xml, "name", "")
        if hasattr(xml, "read"):
            xml.seek(0)
            xml = xml.read()

        parser = etree.XMLParser(strip_cdata=False, resolve_entities=False)
        self.root = etree.fromstring(xml, parser)
        self.document = self.root.getroottree()
        self.encoding = self.document.docinfo.encoding

        # The checks below raise AssertionError explicitly rather than using
        # ``assert`` so that they are still enforced when running with -O.
        root_name = self.namespaced(self.root_name)
        if self.root.tag != root_name:
            raise AssertionError(
                "expected root name to be {} but got {}".format(
                    root_name,
                    self.root.tag,
                )
            )
        if len(self.root):
            # we'd expect at least one child element to have the correct
            # name and attributes; otherwise the name parameters might've
            # been wrong/typo'd and need to be addressed in order to avoid
            # coming up empty when the file actually contains entries.
            value_name = self.namespaced(self.value_name)
            matching_nodes = list(self.root.iterchildren(value_name))
            if not matching_nodes:
                raise AssertionError(
                    "expected value name to be {} but first node is {}".format(
                        value_name,
                        self.root[0].tag,
                    )
                )

            if not matching_nodes[0].get(self.key_name):
                raise AssertionError(
                    "expected key attribute to be {}, found attribute(s): {}".format(
                        self.key_name,
                        ",".join(matching_nodes[0].attrib),
                    )
                )

        for entry in self.root:
            # createfromxmlElement returns None for children that are not
            # value elements; those are skipped.
            unit = self.UnitClass.createfromxmlElement(
                entry,
                namespace=self.namespace,
                element_name=self.value_name,
                attribute_name=self.key_name,
            )
            if unit is not None:
                self.addunit(unit, new=False)

    def namespaced(self, name):
        """Returns name in Clark notation."""
        return namespaced(self.namespace, name)