#
# Copyright 2004-2010 Zuza Software Foundation
#
# This file is part of translate.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

"""Convert template files (like .pot or template .xlf files) to translation
files, preserving existing translations.

See: http://docs.translatehouse.org/projects/translate-toolkit/en/latest/commands/pot2po.html
for examples and usage instructions.
"""


from translate.misc.multistring import multistring
from translate.search import match
from translate.storage import catkeys, factory, poheader
from translate.tools import pretranslate


def convertpot(
    input_file,
    output_file,
    template_file,
    tm=None,
    min_similarity=75,
    fuzzymatching=True,
    classes=None,
    classes_str=None,
    **kwargs
):
    """Main conversion function.

    Parses ``input_file`` (and ``template_file`` when given) with
    ``factory.getobject``, merges them through :func:`convert_stores` and
    serializes the resulting store to ``output_file``.

    :param input_file: the new template to convert.
    :param output_file: destination the output store is serialized to.
    :param template_file: existing translation file, or ``None``.
    :param tm: translation memory forwarded to :func:`convert_stores`.
    :param min_similarity: minimum similarity forwarded to
        :func:`convert_stores`.
    :param fuzzymatching: fuzzy matching switch forwarded to
        :func:`convert_stores`.
    :param classes: forwarded to ``factory.getobject`` for the input store.
    :param classes_str: forwarded to every ``factory.getobject`` call.
    :param kwargs: forwarded unchanged to :func:`convert_stores`.
    :return: always ``1``.
    """
    input_store = factory.getobject(
        input_file, classes=classes, classes_str=classes_str
    )
    # The input is parsed a second time so that the store being filled in is
    # a separate object from the one used for lookups.
    # NOTE(review): unlike the call above, this one does not pass
    # ``classes=classes`` -- confirm whether that is intentional.
    try:
        temp_store = factory.getobject(input_file, classes_str=classes_str)
    except Exception:
        # StringIO and other file like objects will be closed after parsing;
        # convert_stores() falls back to input_store when this is None.
        temp_store = None

    template_store = None
    if template_file is not None:
        template_store = factory.getobject(template_file, classes_str=classes_str)

    output_store = convert_stores(
        input_store,
        template_store,
        temp_store,
        tm,
        min_similarity,
        fuzzymatching,
        **kwargs
    )
    output_store.serialize(output_file)

    return 1


def convert_stores(
    input_store,
    template_store,
    temp_store=None,
    tm=None,
    min_similarity=75,
    fuzzymatching=True,
    **kwargs
):
    """Actual conversion function, works on stores not files, returns
    a properly initialized pretranslated output store, with structure
    based on input_store, metadata based on template_store, migrates
    old translations from template_store and pretranslating from TM.

    :param input_store: store parsed from the new template.
    :param template_store: store holding the existing translations; may be
        ``None`` (or otherwise falsy), in which case no template matcher is
        built.
    :param temp_store: store that is filled in and returned; defaults to
        ``input_store`` when ``None``.
    :param tm: translation memory handed to ``pretranslate.memory``; only
        used when ``fuzzymatching`` is true.
    :param min_similarity: minimum similarity passed to the matchers.
    :param fuzzymatching: when false, no fuzzy matchers are created at all.
    :param kwargs: accepted for interface compatibility; not used here.
    :return: ``temp_store`` after merging.
    """
    if temp_store is None:
        temp_store = input_store

    # Create fuzzy matchers to be used by pretranslate.pretranslate_unit
    matchers = []

    _prepare_merge(input_store, temp_store, template_store)
    if fuzzymatching:
        if template_store:
            matcher = match.matcher(
                template_store,
                max_candidates=1,
                min_similarity=min_similarity,
                max_length=3000,
                usefuzzy=True,
            )
            matcher.addpercentage = False
            matchers.append(matcher)
        if tm:
            matcher = pretranslate.memory(
                tm, max_candidates=1, min_similarity=min_similarity, max_length=1000
            )
            matcher.addpercentage = False
            matchers.append(matcher)

    # initialize store
    _store_pre_merge(input_store, temp_store, template_store)

    # Do matching
    for input_unit in temp_store.units:
        if input_unit.istranslatable():
            input_unit = pretranslate.pretranslate_unit(
                input_unit,
                template_store,
                matchers,
                mark_reused=True,
                merge_on=input_store.merge_on,
            )
            _unit_post_merge(input_unit, input_store, temp_store, template_store)

    # finalize store
    _store_post_merge(input_store, temp_store, template_store)

    return temp_store


## dispatchers
#
# Each dispatcher builds a hook name from a class name and calls the
# module-level function of that name if one exists in globals().
def _prepare_merge(input_store, output_store, template_store, **kwargs):
    """Prepare stores & TM matchers before merging.

    Calls the optional ``_prepare_merge_<input store class name>`` hook and
    then builds the indexes of ``input_store`` and, when it is truthy,
    ``template_store``.
    """
    # Dispatch to format specific functions
    prepare_merge_hook = "_prepare_merge_%s" % input_store.__class__.__name__
    if prepare_merge_hook in globals():
        globals()[prepare_merge_hook](
            input_store, output_store, template_store, **kwargs
        )

    # Generate an index so we can search by source string and location later on
    input_store.makeindex()
    if template_store:
        template_store.makeindex()


def _store_pre_merge(input_store, output_store, template_store, **kwargs):
    """Initialize the new file with things like headers and metadata.

    Handles the poheader and catkeys special cases first, then calls the
    optional ``_store_pre_merge_<input store class name>`` hook.
    """
    # formats that implement poheader interface are a special case
    if isinstance(input_store, poheader.poheader):
        _do_poheaders(input_store, output_store, template_store)
    elif isinstance(input_store, catkeys.CatkeysFile):
        # FIXME: also this should be a format specific hook
        # The template's header wins when a template is available.
        if template_store is not None:
            output_store.header = template_store.header
        else:
            output_store.header = input_store.header

    # Dispatch to format specific functions
    store_pre_merge_hook = "_store_pre_merge_%s" % input_store.__class__.__name__
    if store_pre_merge_hook in globals():
        globals()[store_pre_merge_hook](
            input_store, output_store, template_store, **kwargs
        )


def _store_post_merge(input_store, output_store, template_store, **kwargs):
    """Close file after merging all translations, used for adding statistics,
    obsolete messages and similar wrapup tasks.

    Only calls the optional ``_store_post_merge_<input store class name>``
    hook; does nothing when no such hook is defined.
    """
    # Dispatch to format specific functions
    store_post_merge_hook = "_store_post_merge_%s" % input_store.__class__.__name__
    if store_post_merge_hook in globals():
        globals()[store_post_merge_hook](
            input_store, output_store, template_store, **kwargs
        )


def _unit_post_merge(input_unit, input_store, output_store, template_store, **kwargs):
    """Handle any unit level cleanup and situations not handled by the merge()
    function.

    Only calls the optional ``_unit_post_merge_<unit class name>`` hook; note
    that the hook is selected by the class of the *unit*, not of the store.
    """
    # dispatch to format specific functions
    unit_post_merge_hook = "_unit_post_merge_%s" % input_unit.__class__.__name__
    if unit_post_merge_hook in globals():
        globals()[unit_post_merge_hook](
            input_unit, input_store, output_store, template_store, **kwargs
        )


## Format specific functions
def _unit_post_merge_pounit(input_unit, input_store, output_store, template_store):
    """PO format specific plural string initialization logic.

    For a plural unit whose target is still empty, resets the target to one
    empty string per plural form declared in the output store's header.
    """
    # FIXME: do we want to do that for poxliff also?
    if input_unit.hasplural() and len(input_unit.target) == 0:
        # untranslated plural unit; Let's ensure that we have the correct
        # number of plural forms:
        nplurals, _plural = output_store.getheaderplural()
        # Nothing is done when the count is missing, not numeric, or "2".
        if nplurals and nplurals.isdigit() and nplurals != "2":
            input_unit.target = multistring([""] * int(nplurals))


def _store_post_merge_pofile(input_store, output_store, template_store):
    """PO format specific: adds newly obsoleted messages to end of store.

    A template unit becomes obsolete when it has a target, its id is not
    found in ``input_store`` and it carries no ``reused`` attribute.
    """
    # Let's take care of obsoleted messages
    if template_store:
        newlyobsoleted = []
        for unit in template_store.units:
            if unit.isheader() or unit.isblank():
                continue
            if unit.target and not (
                input_store.findid(unit.getid()) or hasattr(unit, "reused")
            ):
                # Not in .pot, make it obsolete
                unit.makeobsolete()
                newlyobsoleted.append(unit)
        # Appended in a second pass, after the scan over the template.
        for unit in newlyobsoleted:
            output_store.addunit(unit)


def _do_poheaders(input_store, output_store, template_store):
    """Adds initialized PO headers to output store.

    Header values are collected from the template store (translator-owned
    fields) and from the input store (template-owned fields), then passed to
    ``output_store.init_headers``. The template's translator notes and
    fuzzy state are copied onto the new header afterwards.

    :param input_store: store implementing the poheader interface.
    :param output_store: store whose header is initialized.
    :param template_store: existing translation store or ``None``; it is
        only consulted when it implements the poheader interface.
    """
    # header values
    charset = "UTF-8"
    encoding = "8bit"
    project_id_version = None
    pot_creation_date = None
    po_revision_date = None
    last_translator = None
    language_team = None
    mime_version = None
    plural_forms = None
    kwargs = {}

    # isinstance() is False for None, so this also covers a missing template.
    # The same check guards both template sections below: a template that
    # does not implement the poheader interface is not assumed to offer
    # parseheader() or header().
    template_has_poheader = isinstance(template_store, poheader.poheader)

    if template_has_poheader:
        templateheadervalues = template_store.parseheader()
        for key, value in templateheadervalues.items():
            if key == "Project-Id-Version":
                project_id_version = value
            elif key == "Last-Translator":
                last_translator = value
            elif key == "Language-Team":
                language_team = value
            elif key == "PO-Revision-Date":
                po_revision_date = value
            elif key in ("POT-Creation-Date", "MIME-Version"):
                # don't know how to handle these keys, or ignoring them
                pass
            elif key == "Content-Type":
                kwargs[key] = value
            elif key == "Content-Transfer-Encoding":
                encoding = value
            elif key == "Plural-Forms":
                plural_forms = value
            else:
                kwargs[key] = value

    inputheadervalues = input_store.parseheader()
    for key, value in inputheadervalues.items():
        if key in (
            "Project-Id-Version",
            "Last-Translator",
            "Language-Team",
            "PO-Revision-Date",
            "Content-Type",
            "Content-Transfer-Encoding",
            "Plural-Forms",
        ):
            # want to carry these from the template so we ignore them
            pass
        elif key == "POT-Creation-Date":
            pot_creation_date = value
        elif key == "MIME-Version":
            mime_version = value
        else:
            # Any other input key overrides a same-named template value.
            kwargs[key] = value

    output_header = output_store.init_headers(
        charset=charset,
        encoding=encoding,
        project_id_version=project_id_version,
        pot_creation_date=pot_creation_date,
        po_revision_date=po_revision_date,
        last_translator=last_translator,
        language_team=language_team,
        mime_version=mime_version,
        plural_forms=plural_forms,
        **kwargs
    )

    # Get the header comments and fuzziness state
    # override some values from input file
    if template_has_poheader:
        template_header = template_store.header()
        if template_header is not None:
            translator_notes = template_header.getnotes("translator")
            if translator_notes:
                output_header.addnote(
                    translator_notes,
                    "translator",
                    position="replace",
                )
            output_header.markfuzzy(template_header.isfuzzy())


def main(argv=None):
    """Command line entry point: build the option parser and run it.

    :param argv: argument list handed to ``parser.run``; ``None`` by default.
    """
    from translate.convert import convert

    # A plain key is an input format; a tuple key is (input, template) format.
    formats = {
        "pot": ("po", convertpot),
        ("pot", "po"): ("po", convertpot),
        "xlf": ("xlf", convertpot),
        ("xlf", "xlf"): ("xlf", convertpot),
        "xliff": ("xliff", convertpot),
        ("xliff", "xliff"): ("xliff", convertpot),
        "ts": ("ts", convertpot),
        "lang": ("lang", convertpot),
        ("lang", "lang"): ("lang", convertpot),
        ("ts", "ts"): ("ts", convertpot),
        "catkeys": ("catkeys", convertpot),
        ("catkeys", "catkeys"): ("catkeys", convertpot),
    }
    parser = convert.ConvertOptionParser(
        formats,
        usepots=True,
        usetemplates=True,
        allowmissingtemplate=True,
        description=__doc__,
    )

    # Each option is also appended to parser.passthrough under its dest name,
    # which matches a keyword parameter of convertpot().
    parser.add_option(
        "",
        "--tm",
        dest="tm",
        default=None,
        help="The file to use as translation memory when fuzzy matching",
    )
    parser.passthrough.append("tm")

    defaultsimilarity = 75
    parser.add_option(
        "-s",
        "--similarity",
        dest="min_similarity",
        default=defaultsimilarity,
        type="float",
        help="The minimum similarity for inclusion (default: %d%%)" % defaultsimilarity,
    )
    parser.passthrough.append("min_similarity")

    parser.add_option(
        "--nofuzzymatching",
        dest="fuzzymatching",
        action="store_false",
        default=True,
        help="Disable fuzzy matching",
    )
    parser.passthrough.append("fuzzymatching")

    parser.run(argv)


if __name__ == "__main__":
    main()