1#
2# Copyright 2004-2010 Zuza Software Foundation
3#
4# This file is part of translate.
5#
6# This program is free software; you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation; either version 2 of the License, or
9# (at your option) any later version.
10#
11# This program is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program; if not, see <http://www.gnu.org/licenses/>.
18
19"""Convert template files (like .pot or template .xlf files) to translation
20files, preserving existing translations.
21
22See: http://docs.translatehouse.org/projects/translate-toolkit/en/latest/commands/pot2po.html
23for examples and usage instructions.
24"""
25
26
27from translate.misc.multistring import multistring
28from translate.search import match
29from translate.storage import catkeys, factory, poheader
30from translate.tools import pretranslate
31
32
def convertpot(
    input_file,
    output_file,
    template_file,
    tm=None,
    min_similarity=75,
    fuzzymatching=True,
    classes=None,
    classes_str=None,
    **kwargs
):
    """Convert a template file into a translation file.

    ``input_file`` (and ``template_file`` when it is not ``None``) is loaded
    through ``factory.getobject``, the stores are merged by
    :func:`convert_stores`, and the result is serialized to ``output_file``.

    ``tm``, ``min_similarity``, ``fuzzymatching`` and any extra keyword
    arguments are forwarded to :func:`convert_stores`. ``classes`` is only
    used for the first parse of ``input_file``; ``classes_str`` is used for
    every ``factory.getobject`` call.

    Always returns 1.
    """
    # The input is parsed twice: the first store is the reference, the
    # second one is the store that gets filled in and written out.
    reference_store = factory.getobject(
        input_file, classes=classes, classes_str=classes_str
    )
    try:
        working_store = factory.getobject(input_file, classes_str=classes_str)
    except Exception:
        # StringIO and other file like objects will be closed after parsing,
        # so the second parse may fail; convert_stores then reuses the
        # reference store as the working store.
        working_store = None

    existing_store = (
        factory.getobject(template_file, classes_str=classes_str)
        if template_file is not None
        else None
    )

    merged_store = convert_stores(
        reference_store,
        existing_store,
        working_store,
        tm,
        min_similarity,
        fuzzymatching,
        **kwargs
    )
    merged_store.serialize(output_file)
    return 1
70
71
def convert_stores(
    input_store,
    template_store,
    temp_store=None,
    tm=None,
    min_similarity=75,
    fuzzymatching=True,
    **kwargs
):
    """Merge stores (not files) and return the pretranslated output store.

    The returned store is ``temp_store``, or ``input_store`` itself when no
    ``temp_store`` is given. Its translatable units are run through
    ``pretranslate.pretranslate_unit`` against ``template_store`` and, when
    ``fuzzymatching`` is enabled, against fuzzy matchers built from
    ``template_store`` and from the translation memory ``tm``.

    ``min_similarity`` is handed to both fuzzy matchers. Extra keyword
    arguments are accepted but not used by this function.
    """
    output_store = input_store if temp_store is None else temp_store

    _prepare_merge(input_store, output_store, template_store)

    # Fuzzy matchers consumed by pretranslate.pretranslate_unit
    matchers = []
    if fuzzymatching:
        if template_store:
            template_matcher = match.matcher(
                template_store,
                max_candidates=1,
                min_similarity=min_similarity,
                max_length=3000,
                usefuzzy=True,
            )
            template_matcher.addpercentage = False
            matchers.append(template_matcher)
        if tm:
            tm_matcher = pretranslate.memory(
                tm, max_candidates=1, min_similarity=min_similarity, max_length=1000
            )
            tm_matcher.addpercentage = False
            matchers.append(tm_matcher)

    # Set up the output store (headers, metadata) before touching units
    _store_pre_merge(input_store, output_store, template_store)

    # Pretranslate every translatable unit, then apply unit level cleanup
    for unit in output_store.units:
        if not unit.istranslatable():
            continue
        merged_unit = pretranslate.pretranslate_unit(
            unit,
            template_store,
            matchers,
            mark_reused=True,
            merge_on=input_store.merge_on,
        )
        _unit_post_merge(merged_unit, input_store, output_store, template_store)

    # Store level wrapup
    _store_post_merge(input_store, output_store, template_store)

    return output_store
130
131
132##dispatchers
def _prepare_merge(input_store, output_store, template_store, **kwargs):
    """Run the format specific preparation hook, then index the stores.

    If this module defines ``_prepare_merge_<input store class name>``, it is
    called with all arguments (including ``kwargs``). Afterwards
    ``makeindex()`` is called on ``input_store`` and, when it is truthy, on
    ``template_store``.
    """
    # Format specific hooks are looked up by name in the module namespace
    module_scope = globals()
    hook_name = "_prepare_merge_" + input_store.__class__.__name__
    if hook_name in module_scope:
        module_scope[hook_name](input_store, output_store, template_store, **kwargs)

    # Build the indexes so later steps can look units up in the stores
    input_store.makeindex()
    if template_store:
        template_store.makeindex()
146
147
def _store_pre_merge(input_store, output_store, template_store, **kwargs):
    """Set up headers and metadata on the output store before merging.

    Stores implementing the ``poheader`` interface get their headers from
    :func:`_do_poheaders`. For catkeys stores the ``header`` attribute is
    copied from ``template_store`` when one is given, otherwise from
    ``input_store``. Finally, if this module defines
    ``_store_pre_merge_<input store class name>``, it is called with all
    arguments (including ``kwargs``).
    """
    if isinstance(input_store, poheader.poheader):
        # formats that implement poheader interface are a special case
        _do_poheaders(input_store, output_store, template_store)
    elif isinstance(input_store, catkeys.CatkeysFile):
        # FIXME: also this should be a format specific hook
        header_source = input_store if template_store is None else template_store
        output_store.header = header_source.header

    # Format specific hooks are looked up by name in the module namespace
    module_scope = globals()
    hook_name = "_store_pre_merge_" + input_store.__class__.__name__
    if hook_name in module_scope:
        module_scope[hook_name](input_store, output_store, template_store, **kwargs)
166
167
def _store_post_merge(input_store, output_store, template_store, **kwargs):
    """Run the format specific wrapup hook once all units are merged.

    If this module defines ``_store_post_merge_<input store class name>``, it
    is called with all arguments (including ``kwargs``); such hooks are meant
    for tasks like adding statistics or obsolete messages. Nothing happens
    when no hook exists for the store class.
    """
    # Format specific hooks are looked up by name in the module namespace
    module_scope = globals()
    hook_name = "_store_post_merge_" + input_store.__class__.__name__
    if hook_name in module_scope:
        module_scope[hook_name](input_store, output_store, template_store, **kwargs)
178
179
def _unit_post_merge(input_unit, input_store, output_store, template_store, **kwargs):
    """Run the format specific cleanup hook for a single merged unit.

    If this module defines ``_unit_post_merge_<unit class name>``, it is
    called with all arguments (including ``kwargs``) to handle situations
    the merge itself does not cover. Nothing happens when no hook exists for
    the unit class.
    """
    # Hooks are keyed on the unit class here, not on the store class
    module_scope = globals()
    hook_name = "_unit_post_merge_" + input_unit.__class__.__name__
    if hook_name in module_scope:
        module_scope[hook_name](
            input_unit, input_store, output_store, template_store, **kwargs
        )
190
191
192## Format specific functions
def _unit_post_merge_pounit(input_unit, input_store, output_store, template_store):
    """PO format specific plural string initialization logic.

    For a plural unit whose target is empty, the target is replaced by a
    multistring holding one empty string per plural form, using the plural
    count from the output store header. Nothing is changed when the count is
    missing, not a number, or exactly "2".
    """
    # FIXME: do we want to do that for poxliff also?
    if not input_unit.hasplural() or len(input_unit.target) != 0:
        # Only untranslated plural units need their forms initialized
        return

    plural_count, _plural_expression = output_store.getheaderplural()
    if not plural_count or not plural_count.isdigit():
        return
    if plural_count == "2":
        return
    input_unit.target = multistring([""] * int(plural_count))
202
203
def _store_post_merge_pofile(input_store, output_store, template_store):
    """PO format specific: append newly obsoleted messages to the output.

    Every translated unit of ``template_store`` that is neither a header nor
    blank, cannot be found by id in ``input_store`` and carries no ``reused``
    attribute is made obsolete and then added to the end of
    ``output_store``. Does nothing when ``template_store`` is falsy.
    """
    if not template_store:
        return

    dropped_units = []
    for candidate in template_store.units:
        if candidate.isheader() or candidate.isblank():
            continue
        if not candidate.target:
            # Untranslated units carry nothing worth keeping
            continue
        if input_store.findid(candidate.getid()) or hasattr(candidate, "reused"):
            # Still present in the .pot, or its translation was reused
            continue
        # Not in .pot, make it obsolete
        candidate.makeobsolete()
        dropped_units.append(candidate)

    # Units are appended only after the scan of the template has finished
    for candidate in dropped_units:
        output_store.addunit(candidate)
220
221
def _do_poheaders(input_store, output_store, template_store):
    """Add initialized PO headers to the output store.

    Header values are collected from two sources and passed to
    ``output_store.init_headers``:

    - from ``template_store`` (only when it implements the ``poheader``
      interface): Project-Id-Version, Last-Translator, Language-Team,
      PO-Revision-Date, Content-Transfer-Encoding and Plural-Forms;
    - from ``input_store``: POT-Creation-Date and MIME-Version.

    Any other header key is passed through as an extra keyword argument,
    template keys first; a key present in both stores takes the input value.
    The charset is always "UTF-8" and the encoding defaults to "8bit".

    When ``template_store`` has a header unit, its translator notes replace
    those of the new header and its fuzzy state is copied over.
    """
    # Named header values, in the order they are handed to init_headers
    named_values = {
        "charset": "UTF-8",
        "encoding": "8bit",
        "project_id_version": None,
        "pot_creation_date": None,
        "po_revision_date": None,
        "last_translator": None,
        "language_team": None,
        "mime_version": None,
        "plural_forms": None,
    }
    extra_values = {}

    # Header keys taken from the template, mapped to their named value
    taken_from_template = {
        "Project-Id-Version": "project_id_version",
        "Last-Translator": "last_translator",
        "Language-Team": "language_team",
        "PO-Revision-Date": "po_revision_date",
        "Content-Transfer-Encoding": "encoding",
        "Plural-Forms": "plural_forms",
    }
    # don't know how to handle these keys, or ignoring them
    skipped_in_template = ("POT-Creation-Date", "MIME-Version")

    if template_store is not None and isinstance(template_store, poheader.poheader):
        for name, text in template_store.parseheader().items():
            if name in taken_from_template:
                named_values[taken_from_template[name]] = text
            elif name not in skipped_in_template:
                # Content-Type and all unknown keys are passed through
                extra_values[name] = text

    # Header keys taken from the input, mapped to their named value
    taken_from_input = {
        "POT-Creation-Date": "pot_creation_date",
        "MIME-Version": "mime_version",
    }
    # want to carry these from the template so we ignore them
    owned_by_template = (
        "Project-Id-Version",
        "Last-Translator",
        "Language-Team",
        "PO-Revision-Date",
        "Content-Type",
        "Content-Transfer-Encoding",
        "Plural-Forms",
    )

    for name, text in input_store.parseheader().items():
        if name in owned_by_template:
            continue
        if name in taken_from_input:
            named_values[taken_from_input[name]] = text
        else:
            extra_values[name] = text

    output_header = output_store.init_headers(**named_values, **extra_values)

    # Get the header comments and fuzziness state
    # override some values from input file
    if template_store is None:
        return
    template_header = template_store.header()
    if template_header is None:
        return

    translator_notes = template_header.getnotes("translator")
    if translator_notes:
        output_header.addnote(translator_notes, "translator", position="replace")
    output_header.markfuzzy(template_header.isfuzzy())
304
305
def main(argv=None):
    """Command-line entry point.

    Builds a ``convert.ConvertOptionParser`` for the supported formats with
    :func:`convertpot` as the converter, registers the ``--tm``,
    ``-s``/``--similarity`` and ``--nofuzzymatching`` options, and runs the
    parser on ``argv``.
    """
    from translate.convert import convert

    # Input format (alone or paired with a template format) mapped to the
    # output format and the converter to use.
    formats = {
        "pot": ("po", convertpot),
        ("pot", "po"): ("po", convertpot),
        "xlf": ("xlf", convertpot),
        ("xlf", "xlf"): ("xlf", convertpot),
        "xliff": ("xliff", convertpot),
        ("xliff", "xliff"): ("xliff", convertpot),
        "ts": ("ts", convertpot),
        "lang": ("lang", convertpot),
        ("lang", "lang"): ("lang", convertpot),
        ("ts", "ts"): ("ts", convertpot),
        "catkeys": ("catkeys", convertpot),
        ("catkeys", "catkeys"): ("catkeys", convertpot),
    }
    parser = convert.ConvertOptionParser(
        formats,
        usepots=True,
        usetemplates=True,
        allowmissingtemplate=True,
        description=__doc__,
    )

    default_similarity = 75
    # (option flags, add_option settings) for every extra command-line option
    option_specs = (
        (
            ("", "--tm"),
            {
                "dest": "tm",
                "default": None,
                "help": "The file to use as translation memory when fuzzy matching",
            },
        ),
        (
            ("-s", "--similarity"),
            {
                "dest": "min_similarity",
                "default": default_similarity,
                "type": "float",
                "help": "The minimum similarity for inclusion (default: %d%%)"
                % default_similarity,
            },
        ),
        (
            ("--nofuzzymatching",),
            {
                "dest": "fuzzymatching",
                "action": "store_false",
                "default": True,
                "help": "Disable fuzzy matching",
            },
        ),
    )
    for flags, settings in option_specs:
        parser.add_option(*flags, **settings)
        # NOTE(review): passthrough presumably lists the options forwarded
        # to the converter as keyword arguments - confirm in ConvertOptionParser.
        parser.passthrough.append(settings["dest"])

    parser.run(argv)
361
362
363if __name__ == "__main__":
364    main()
365