1#
2# Copyright 2005-2008,2010 Zuza Software Foundation
3#
4# This file is part of translate.
5#
6# translate is free software; you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation; either version 2 of the License, or
9# (at your option) any later version.
10#
11# translate is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program; if not, see <http://www.gnu.org/licenses/>.
18
19"""Conflict finder for Gettext PO localization files.
20
21See: http://docs.translatehouse.org/projects/translate-toolkit/en/latest/commands/poconflicts.html
22for examples and usage instructions.
23"""
24
25import os
26import sys
27
28from translate.misc import optrecurse
29from translate.storage import factory, po
30
31
class ConflictOptionParser(optrecurse.RecursiveOptionParser):
    """Option parser that drives the conflict tool.

    Besides parsing the command line it reads every input file, groups the
    translated units by their cleaned source text (``self.textmap``), works
    out which texts have more than one distinct translation
    (``self.conflictmap``) and writes one PO file per conflicting word into
    the output directory.
    """

    def parse_args(self, args=None, values=None):
        """Parse the command line, handling implicit input/output args.

        Freestanding arguments are used to fill in ``options.input`` and
        ``options.output`` when the corresponding options were not given:
        the last freestanding argument becomes the output, the others the
        input.

        :param args: argument list handed to ``optparse`` (``None`` lets
            ``optparse`` pick its default).
        :param values: optional values object handed to ``optparse``.
        :return: the ``(options, args)`` pair; ``self.error`` is called when
            no output was given or freestanding arguments are left over.
        """
        (options, args) = optrecurse.optparse.OptionParser.parse_args(
            self, args, values
        )
        # some intelligence as to what reasonable people might give on the command line
        if args and not options.input:
            if not options.output:
                # the last argument is kept back to become the output below
                options.input = args[:-1]
                args = args[-1:]
            else:
                options.input = args
                args = []
        if args and not options.output:
            options.output = args[-1]
            args = args[:-1]
        if not options.output:
            self.error("output file is required")
        if args:
            self.error(
                "You have used an invalid combination of --input, --output and freestanding args"
            )
        # a single input is stored as a plain value rather than a one-item list
        if isinstance(options.input, list) and len(options.input) == 1:
            options.input = options.input[0]
        return (options, args)

    def set_usage(self, usage=None):
        """Set the usage string.

        When ``usage`` is ``None`` the string is built from
        ``getusagestring`` of every registered option, followed by a short
        explanation of the input and output directories. Otherwise the given
        string is passed on to the parent class.
        """
        if usage is None:
            self.usage = (
                "%prog "
                + " ".join(self.getusagestring(option) for option in self.option_list)
                + "\n  input directory is searched for PO files, PO files with name of conflicting string are output in output directory"
            )
        else:
            super().set_usage(usage)

    def recursiveprocess(self, options):
        """Process all input files, then build and write the conflicts.

        For recursive input the output directory is created when missing.
        Every input file is fed through ``processfile``; a failure in one
        file is reported as a warning and does not stop the run.
        """
        if self.isrecursive(options.input, "input") and getattr(
            options, "allowrecursiveinput", True
        ):
            if not self.isrecursive(options.output, "output"):
                self.warning("Output directory does not exist. Attempting to create")
                try:
                    os.mkdir(options.output)
                except Exception:
                    self.error(
                        optrecurse.optparse.OptionValueError(
                            "Output directory does not exist, attempt to create failed"
                        )
                    )
            if isinstance(options.input, list):
                inputfiles = self.recurseinputfilelist(options)
            else:
                inputfiles = self.recurseinputfiles(options)
        elif options.input:
            # single file: split it into directory (kept on options) and name
            inputfiles = [os.path.basename(options.input)]
            options.input = os.path.dirname(options.input)
        else:
            inputfiles = [options.input]
        self.textmap = {}
        progress_bar = optrecurse.ProgressBar(options.progress, inputfiles)
        for inputpath in inputfiles:
            fullinputpath = self.getfullinputpath(options, inputpath)
            try:
                success = self.processfile(None, options, fullinputpath)
            except Exception:
                self.warning(
                    "Error processing: input %s" % (fullinputpath),
                    options,
                    sys.exc_info(),
                )
                success = False
            progress_bar.report_progress(inputpath, success)
        del progress_bar
        self.buildconflictmap()
        self.outputconflicts(options)

    def clean(self, string, options):
        """Return the text of ``string`` that is used for matching.

        The text is lower-cased when ``options.ignorecase`` is set, every
        character of ``options.accelchars`` is removed and surrounding
        whitespace is stripped.
        """
        if options.ignorecase:
            string = string.lower()
        for accelerator in options.accelchars:
            string = string.replace(accelerator, "")
        return string.strip()

    def processfile(self, fileprocessor, options, fullinputpath):
        """Collect the translated units of one file into ``self.textmap``.

        Headers, untranslated units and units with plurals are skipped. With
        ``options.invert`` the roles of source and target are swapped, so
        units are grouped by their translation instead.

        :param fileprocessor: unused; kept for interface compatibility.
        :return: ``True`` once the file has been read, so the caller can
            report the file as successfully processed.
        """
        inputfile = self.openinputfile(options, fullinputpath)
        inputfile = factory.getobject(inputfile)
        for unit in inputfile.units:
            if unit.isheader() or not unit.istranslated():
                continue
            if unit.hasplural():
                continue
            if not options.invert:
                source = self.clean(unit.source, options)
                target = self.clean(unit.target, options)
            else:
                target = self.clean(unit.source, options)
                source = self.clean(unit.target, options)
            self.textmap.setdefault(source, []).append((target, unit, fullinputpath))
        return True

    def flatten(self, text, joinchar):
        """Reduce ``text` to its words joined by ``joinchar``.

        Alphanumeric characters are kept; every run of other characters that
        follows a word is replaced by a single ``joinchar``. Leading
        separators are dropped and a trailing ``joinchar`` is stripped.
        """
        pieces = []
        for char in text:
            if char.isalnum():
                pieces.append(char)
            elif pieces and pieces[-1][-1:].isalnum():
                # only the first separator after a word produces a joinchar
                pieces.append(joinchar)
        return "".join(pieces).rstrip(joinchar)

    def buildconflictmap(self):
        """Work out which strings are conflicting.

        A text conflicts when its units carry more than one distinct target.
        Conflicts are stored in ``self.conflictmap`` keyed by the flattened
        text; texts that flatten to at most one character are ignored.
        """
        self.conflictmap = {}
        for source, translations in self.textmap.items():
            flatsource = self.flatten(source, " ")
            if len(flatsource) <= 1:
                continue
            uniquetargets = {target for target, _unit, _filename in translations}
            if len(uniquetargets) > 1:
                # Different texts can flatten to the same key; accumulate
                # into a fresh list so no conflict overwrites another and
                # the lists held by self.textmap stay untouched.
                self.conflictmap.setdefault(flatsource, []).extend(translations)

    def outputconflicts(self, options):
        """Save the result of the conflict match.

        Prints a summary line, groups the conflicts by the longest word of
        their flattened text, merges a word with its ``s``-suffixed form and
        writes one ``<word>.po`` file per group into ``options.output``.
        Each written unit gets a ``# (poconflicts) <filename>`` comment
        naming the file it came from.
        """
        print(
            "%d/%d different strings have conflicts"
            % (len(self.conflictmap), len(self.textmap))
        )
        reducedmap = {}
        for source, translations in self.conflictmap.items():
            # the sort is stable, so of several equally long words the last
            # one in the text is used
            longestword = sorted(source.split(), key=len)[-1]
            reducedmap.setdefault(longestword, []).extend(translations)
        # reduce plurals
        # Membership is re-checked for every word because an earlier merge
        # may already have removed it (e.g. "a", "as", "ass"); looking it up
        # unconditionally would raise KeyError.
        for word in list(reducedmap):
            pluralword = word + "s"
            if word in reducedmap and pluralword in reducedmap:
                reducedmap[word].extend(reducedmap.pop(pluralword))
        for source, translations in reducedmap.items():
            flatsource = self.flatten(source, "-")
            fulloutputpath = os.path.join(options.output, flatsource + os.extsep + "po")
            conflictfile = po.pofile()
            for _target, unit, filename in translations:
                unit.othercomments.append("# (poconflicts) %s\n" % filename)
                conflictfile.units.append(unit)
            with open(fulloutputpath, "wb") as fh:
                conflictfile.serialize(fh)
197
198
def main():
    """Build the conflict option parser, register its options and run it."""
    parser = ConflictOptionParser({"po": ("po", None), None: ("po", None)})

    # (option strings, keyword settings) for every tool-specific option,
    # registered in this order
    option_specs = (
        (
            ("-I", "--ignore-case"),
            {
                "dest": "ignorecase",
                "action": "store_true",
                "default": False,
                "help": "ignore case distinctions",
            },
        ),
        (
            ("-v", "--invert"),
            {
                "dest": "invert",
                "action": "store_true",
                "default": False,
                "help": "invert the conflicts thus extracting conflicting destination words",
            },
        ),
        (
            ("", "--accelerator"),
            {
                "dest": "accelchars",
                "default": "",
                "metavar": "ACCELERATORS",
                "help": "ignores the given accelerator characters when matching",
            },
        ),
    )
    for flags, settings in option_specs:
        parser.add_option(*flags, **settings)

    parser.set_usage()
    parser.description = __doc__
    parser.run()


if __name__ == "__main__":
    main()
233