#
# Copyright 2005-2008,2010 Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

"""Conflict finder for Gettext PO localization files.

See: http://docs.translatehouse.org/projects/translate-toolkit/en/latest/commands/poconflicts.html
for examples and usage instructions.
"""

import os
import sys

from translate.misc import optrecurse
from translate.storage import factory, po


class ConflictOptionParser(optrecurse.RecursiveOptionParser):
    """Option parser that collects conflicting translations from PO files.

    Every translated, non-plural unit of every input file is recorded in
    ``self.textmap`` (cleaned source -> list of ``(target, unit, path)``).
    Sources that ended up with more than one distinct target are then written
    out, grouped by their longest word, as PO files in the output directory.
    """

    def parse_args(self, args=None, values=None):
        """Parse the command line, handling implicit input/output arguments.

        Free-standing arguments are used to fill in ``options.input`` and
        ``options.output`` when the corresponding option was not given.

        :param args: argument list handed to ``optparse`` (``None`` lets
            optparse pick its default).
        :param values: optional pre-populated values object for ``optparse``.
        :return: ``(options, args)``; ``args`` is empty on success because
            leftover arguments are reported through ``self.error``.
        """
        (options, args) = optrecurse.optparse.OptionParser.parse_args(
            self, args, values
        )
        # some intelligence as to what reasonable people might give on the command line
        if args and not options.input:
            if not options.output:
                # The last free-standing argument is kept back for the output.
                options.input = args[:-1]
                args = args[-1:]
            else:
                options.input = args
                args = []
        if args and not options.output:
            options.output = args[-1]
            args = args[:-1]
        if not options.output:
            self.error("output file is required")
        if args:
            self.error(
                "You have used an invalid combination of --input, --output and freestanding args"
            )
        # A one-element input list is unwrapped to the single value.
        if isinstance(options.input, list) and len(options.input) == 1:
            options.input = options.input[0]
        return (options, args)

    def set_usage(self, usage=None):
        """Set the usage string.

        :param usage: explicit usage text; when ``None`` the usage is built
            from ``getusagestring`` of every registered option plus a short
            description of the input/output directories.
        """
        if usage is None:
            self.usage = (
                "%prog "
                + " ".join(self.getusagestring(option) for option in self.option_list)
                + "\n input directory is searched for PO files, PO files with name of conflicting string are output in output directory"
            )
        else:
            super().set_usage(usage)

    def recursiveprocess(self, options):
        """Collect units from all input files, then write the conflict files.

        :param options: parsed options; ``input``, ``output`` and ``progress``
            are read here, and ``input`` is replaced by its directory when a
            single file was given.
        """
        if self.isrecursive(options.input, "input") and getattr(
            options, "allowrecursiveinput", True
        ):
            if not self.isrecursive(options.output, "output"):
                self.warning("Output directory does not exist. Attempting to create")
                try:
                    os.mkdir(options.output)
                except Exception:
                    self.error(
                        optrecurse.optparse.OptionValueError(
                            "Output directory does not exist, attempt to create failed"
                        )
                    )
            if isinstance(options.input, list):
                inputfiles = self.recurseinputfilelist(options)
            else:
                inputfiles = self.recurseinputfiles(options)
        else:
            if options.input:
                # Split a single input path into directory + file name.
                inputfiles = [os.path.basename(options.input)]
                options.input = os.path.dirname(options.input)
            else:
                inputfiles = [options.input]
        self.textmap = {}
        progress_bar = optrecurse.ProgressBar(options.progress, inputfiles)
        for inputpath in inputfiles:
            fullinputpath = self.getfullinputpath(options, inputpath)
            try:
                success = self.processfile(None, options, fullinputpath)
            except Exception:
                # Best effort: a broken file is reported and skipped so the
                # remaining files are still examined.
                self.warning(
                    "Error processing: input %s" % (fullinputpath),
                    options,
                    sys.exc_info(),
                )
                success = False
            progress_bar.report_progress(inputpath, success)
        del progress_bar
        self.buildconflictmap()
        self.outputconflicts(options)

    def clean(self, string, options):
        """Return the normalised form of ``string`` that is used for matching.

        :param string: source or target text of a unit.
        :param options: parsed options; ``ignorecase`` lower-cases the text
            and every character in ``accelchars`` is removed from it.
        :return: the text with surrounding whitespace stripped.
        """
        if options.ignorecase:
            string = string.lower()
        for accelerator in options.accelchars:
            string = string.replace(accelerator, "")
        return string.strip()

    def processfile(self, fileprocessor, options, fullinputpath):
        """Record every translated, non-plural unit of one file in ``self.textmap``.

        :param fileprocessor: unused; ``recursiveprocess`` passes ``None``.
        :param options: parsed options; ``invert`` swaps the roles of source
            and target so conflicts are grouped by translation instead.
        :param fullinputpath: path of the file to read.
        :return: ``True`` once the file has been read, so that
            ``recursiveprocess`` reports it as a success (it reports ``False``
            itself when an exception escapes from here).
        """
        inputfile = self.openinputfile(options, fullinputpath)
        inputfile = factory.getobject(inputfile)
        for unit in inputfile.units:
            if unit.isheader() or not unit.istranslated():
                continue
            if unit.hasplural():
                continue
            if not options.invert:
                source = self.clean(unit.source, options)
                target = self.clean(unit.target, options)
            else:
                target = self.clean(unit.source, options)
                source = self.clean(unit.target, options)
            self.textmap.setdefault(source, []).append((target, unit, fullinputpath))
        return True

    def flatten(self, text, joinchar):
        """Reduce ``text`` to its alphanumeric words separated by ``joinchar``.

        Each run of non-alphanumeric characters between words becomes a single
        ``joinchar``; leading runs are dropped and trailing ``joinchar`` is
        stripped.

        :param text: the text to flatten.
        :param joinchar: separator placed between words.
        :return: the flattened text.
        """
        flattext = ""
        for c in text:
            if c.isalnum():
                flattext += c
            elif flattext[-1:].isalnum():
                # Only the first separator after a word is emitted.
                flattext += joinchar
        return flattext.rstrip(joinchar)

    def buildconflictmap(self):
        """Fill ``self.conflictmap`` with the sources that have conflicting targets.

        A source conflicts when it was recorded with more than one distinct
        target. Keys are the flattened sources; flattened sources of at most
        one character are ignored.
        """
        self.conflictmap = {}
        for source, translations in self.textmap.items():
            source = self.flatten(source, " ")
            if len(source) <= 1:
                continue
            if len(translations) > 1:
                uniquetargets = {target for target, unit, filename in translations}
                if len(uniquetargets) > 1:
                    # Distinct sources can flatten to the same key (for
                    # example "Open" and "Open..."); merge their entries
                    # rather than letting the later one replace the earlier.
                    self.conflictmap.setdefault(source, []).extend(translations)

    def outputconflicts(self, options):
        """Write one PO file per conflicting word into ``options.output``.

        Conflicts are grouped under the longest word of their flattened
        source, and a word's group absorbs the group of the same word with an
        added "s". Each unit written gets a ``# (poconflicts) <file>`` comment
        naming the file it came from.

        :param options: parsed options; ``output`` is the target directory.
        """
        print(
            "%d/%d different strings have conflicts"
            % (len(self.conflictmap), len(self.textmap))
        )
        reducedmap = {}
        for source, translations in self.conflictmap.items():
            # A stable sort keeps the last of several equally long words at
            # the end, which is the one picked as the group name.
            longestword = sorted(source.split(), key=len)[-1]
            reducedmap.setdefault(longestword, []).extend(translations)
        # reduce plurals
        plurals = {word: word + "s" for word in reducedmap if word + "s" in reducedmap}
        # Longest singular first, so a chain such as "x" / "xs" / "xss"
        # collapses step by step instead of looking up a key that an earlier
        # merge has already removed.
        for word in sorted(plurals, key=len, reverse=True):
            pluralword = plurals[word]
            if word in reducedmap and pluralword in reducedmap:
                reducedmap[word].extend(reducedmap.pop(pluralword))
        for source, translations in reducedmap.items():
            flatsource = self.flatten(source, "-")
            fulloutputpath = os.path.join(options.output, flatsource + os.extsep + "po")
            conflictfile = po.pofile()
            for target, unit, filename in translations:
                unit.othercomments.append("# (poconflicts) %s\n" % filename)
                conflictfile.units.append(unit)
            with open(fulloutputpath, "wb") as fh:
                conflictfile.serialize(fh)


def main():
    """Command-line entry point: register the tool's options and run the parser."""
    formats = {"po": ("po", None), None: ("po", None)}
    parser = ConflictOptionParser(formats)
    parser.add_option(
        "-I",
        "--ignore-case",
        dest="ignorecase",
        action="store_true",
        default=False,
        help="ignore case distinctions",
    )
    parser.add_option(
        "-v",
        "--invert",
        dest="invert",
        action="store_true",
        default=False,
        help="invert the conflicts thus extracting conflicting destination words",
    )
    parser.add_option(
        "",
        "--accelerator",
        dest="accelchars",
        default="",
        metavar="ACCELERATORS",
        help="ignores the given accelerator characters when matching",
    )
    parser.set_usage()
    parser.description = __doc__
    parser.run()


if __name__ == "__main__":
    main()