#!/usr/bin/env python3
#
# Check gcc.pot file for stylistic issues as described in
# https://gcc.gnu.org/onlinedocs/gccint/Guidelines-for-Diagnostics.html,
# especially in gcc-internal-format messages.
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3.  If not see
# <http://www.gnu.org/licenses/>.

import argparse
import re
from collections import Counter
from typing import Dict, Match

import polib

# Number of times each diagnostic text has been emitted by warn();
# printed as a summary at the end of main().
seen_warnings = Counter()


def location(msg: polib.POEntry) -> str:
    """
    Returns the first occurrence of the message, formatted as the two
    components of that occurrence joined by a colon (presumably file
    name and line number), or a fixed fallback text if the message has
    no recorded occurrences.
    """
    if msg.occurrences:
        occ = msg.occurrences[0]
        return f'{occ[0]}:{occ[1]}'
    return '<unknown location>'


def warn(msg: polib.POEntry,
         diagnostic_id: str, diagnostic: str, include_msgid=True):
    """
    Prints a single diagnostic for the given message and counts it in
    seen_warnings (keyed by the diagnostic text, not by its id).

    If include_msgid is true, the repr of the msgid is appended to the
    printed line.

    To suppress a warning for a particular message,
    add a line "#, gcclint:ignore:{diagnostic_id}" to the message.
    """

    if f'gcclint:ignore:{diagnostic_id}' in msg.flags:
        return

    seen_warnings[diagnostic] += 1

    if include_msgid:
        print(f'{location(msg)}: {diagnostic} in {repr(msg.msgid)}')
    else:
        print(f'{location(msg)}: {diagnostic}')


def lint_gcc_internal_format(msg: polib.POEntry):
    """
    Checks a single message that has the gcc-internal-format. These
    messages use a variety of placeholders like %qs, %<quotes%> and
    %q#E.

    Each nested lint_* function implements one independent check and
    reports its findings through warn().
    """

    msgid: str = msg.msgid

    def outside_quotes(m: Match[str]) -> bool:
        """
        Tells whether the start of the match lies outside %<quotes%>,
        that is, whether the text before it has as many %< as %>.
        """
        before = msgid[:m.start(0)]
        return before.count("%<") == before.count("%>")

    def lint_matching_placeholders():
        """
        Warns when literal values in placeholders are not exactly equal
        in the translation. This can happen when doing copy-and-paste
        translations of similar messages.

        To avoid these mismatches in the first place,
        structurally equal messages are found by
        lint_diagnostics_differing_only_in_placeholders.

        This check only applies when checking a finished translation
        such as de.po, not gcc.pot.
        """

        if not msg.translated():
            return

        in_msgid = re.findall('%<[^%]+%>', msgid)
        in_msgstr = re.findall('%<[^%]+%>', msg.msgstr)

        # Compared as sets: order and repetition are not significant.
        if set(in_msgid) != set(in_msgstr):
            warn(msg,
                 'placeholder-mismatch',
                 f'placeholder mismatch: msgid has {in_msgid}, '
                 f'msgstr has {in_msgstr}',
                 include_msgid=False)

    def lint_option_outside_quotes():
        """
        Warns about whitespace-separated words outside of %<quotes%>
        that look like a command line option (a dash followed by a
        letter) or that start with __builtin_.
        """
        for match in re.finditer(r'\S+', msgid):
            part = match.group()
            if not outside_quotes(match):
                continue

            if part.startswith('-'):
                if len(part) >= 2 and part[1].isalpha():
                    # -INF is explicitly exempt from this check.
                    if part == '-INF':
                        continue

                    warn(msg,
                         'option-outside-quotes',
                         'command line option outside %<quotes%>')

            if part.startswith('__builtin_'):
                warn(msg,
                     'builtin-outside-quotes',
                     'builtin function outside %<quotes%>')

    def lint_plain_apostrophe():
        """
        Warns about every apostrophe outside of %<quotes%> that is not
        directly preceded by a percent sign.
        """
        # A lookbehind is used instead of consuming the preceding
        # character so that the match starts at the apostrophe itself.
        # Otherwise outside_quotes() would cut the message one character
        # early and split an adjacent %< or %> in half, and apostrophes
        # at the very start of the message or directly after another
        # apostrophe would be missed.
        for match in re.finditer(r"(?<!%)'", msgid):
            if outside_quotes(match):
                warn(msg, 'apostrophe', 'apostrophe without leading %')

    def lint_space_before_quote():
        """
        A missing space before %< is often the result of string
        literals that are joined by the C compiler and neither literal
        has a space to separate the words.
        """

        for match in re.finditer("(.?[a-zA-Z0-9])%<", msgid):
            # A preceding %s placeholder is not a word running into
            # the quote, so it is not reported.
            if match.group(1) != '%s':
                warn(msg,
                     'no-space-before-quote',
                     '%< directly following a letter or digit')

    def lint_underscore_outside_quotes():
        """
        An underscore outside of quotes is used in several contexts,
        and many of them violate the GCC Guidelines for Diagnostics:

        * names of GCC-internal compiler functions
        * names of GCC-internal data structures
        * static_cast and the like (which are legitimate)
        """

        for match in re.finditer("_", msgid):
            if outside_quotes(match):
                warn(msg,
                     'underscore-outside-quotes',
                     'underscore outside of %<quotes%>')
                # Report at most once per message.
                return

    def lint_may_not():
        """
        The term "may not" may either mean "it could be the case"
        or "should not". These two different meanings are sometimes
        hard to tell apart.
        """

        if re.search(r'\bmay not\b', msgid):
            warn(msg,
                 'ambiguous-may-not',
                 'the term "may not" is ambiguous')

    def lint_unbalanced_quotes():
        """
        Warns when the number of %< differs from the number of %>,
        in the msgid and, for translated messages, in the msgstr.
        """
        if msgid.count("%<") != msgid.count("%>"):
            warn(msg,
                 'unbalanced-quotes',
                 'unbalanced %< and %> quotes')

        if msg.translated():
            if msg.msgstr.count("%<") != msg.msgstr.count("%>"):
                warn(msg,
                     'unbalanced-quotes',
                     'unbalanced %< and %> quotes')

    def lint_single_space_after_sentence():
        """
        After a sentence there should be two spaces.
        """

        if re.search(r'[.] [A-Z]', msgid):
            warn(msg,
                 'single-space-after-sentence',
                 'single space after sentence')

    def lint_non_canonical_quotes():
        """
        Catches %<%s%>, which can be written in the shorter form %qs.
        The same applies to %s in plain ASCII quotes.
        """
        match = re.search("%<%s%>|'%s'|\"%s\"|`%s'", msgid)
        if match:
            warn(msg,
                 'non-canonical-quotes',
                 f'placeholder {match.group()} should be written as %qs')

    lint_option_outside_quotes()
    lint_plain_apostrophe()
    lint_space_before_quote()
    lint_underscore_outside_quotes()
    lint_may_not()
    lint_unbalanced_quotes()
    lint_matching_placeholders()
    lint_single_space_after_sentence()
    lint_non_canonical_quotes()


def lint_diagnostics_differing_only_in_placeholders(po: polib.POFile):
    """
    Detects messages that are structurally the same, except that they
    use different plain strings inside %<quotes%>. These messages can
    be merged in order to prevent copy-and-paste mistakes by the
    translators.

    See bug 90119.
    """

    # Maps both normalized and original msgids to the first message
    # that produced them.
    seen: Dict[str, polib.POEntry] = {}

    for msg in po:
        msg: polib.POEntry
        msgid = msg.msgid

        normalized = re.sub('%<[^%]+%>', '%qs', msgid)
        if normalized not in seen:
            seen[normalized] = msg
            # Also remember the unmodified msgid, so that a later
            # message whose normalized form equals this exact text
            # is reported as well.
            seen[msgid] = msg
            continue

        prev = seen[normalized]
        warn(msg,
             'same-pattern',
             f'same pattern for {repr(msgid)} and '
             f'{repr(prev.msgid)} in {location(prev)}',
             include_msgid=False)


def lint_file(po: polib.POFile):
    """
    Runs the per-message checks on every message that is neither
    obsolete nor fuzzy and carries the gcc-internal-format flag,
    then runs the cross-message check on the whole file.
    """
    for msg in po:
        msg: polib.POEntry

        if not msg.obsolete and not msg.fuzzy:
            if 'gcc-internal-format' in msg.flags:
                lint_gcc_internal_format(msg)

    lint_diagnostics_differing_only_in_placeholders(po)


def main():
    """
    Parses the command line, lints the given file and prints a summary
    of the diagnostics, most frequent first.
    """
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('file', help='pot file')

    args = parser.parse_args()

    po = polib.pofile(args.file)
    lint_file(po)

    print()
    print('summary:')
    for entry in seen_warnings.most_common():
        # Only diagnostics that were seen more than once are listed.
        if entry[1] > 1:
            print(f'{entry[1]}\t{entry[0]}')


if __name__ == '__main__':
    main()