#!/usr/bin/env python3
#
# check-code - a style and portability checker for Mercurial
#
# Copyright 2010 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""style and portability checker for Mercurial

when a rule triggers wrong, do one of the following (prefer one from top):
 * do the work-around the rule suggests
 * doublecheck that it is a false match
 * improve the rule pattern
 * add an ignore pattern to the rule (3rd arg) which matches your good line
   (you can append a short comment and match this, like: #re-raises)
 * change the pattern to a warning and list the exception in test-check-code-hg
 * ONLY use no--check-code for skipping entire files from external sources
"""

from __future__ import absolute_import, print_function
import glob
import keyword
import optparse
import os
import re
import sys

if sys.version_info[0] < 3:
    opentext = open
else:

    def opentext(f):
        """Open ``f`` for reading as latin1 text.

        latin1 maps every byte to a code point below 256, so any file
        content can be decoded and fed to the translation table used by
        repquote().
        """
        return open(f, encoding='latin1')


try:
    xrange
except NameError:
    xrange = range
try:
    import re2
except ImportError:
    re2 = None

import testparseutil


def compilere(pat, multiline=False):
    """Compile ``pat``, trying re2 first when it is importable.

    :pat: regular expression source
    :multiline: prefix the pattern with ``(?m)`` when true

    Falls back to the standard ``re`` module when re2 is unavailable or
    rejects the pattern.
    """
    if multiline:
        pat = '(?m)' + pat
    if re2:
        try:
            return re2.compile(pat)
        except re2.error:
            pass
    return re.compile(pat)


# check "rules depending on implementation of repquote()" in each
# patterns (especially pypats), before changing around repquote()
_repquotefixedmap = {
    ' ': ' ',
    '\n': '\n',
    '.': 'p',
    ':': 'q',
    '%': '%',
    '\\': 'b',
    '*': 'A',
    '+': 'P',
    '-': 'M',
}


def _repquoteencodechr(i):
    """Return the one-character class code for code point ``i``.

    Characters listed in _repquotefixedmap map to their fixed code;
    letters become 'x', digits 'n', anything else 'o', and code points
    above 255 become 'u'.
    """
    if i > 255:
        return 'u'
    c = chr(i)
    if c in _repquotefixedmap:
        return _repquotefixedmap[c]
    if c.isalpha():
        return 'x'
    if c.isdigit():
        return 'n'
    return 'o'


# translation table indexed by code point (0..255)
_repquotett = ''.join(_repquoteencodechr(i) for i in xrange(256))


def repquote(m):
    """Replace the 'text' group of ``m`` by its class codes.

    The surrounding 'quote' group is kept on both sides, so the result
    has the same quoting as the matched literal.
    """
    t = m.group('text')
    t = t.translate(_repquotett)
    return m.group('quote') + t + m.group('quote')


def reppython(m):
    """Blank out a Python comment or quoted string matched by ``m``.

    A comment is replaced by '#' characters (trailing whitespace is
    kept as is); a string literal is handed to repquote().
    """
    comment = m.group('comment')
    if comment:
        l = len(comment.rstrip())
        return "#" * l + comment[l:]
    return repquote(m)


def repcomment(m):
    """Replace group 2 of ``m`` by '#' characters of the same length."""
    return m.group(1) + "#" * len(m.group(2))


def repccomment(m):
    """Blank out the body of a C comment matched by ``m``.

    Every non-whitespace character, and a space directly following a
    newline, becomes 'x'; the closing "*/" is re-appended.
    """
    t = re.sub(r"((?<=\n) )|\S", "x", m.group(2))
    return m.group(1) + t + "*/"


def repcallspaces(m):
    """Drop the whitespace that follows each newline in group 2."""
    t = re.sub(r"\n\s+", "\n", m.group(2))
    return m.group(1) + t


def repinclude(m):
    """Replace the target of an angle-bracket #include by ``<foo>``."""
    return m.group(1) + "<foo>"


def rephere(m):
    """Replace every non-whitespace character of group 2 by 'x'."""
    t = re.sub(r"\S", "x", m.group(2))
    return m.group(1) + t


# Each "pats" structure below is a pair of lists: [errors, warnings].
# Each rule is (pattern, message) or (pattern, message, ignore-pattern).
testpats = [
    [
        (r'\b(push|pop)d\b', "don't use 'pushd' or 'popd', use 'cd'"),
        (r'\W\$?\(\([^\)\n]*\)\)', "don't use (()) or $(()), use 'expr'"),
        (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"),
        (r'(?<!hg )grep.* -a', "don't use 'grep -a', use in-line python"),
        (r'sed.*-i', "don't use 'sed -i', use a temporary file"),
        (r'\becho\b.*\\n', "don't use 'echo \\n', use printf"),
        (r'echo -n', "don't use 'echo -n', use printf"),
        (r'(^|\|\s*)\bwc\b[^|]*$\n(?!.*\(re\))', "filter wc output"),
        (r'head -c', "don't use 'head -c', use 'dd'"),
        (r'tail -n', "don't use the '-n' option to tail, just use '-<num>'"),
        (r'sha1sum', "don't use sha1sum, use $TESTDIR/md5sum.py"),
        (r'\bls\b.*-\w*R', "don't use 'ls -R', use 'find'"),
        (r'printf.*[^\\]\\([1-9]|0\d)', r"don't use 'printf \NNN', use Python"),
        (r'printf.*[^\\]\\x', "don't use printf \\x, use Python"),
        (r'rm -rf \*', "don't use naked rm -rf, target a directory"),
        (
            r'\[[^\]]+==',
            '[ foo == bar ] is a bashism, use [ foo = bar ] instead',
        ),
        (
            r'(^|\|\s*)grep (-\w\s+)*[^|]*[(|]\w',
            "use egrep for extended grep syntax",
        ),
        (r'(^|\|\s*)e?grep .*\\S', "don't use \\S in regular expression"),
        (r'(?<!!)/bin/', "don't use explicit paths for tools"),
        (r'#!.*/bash', "don't use bash in shebang, use sh"),
        (r'[^\n]\Z', "no trailing newline"),
        (r'export .*=', "don't export and assign at once"),
        (r'^source\b', "don't use 'source', use '.'"),
        (r'touch -d', "don't use 'touch -d', use 'touch -t' instead"),
        (r'\bls +[^|\n-]+ +-', "options to 'ls' must come before filenames"),
        (r'[^>\n]>\s*\$HGRCPATH', "don't overwrite $HGRCPATH, append to it"),
        (r'^stop\(\)', "don't use 'stop' as a shell function name"),
        (r'(\[|\btest\b).*-e ', "don't use 'test -e', use 'test -f'"),
        (r'\[\[\s+[^\]]*\]\]', "don't use '[[ ]]', use '[ ]'"),
        (r'^alias\b.*=', "don't use alias, use a function"),
        (r'if\s*!', "don't use '!' to negate exit status"),
        (r'/dev/u?random', "don't use entropy, use /dev/zero"),
        (r'do\s*true;\s*done', "don't use true as loop body, use sleep 0"),
        (
            r'sed (-e )?\'(\d+|/[^/]*/)i(?!\\\n)',
            "put a backslash-escaped newline after sed 'i' command",
        ),
        # NOTE(review): the "^ \$ " alternative assumes a one-space
        # indent for unified-test command lines -- confirm.
        (r'^diff *-\w*[uU].*$\n(^ \$ |^$)', "prefix diff -u/-U with cmp"),
        (r'^\s+(if)? diff *-\w*[uU]', "prefix diff -u/-U with cmp"),
        (r'[\s="`\']python\s(?!bindings)', "don't use 'python', use '$PYTHON'"),
        (r'seq ', "don't use 'seq', use $TESTDIR/seq.py"),
        (r'\butil\.Abort\b', "directly use error.Abort"),
        (r'\|&', "don't use |&, use 2>&1"),
        # two literal spaces followed by " +": a single space after "="
        # is the allowed form and must not match
        (r'\w =  +\w', "only one space after = allowed"),
        (
            r'\bsed\b.*[^\\]\\n',
            "don't use 'sed ... \\n', use a \\ and a newline",
        ),
        (r'env.*-u', "don't use 'env -u VAR', use 'unset VAR'"),
        (r'cp.* -r ', "don't use 'cp -r', use 'cp -R'"),
        (r'grep.* -[ABC]', "don't use grep's context flags"),
        (
            r'find.*-printf',
            "don't use 'find -printf', it doesn't exist on BSD find(1)",
        ),
        (r'\$RANDOM ', "don't use bash-only $RANDOM to generate random values"),
    ],
    # warnings
    [
        (r'^function', "don't use 'function', use old style"),
        (r'^diff.*-\w*N', "don't use 'diff -N'"),
        (r'\$PWD|\${PWD}', "don't use $PWD, use `pwd`", "no-pwd-check"),
        (r'^([^"\'\n]|("[^"\n]*")|(\'[^\'\n]*\'))*\^', "^ must be quoted"),
        (r'kill (`|\$\()', "don't use kill, use killdaemons.py"),
    ],
]

testfilters = [
    (r"( *)(#([^!][^\n]*\S)?)", repcomment),
    (r"<<(\S+)((.|\n)*?\n\1)", rephere),
]

# NOTE(review): uprefix and the "^ ..." patterns below anchor on a single
# leading space.  Runs of whitespace inside string literals may have been
# collapsed in the copy this file was restored from -- verify these
# against the indentation actually used by command/output lines in *.t
# files before relying on them.
uprefix = r"^ \$ "
utestpats = [
    [
        (r'^(\S.*|| [$>] \S.*)[ \t]\n', "trailing whitespace on non-output"),
        (
            uprefix + r'.*\|\s*sed[^|>\n]*\n',
            "use regex test output patterns instead of sed",
        ),
        (uprefix + r'(true|exit 0)', "explicit zero exit unnecessary"),
        (
            uprefix + r'.*\|\| echo.*(fail|error)',
            "explicit exit code checks unnecessary",
        ),
        (uprefix + r'set -e', "don't use set -e"),
        (uprefix + r'(\s|fi\b|done\b)', "use > for continued lines"),
        (
            uprefix + r'.*:\.\S*/',
            "x:.y in a path does not work on msys, rewrite "
            "as x://.y, or see `hg log -k msys` for alternatives",
            r'-\S+:\.|' '# no-msys',  # -Rxxx
        ),  # in test-pull.t which is skipped on windows
        (
            r'^ [^$>].*27\.0\.0\.1',
            'use $LOCALIP not an explicit loopback address',
        ),
        (
            r'^ (?![>$] ).*\$LOCALIP.*[^)]$',
            'mark $LOCALIP output lines with (glob) to help tests in BSD jails',
        ),
        (
            r'^ (cat|find): .*: \$ENOENT\$',
            'use test -f to test for file existence',
        ),
        (
            r'^ diff -[^ -]*p',
            "don't use (external) diff with -p for portability",
        ),
        (r' readlink ', 'use readlink.py instead of readlink'),
        (
            r'^ [-+][-+][-+] .* [-+]0000 \(glob\)',
            "glob timezone field in diff output for portability",
        ),
        (
            r'^ @@ -[0-9]+ [+][0-9]+,[0-9]+ @@',
            "use '@@ -N* +N,n @@ (glob)' style chunk header for portability",
        ),
        (
            r'^ @@ -[0-9]+,[0-9]+ [+][0-9]+ @@',
            "use '@@ -N,n +N* @@ (glob)' style chunk header for portability",
        ),
        (
            r'^ @@ -[0-9]+ [+][0-9]+ @@',
            "use '@@ -N* +N* @@ (glob)' style chunk header for portability",
        ),
        (
            uprefix + r'hg( +-[^ ]+( +[^ ]+)?)* +extdiff'
            r'( +(-[^ po-]+|--(?!program|option)[^ ]+|[^-][^ ]*))*$',
            "use $RUNTESTDIR/pdiff via extdiff (or -o/-p for false-positives)",
        ),
    ],
    # warnings
    [
        (
            r'^ (?!.*\$LOCALIP)[^*?/\n]* \(glob\)$',
            "glob match with no glob string (?, *, /, and $LOCALIP)",
        ),
    ],
]

# transform plain test rules to unified test's
for i in [0, 1]:
    for tp in testpats[i]:
        p = tp[0]
        m = tp[1]
        if p.startswith('^'):
            # keep the rule anchored right after the command prefix
            p = "^ [$>] (%s)" % p[1:]
        else:
            p = "^ [$>] .*(%s)" % p
        utestpats[i].append((p, m) + tp[2:])

# don't transform the following rules:
# " > \t" and " \t" should be allowed in unified tests
testpats[0].append((r'^( *)\t', "don't use tabs to indent"))
utestpats[0].append((r'^( ?)\t', "don't use tabs to indent"))

utestfilters = [
    (r"<<(\S+)((.|\n)*?\n > \1)", rephere),
    (r"( +)(#([^!][^\n]*\S)?)", repcomment),
]

# common patterns to check *.py
commonpypats = [
    [
        (r'\\$', 'Use () to wrap long lines in Python, not \\'),
        (
            r'^\s*def\s*\w+\s*\(.*,\s*\(',
            "tuple parameter unpacking not available in Python 3+",
        ),
        (
            r'lambda\s*\(.*,.*\)',
            "tuple parameter unpacking not available in Python 3+",
        ),
        (r'(?<!def)\s+(cmp)\(', "cmp is not available in Python 3+"),
        (r'(?<!\.)\breduce\s*\(.*', "reduce is not available in Python 3+"),
        (
            r'\bdict\(.*=',
            'dict() is different in Py2 and 3 and is slower than {}',
            'dict-from-generator',
        ),
        (r'\.has_key\b', "dict.has_key is not available in Python 3+"),
        (r'\s<>\s', '<> operator is not available in Python 3+, use !='),
        (r'^\s*\t', "don't use tabs"),
        (r'\S;\s*\n', "semicolon"),
        (r'[^_]_\([ \t\n]*(?:"[^"]+"[ \t\n+]*)+%', "don't use % inside _()"),
        (r"[^_]_\([ \t\n]*(?:'[^']+'[ \t\n+]*)+%", "don't use % inside _()"),
        (r'(\w|\)),\w', "missing whitespace after ,"),
        (r'(\w|\))[+/*\-<>]\w', "missing whitespace in expression"),
        (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
        (
            (
                # a line ending with a colon, potentially with trailing comments
                r':([ \t]*#[^\n]*)?\n'
                # one that is not a pass and not only a comment
                r'(?P<indent>[ \t]+)[^#][^\n]+\n'
                # more lines at the same indent level
                r'((?P=indent)[^\n]+\n)*'
                # a pass at the same indent level, which is bogus
                r'(?P=indent)pass[ \t\n#]'
            ),
            'omit superfluous pass',
        ),
        (r'[^\n]\Z', "no trailing newline"),
        (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
        (
            r'^\s*(if|while|def|class|except|try)\s[^[\n]*:\s*[^\\n]#\s]+',
            "linebreak after :",
        ),
        (
            r'class\s[^( \n]+:',
            "old-style class, use class foo(object)",
            r'#.*old-style',
        ),
        (
            r'class\s[^( \n]+\(\):',
            "class foo() creates old style object, use class foo(object)",
            r'#.*old-style',
        ),
        (
            r'\b(%s)\('
            % '|'.join(k for k in keyword.kwlist if k not in ('print', 'exec')),
            "Python keyword is not a function",
        ),
        #    (r'class\s[A-Z][^\(]*\((?!Exception)',
        #     "don't capitalize non-exception classes"),
        #    (r'in range\(', "use xrange"),
        #    (r'^\s*print\s+', "avoid using print in core and extensions"),
        (r'[\x80-\xff]', "non-ASCII character literal"),
        (r'("\')\.format\(', "str.format() has no bytes counterpart, use %"),
        (
            r'([\(\[][ \t]\S)|(\S[ \t][\)\]])',
            "gratuitous whitespace in () or []",
        ),
        #    (r'\s\s=', "gratuitous whitespace before ="),
        (
            r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
            "missing whitespace around operator",
        ),
        (
            r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\s',
            "missing whitespace around operator",
        ),
        (
            r'\s(\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
            "missing whitespace around operator",
        ),
        (r'[^^+=*/!<>&| %-](\s=|=\s)[^= ]', "wrong whitespace around ="),
        (
            r'\([^()]*( =[^=]|[^<>!=]= )',
            "no whitespace around = for named parameters",
        ),
        (
            r'raise [^,(]+, (\([^\)]+\)|[^,\(\)]+)$',
            "don't use old-style two-argument raise, use Exception(message)",
        ),
        (r' is\s+(not\s+)?["\'0-9-]', "object comparison with literal"),
        (
            r' [=!]=\s+(True|False|None)',
            "comparison with singleton, use 'is' or 'is not' instead",
        ),
        (
            r'^\s*(while|if) [01]:',
            "use True/False for constant Boolean expression",
        ),
        (r'^\s*if False(:| +and)', 'Remove code instead of using `if False`'),
        (
            r'(?:(?<!def)\s+|\()hasattr\(',
            'hasattr(foo, bar) is broken on py2, use util.safehasattr(foo, bar) '
            'instead',
            r'#.*hasattr-py3-only',
        ),
        (r'opener\([^)]*\).read\(', "use opener.read() instead"),
        (r'opener\([^)]*\).write\(', "use opener.write() instead"),
        (r'(?i)descend[e]nt', "the proper spelling is descendAnt"),
        (r'\.debug\(\_', "don't mark debug messages for translation"),
        (r'\.strip\(\)\.split\(\)', "no need to strip before splitting"),
        (r'^\s*except\s*:', "naked except clause", r'#.*re-raises'),
        (
            r'^\s*except\s([^\(,]+|\([^\)]+\))\s*,',
            'legacy exception syntax; use "as" instead of ","',
        ),
        (r'release\(.*wlock, .*lock\)', "wrong lock release order"),
        (r'\bdef\s+__bool__\b', "__bool__ should be __nonzero__ in Python 2"),
        (
            r'os\.path\.join\(.*, *(""|\'\')\)',
            "use pathutil.normasprefix(path) instead of os.path.join(path, '')",
        ),
        (r'\s0[0-7]+\b', 'legacy octal syntax; use "0o" prefix instead of "0"'),
        # XXX only catch mutable arguments on the first line of the definition
        (r'def.*[( ]\w+=\{\}', "don't use mutable default arguments"),
        (r'\butil\.Abort\b', "directly use error.Abort"),
        (
            r'^@(\w*\.)?cachefunc',
            "module-level @cachefunc is risky, please avoid",
        ),
        (
            r'^import Queue',
            "don't use Queue, use pycompat.queue.Queue + "
            "pycompat.queue.Empty",
        ),
        (
            r'^import cStringIO',
            "don't use cStringIO.StringIO, use util.stringio",
        ),
        (r'^import urllib', "don't use urllib, use util.urlreq/util.urlerr"),
        (
            r'^import SocketServer',
            "don't use SocketServer, use util.socketserver",
        ),
        (r'^import urlparse', "don't use urlparse, use util.urlreq"),
        (r'^import xmlrpclib', "don't use xmlrpclib, use util.xmlrpclib"),
        (r'^import cPickle', "don't use cPickle, use util.pickle"),
        (r'^import pickle', "don't use pickle, use util.pickle"),
        (r'^import httplib', "don't use httplib, use util.httplib"),
        (r'^import BaseHTTPServer', "use util.httpserver instead"),
        (
            r'^(from|import) mercurial\.(cext|pure|cffi)',
            "use mercurial.policy.importmod instead",
        ),
        (r'\.next\(\)', "don't use .next(), use next(...)"),
        (
            r'([a-z]*).revision\(\1\.node\(',
            "don't convert rev to node before passing to revision(nodeorrev)",
        ),
        (r'platform\.system\(\)', "don't use platform.system(), use pycompat"),
    ],
    # warnings
    [],
]

# patterns to check normal *.py files
pypats = [
    [
        # Ideally, these should be placed in "commonpypats" for
        # consistency of coding rules in Mercurial source tree.
        # But on the other hand, these are not so seriously required for
        # python code fragments embedded in test scripts. Fixing test
        # scripts for these patterns requires many changes, and has less
        # profit than effort.
        (r'raise Exception', "don't raise generic exceptions"),
        (r'[\s\(](open|file)\([^)]*\)\.read\(', "use util.readfile() instead"),
        (
            r'[\s\(](open|file)\([^)]*\)\.write\(',
            "use util.writefile() instead",
        ),
        (
            r'^[\s\(]*(open(er)?|file)\([^)]*\)(?!\.close\(\))',
            "always assign an opened file to a variable, and close it afterwards",
        ),
        (
            r'[\s\(](open|file)\([^)]*\)\.(?!close\(\))',
            "always assign an opened file to a variable, and close it afterwards",
        ),
        # any number of complete 4-space levels followed by 1-3 stray
        # spaces, i.e. an indent that is not a multiple of four
        (r':\n(    )*( ){1,3}[^ ]', "must indent 4 spaces"),
        (r'^import atexit', "don't use atexit, use ui.atexit"),
        # rules depending on implementation of repquote()
        (
            r' x+[xpqo%APM][\'"]\n\s+[\'"]x',
            'string join across lines with no space',
        ),
        (
            r'''(?x)ui\.(status|progress|write|note|warn)\(
         [ \t\n#]*
         (?# any strings/comments might precede a string, which
           # contains translatable message)
         b?((['"]|\'\'\'|""")[ \npq%bAPMxno]*(['"]|\'\'\'|""")[ \t\n#]+)*
         (?# sequence consisting of below might precede translatable message
           # - formatting string: "% 10s", "%05d", "% -3.2f", "%*s", "%%" ...
           # - escaped character: "\\", "\n", "\0" ...
           # - character other than '%', 'b' as '\', and 'x' as alphabet)
         (['"]|\'\'\'|""")
         ((%([ n]?[PM]?([np]+|A))?x)|%%|b[bnx]|[ \nnpqAPMo])*x
         (?# this regexp can't use [^...] style,
           # because _preparepats forcibly adds "\n" into [^...],
           # even though this regexp wants match it against "\n")''',
            "missing _() in ui message (use () to hide false-positives)",
        ),
    ]
    + commonpypats[0],
    # warnings
    [
        # rules depending on implementation of repquote()
        (r'(^| )pp +xxxxqq[ \n][^\n]', "add two newlines after '.. note::'"),
    ]
    + commonpypats[1],
]

# patterns to check *.py for embedded ones in test script
embeddedpypats = [
    [] + commonpypats[0],
    # warnings
    [] + commonpypats[1],
]

# common filters to convert *.py
commonpyfilters = [
    (
        r"""(?msx)(?P<comment>\#.*?$)|
         ((?P<quote>('''|\"\"\"|(?<!')'(?!')|(?<!")"(?!")))
          (?P<text>(([^\\]|\\.)*?))
          (?P=quote))""",
        reppython,
    ),
]

# pattern only for mercurial and extensions
core_py_pats = [
    [
        # Windows tend to get confused about capitalization of the drive letter
        #
        # see mercurial.windows.abspath for details
        (
            r'os\.path\.abspath',
            "use util.abspath instead (windows)",
            r'#.*re-exports',
        ),
    ],
    # warnings
    [],
]

# filters to convert normal *.py files
pyfilters = [] + commonpyfilters

# non-filter patterns
pynfpats = [
    [
        (r'pycompat\.osname\s*[=!]=\s*[\'"]nt[\'"]', "use pycompat.iswindows"),
        (r'pycompat\.osname\s*[=!]=\s*[\'"]posix[\'"]', "use pycompat.isposix"),
        (
            r'pycompat\.sysplatform\s*[!=]=\s*[\'"]darwin[\'"]',
            "use pycompat.isdarwin",
        ),
    ],
    # warnings
    [],
]

# filters to convert *.py for embedded ones in test script
embeddedpyfilters = [] + commonpyfilters

# extension non-filter patterns
pyextnfpats = [
    [(r'^"""\n?[A-Z]', "don't capitalize docstring title")],
    # warnings
    [],
]

txtfilters = []

txtpats = [
    [
        (r'\s$', 'trailing whitespace'),
        ('.. note::[ \n][^\n]', 'add two newlines after note::'),
    ],
    [],
]

cpats = [
    [
        (r'//', "don't use //-style comments"),
        (r'\S\t', "don't use tabs except for indent"),
        (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
        (r'(while|if|do|for)\(', "use space after while/if/do/for"),
        (r'return\(', "return is not a function"),
        (r' ;', "no space before ;"),
        (r'[^;] \)', "no space before )"),
        (r'[)][{]', "space between ) and {"),
        (r'\w+\* \w+', "use int *foo, not int* foo"),
        (r'\W\([^\)]+\) \w+', "use (int)foo, not (int) foo"),
        (r'\w+ (\+\+|--)', "use foo++, not foo ++"),
        (r'\w,\w', "missing whitespace after ,"),
        (r'^[^#]\w[+/*]\w', "missing whitespace in expression"),
        (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
        (r'^#\s+\w', "use #foo, not # foo"),
        (r'[^\n]\Z', "no trailing newline"),
        (r'^\s*#import\b', "use only #include in standard C code"),
        (r'strcpy\(', "don't use strcpy, use strlcpy or memcpy"),
        (r'strcat\(', "don't use strcat"),
        # rules depending on implementation of repquote()
    ],
    # warnings
    [
        # rules depending on implementation of repquote()
    ],
]

cfilters = [
    (r'(/\*)(((\*(?!/))|[^*])*)\*/', repccomment),
    (r'''(?P<quote>(?<!")")(?P<text>([^"]|\\")+)"(?!")''', repquote),
    (r'''(#\s*include\s+<)([^>]+)>''', repinclude),
    (r'(\()([^)]+\))', repcallspaces),
]

inutilpats = [
    [
        (r'\bui\.', "don't use ui in util"),
    ],
    # warnings
    [],
]

inrevlogpats = [
    [
        (r'\brepo\.', "don't use repo in revlog"),
    ],
    # warnings
    [],
]

webtemplatefilters = []

webtemplatepats = [
    [],
    [
        (
            r'{desc(\|(?!websub|firstline)[^\|]*)+}',
            'follow desc keyword with either firstline or websub',
        ),
    ],
]

allfilesfilters = []

allfilespats = [
    [
        (
            r'(http|https)://[a-zA-Z0-9./]*selenic.com/',
            'use mercurial-scm.org domain URL',
        ),
        (
            r'mercurial@selenic\.com',
            'use mercurial-scm.org domain for mercurial ML address',
        ),
        (
            r'mercurial-devel@selenic\.com',
            'use mercurial-scm.org domain for mercurial-devel ML address',
        ),
    ],
    # warnings
    [],
]

py3pats = [
    [
        (
            r'os\.environ',
            "use encoding.environ instead (py3)",
            r'#.*re-exports',
        ),
        (r'os\.name', "use pycompat.osname instead (py3)"),
        (r'os\.getcwd', "use encoding.getcwd instead (py3)", r'#.*re-exports'),
        (r'os\.sep', "use pycompat.ossep instead (py3)"),
        (r'os\.pathsep', "use pycompat.ospathsep instead (py3)"),
        (r'os\.altsep', "use pycompat.osaltsep instead (py3)"),
        (r'sys\.platform', "use pycompat.sysplatform instead (py3)"),
        (r'getopt\.getopt', "use pycompat.getoptb instead (py3)"),
        (r'os\.getenv', "use encoding.environ.get instead"),
        (r'os\.setenv', "modifying the environ dict is not preferred"),
        (r'(?<!pycompat\.)xrange', "use pycompat.xrange instead (py3)"),
    ],
    # warnings
    [],
]

# (desc,
#  regex matched against the file name,
#  regex searched in the file content ("magic"), or '' for none,
#  list of filters to convert target files,
#  list of patterns to detect errors/warnings)
checks = [
    ('python', r'.*\.(py|cgi)$', r'^#!.*python', pyfilters, pypats),
    ('python', r'.*\.(py|cgi)$', r'^#!.*python', [], pynfpats),
    ('python', r'.*hgext.*\.py$', '', [], pyextnfpats),
    (
        'python 3',
        r'.*(hgext|mercurial)/(?!demandimport|policy|pycompat).*\.py',
        '',
        pyfilters,
        py3pats,
    ),
    (
        'core files',
        r'.*(hgext|mercurial)/(?!demandimport|policy|pycompat).*\.py',
        '',
        pyfilters,
        core_py_pats,
    ),
    ('test script', r'(.*/)?test-[^.~]*$', '', testfilters, testpats),
    ('c', r'.*\.[ch]$', '', cfilters, cpats),
    ('unified test', r'.*\.t$', '', utestfilters, utestpats),
    (
        'layering violation repo in revlog',
        r'mercurial/revlog\.py',
        '',
        pyfilters,
        inrevlogpats,
    ),
    (
        'layering violation ui in util',
        r'mercurial/util\.py',
        '',
        pyfilters,
        inutilpats,
    ),
    ('txt', r'.*\.txt$', '', txtfilters, txtpats),
    (
        'web template',
        r'mercurial/templates/.*\.tmpl',
        '',
        webtemplatefilters,
        webtemplatepats,
    ),
    ('all except for .po', r'.*(?<!\.po)$', '', allfilesfilters, allfilespats),
]

# (desc,
#  func to pick up embedded code fragments,
#  list of patterns to convert target files
#  list of patterns to detect errors/warnings)
embeddedchecks = [
    (
        'embedded python',
        testparseutil.pyembedded,
        embeddedpyfilters,
        embeddedpypats,
    )
]


def _preparepats():
    """Compile, in place, every pattern and filter used by the checks.

    Rule patterns are first rewritten for multi-line searching (see the
    comments in preparefailandwarn) and compiled with re.MULTILINE.
    Entries that are already compiled are left alone, so calling this
    function more than once is harmless.
    """

    def preparefailandwarn(failandwarn):
        for pats in failandwarn:
            for i, pseq in enumerate(pats):
                # fix-up regexes for multi-line searches
                p = pseq[0]
                if hasattr(p, 'finditer'):
                    # already compiled by an earlier call
                    continue
                # \s doesn't match \n (done in two steps)
                # first, we replace \s that appears in a set already
                p = re.sub(r'\[\\s', r'[ \\t', p)
                # now we replace other \s instances.
                p = re.sub(r'(?<!(\\|\[))\\s', r'[ \\t]', p)
                # [^...] doesn't match newline
                p = re.sub(r'(?<!\\)\[\^', r'[^\\n', p)

                pats[i] = (re.compile(p, re.MULTILINE),) + pseq[1:]

    def preparefilters(filters):
        # filter lists are shared between several checks; re.compile()
        # hands back an already compiled pattern unchanged
        for i, flt in enumerate(filters):
            filters[i] = re.compile(flt[0]), flt[1]

    for cs in (checks, embeddedchecks):
        for c in cs:
            failandwarn = c[-1]
            preparefailandwarn(failandwarn)

            filters = c[-2]
            preparefilters(filters)


class norepeatlogger(object):
    """Logger which prints each faulty line once, however many errors
    are reported against it in a row."""

    def __init__(self):
        # (fname, lineno, line) of the most recently printed header
        self._lastseen = None

    def log(self, fname, lineno, line, msg, blame):
        """print error related to a given line of a given file.

        The faulty line will also be printed but only once in the case
        of multiple errors.

        :fname: filename
        :lineno: line number
        :line: actual content of the line
        :msg: error message
        :blame: blame information to show next to the location, or a
                false value to omit it
        """
        msgid = fname, lineno, line
        if msgid != self._lastseen:
            if blame:
                print("%s:%d (%s):" % (fname, lineno, blame))
            else:
                print("%s:%d:" % (fname, lineno))
            print(" > %s" % line)
            self._lastseen = msgid
        print(" " + msg)


_defaultlogger = norepeatlogger()


def getblame(f):
    """Return annotate information for each line of ``f``.

    Runs ``hg annotate -un`` and returns a list of
    ``(line, user, rev)`` tuples, one per output line.
    """
    lines = []
    # NOTE(review): the file name is interpolated into a shell command
    # unquoted; names containing whitespace or shell metacharacters are
    # not handled.
    pipe = os.popen('hg annotate -un %s' % f)
    try:
        for l in pipe:
            start, line = l.split(':', 1)
            user, rev = start.split()
            # drop the space after ':' and the trailing newline
            lines.append((line[1:-1], user, rev))
    finally:
        pipe.close()
    return lines


def checkfile(
    f,
    logfunc=_defaultlogger.log,
    maxerr=None,
    warnings=False,
    blame=False,
    debug=False,
    lineno=True,
):
    """checks style and portability of a given file

    :f: filepath
    :logfunc: function used to report error
              logfunc(filename, linenumber, linecontent, errormessage, blame)
    :maxerr: number of error to display before aborting.
             Set to a false value (default: None) to report all errors
    :warnings: whether warning level checks should be applied
    :blame: whether blame information should be displayed
    :debug: whether debug information should be displayed
    :lineno: whether line numbers should be displayed

    return True if no error is found, False otherwise.  A file which
    can't be read is reported and counted as error-free.  A file which
    opts out of checking yields the (true) string "Skip".
    """
    result = True

    try:
        with opentext(f) as fp:
            try:
                pre = fp.read()
            except UnicodeDecodeError as e:
                print("%s while reading %s" % (e, f))
                return result
    except IOError as e:
        print("Skipping %s, %s" % (f, str(e).split(':', 1)[0]))
        return result

    # context information shared while single checkfile() invocation
    context = {'blamecache': None}

    for name, match, magic, filters, pats in checks:
        if debug:
            print(name, f)
        if not (re.match(match, f) or (magic and re.search(magic, pre))):
            if debug:
                print(
                    "Skipping %s for %s it doesn't match %s" % (name, f, match)
                )
            continue
        if "no-" "check-code" in pre:
            # If you're looking at this line, it's because a file has:
            # no- check- code
            # but the reason to output skipping is to make life for
            # tests easier. So, instead of writing it with a normal
            # spelling, we write it with the expected spelling from
            # tests/test-check-code.t
            print("Skipping %s it has no-che?k-code (glob)" % f)
            return "Skip"  # skip checking this file

        fc = _checkfiledata(
            name,
            f,
            pre,
            filters,
            pats,
            context,
            logfunc,
            maxerr,
            warnings,
            blame,
            debug,
            lineno,
        )
        if fc:
            result = False

    if f.endswith('.t') and "no-" "check-code" not in pre:
        if debug:
            print("Checking embedded code in %s" % f)

        prelines = pre.splitlines()
        embeddederrors = []
        for name, embedded, filters, pats in embeddedchecks:
            # "reset curmax at each repetition" treats maxerr as "max
            # number of errors in an actual file per entry of
            # (embedded)checks"
            curmaxerr = maxerr

            for found in embedded(f, prelines, embeddederrors):
                filename, starts, ends, code = found
                fc = _checkfiledata(
                    name,
                    f,
                    code,
                    filters,
                    pats,
                    context,
                    logfunc,
                    curmaxerr,
                    warnings,
                    blame,
                    debug,
                    lineno,
                    offset=starts - 1,
                )
                if fc:
                    result = False
                    if curmaxerr:
                        if fc >= curmaxerr:
                            break
                        curmaxerr -= fc

    return result


def _checkfiledata(
    name,
    f,
    filedata,
    filters,
    pats,
    context,
    logfunc,
    maxerr,
    warnings,
    blame,
    debug,
    lineno,
    offset=None,
):
    """Execute actual error check for file data

    :name: of the checking category
    :f: filepath
    :filedata: content of a file
    :filters: to be applied before checking
    :pats: to detect errors
    :context: a dict of information shared while single checkfile() invocation
              Valid keys: 'blamecache'.
    :logfunc: function used to report error
              logfunc(filename, linenumber, linecontent, errormessage, blame)
    :maxerr: number of error to display before aborting, or False to
             report all errors
    :warnings: whether warning level checks should be applied
    :blame: whether blame information should be displayed at error reporting
    :debug: whether debug information should be displayed
    :lineno: whether lineno should be displayed at error reporting
    :offset: line number offset of 'filedata' in 'f' for checking
             an embedded code fragment, or None (offset=0 is different
             from offset=None)

    returns number of detected errors.
    """
    blamecache = context['blamecache']
    if offset is None:
        lineoffset = 0
    else:
        lineoffset = offset

    fc = 0
    # "pre" keeps the original text for reporting; "post" is the
    # filtered text which the patterns are matched against
    pre = post = filedata

    for p, r in filters:
        post = re.sub(p, r, post)
    nerrs = len(pats[0])  # nerr elements are errors
    if warnings:
        pats = pats[0] + pats[1]
    else:
        pats = pats[0]
    # print post # uncomment to show filtered version

    if debug:
        print("Checking %s for %s" % (name, f))

    prelines = None
    errors = []
    for i, pat in enumerate(pats):
        if len(pat) == 3:
            p, msg, ignore = pat
        else:
            p, msg = pat
            ignore = None
        if i >= nerrs:
            msg = "warning: " + msg

        # pos/n track the character offset and index of the line the
        # previous match was found on; matches come in increasing
        # order, so the scan never has to restart from the top
        pos = 0
        n = 0
        for m in p.finditer(post):
            if prelines is None:
                prelines = pre.splitlines()
                postlines = post.splitlines(True)

            start = m.start()
            while n < len(postlines):
                step = len(postlines[n])
                if pos + step > start:
                    break
                pos += step
                n += 1
            # report the unfiltered line at the same index
            l = prelines[n]

            if ignore and re.search(ignore, l, re.MULTILINE):
                if debug:
                    print(
                        "Skipping %s for %s:%s (ignore pattern)"
                        % (name, f, (n + lineoffset))
                    )
                continue
            bd = ""
            if blame:
                bd = 'working directory'
                if blamecache is None:
                    blamecache = getblame(f)
                    context['blamecache'] = blamecache
                if (n + lineoffset) < len(blamecache):
                    bl, bu, br = blamecache[(n + lineoffset)]
                    if offset is None and bl == l:
                        bd = '%s@%s' % (bu, br)
                    elif offset is not None and bl.endswith(l):
                        # "offset is not None" means "checking
                        # embedded code fragment". In this case,
                        # "l" does not have information about the
                        # beginning of an *original* line in the
                        # file (e.g. ' > ').
                        # Therefore, use "str.endswith()", and
                        # show "maybe" for a little loose
                        # examination.
                        bd = '%s@%s, maybe' % (bu, br)

            errors.append((f, lineno and (n + lineoffset + 1), l, msg, bd))

    errors.sort()
    for e in errors:
        logfunc(*e)
        fc += 1
        if maxerr and fc >= maxerr:
            print(" (too many errors, giving up)")
            break

    return fc


def main():
    """Command line entry point.

    Checks the files given as arguments, every entry of the current
    directory when there are none, or the list read from stdin when the
    only argument is '-'.  Returns 1 if checkfile() returned a false
    value for any file, 0 otherwise.
    """
    parser = optparse.OptionParser("%prog [options] [files | -]")
    parser.add_option(
        "-w",
        "--warnings",
        action="store_true",
        help="include warning-level checks",
    )
    parser.add_option(
        "-p", "--per-file", type="int", help="max warnings per file"
    )
    parser.add_option(
        "-b",
        "--blame",
        action="store_true",
        help="use annotate to generate blame info",
    )
    parser.add_option(
        "", "--debug", action="store_true", help="show debug information"
    )
    parser.add_option(
        "",
        "--nolineno",
        action="store_false",
        dest='lineno',
        help="don't show line numbers",
    )

    parser.set_defaults(
        per_file=15, warnings=False, blame=False, debug=False, lineno=True
    )
    (options, args) = parser.parse_args()

    if len(args) == 0:
        check = glob.glob("*")
    elif args == ['-']:
        # read file list from stdin
        check = sys.stdin.read().splitlines()
    else:
        check = args

    _preparepats()

    ret = 0
    for f in check:
        if not checkfile(
            f,
            maxerr=options.per_file,
            warnings=options.warnings,
            blame=options.blame,
            debug=options.debug,
            lineno=options.lineno,
        ):
            ret = 1
    return ret


if __name__ == "__main__":
    sys.exit(main())