1#!/usr/bin/env python3
2#
3# check-code - a style and portability checker for Mercurial
4#
5# Copyright 2010 Olivia Mackall <olivia@selenic.com>
6#
7# This software may be used and distributed according to the terms of the
8# GNU General Public License version 2 or any later version.
9
10"""style and portability checker for Mercurial
11
12when a rule triggers wrong, do one of the following (prefer one from top):
13 * do the work-around the rule suggests
14 * doublecheck that it is a false match
15 * improve the rule pattern
16 * add an ignore pattern to the rule (3rd arg) which matches your good line
17   (you can append a short comment and match this, like: #re-raises)
18 * change the pattern to a warning and list the exception in test-check-code-hg
19 * ONLY use no--check-code for skipping entire files from external sources
20"""
21
22from __future__ import absolute_import, print_function
23import glob
24import keyword
25import optparse
26import os
27import re
28import sys
29
# opentext(f) opens a file for reading as text.  On Python 3 the content
# is decoded as latin1, a codec that maps every byte value to a code
# point; on Python 2 the builtin open() is used unchanged.
if sys.version_info[0] >= 3:

    def opentext(f):
        """Open ``f`` for reading, decoding its content as latin1."""
        return open(f, encoding='latin1')


else:
    opentext = open
36
37
# xrange does not exist as a builtin on Python 3; alias it to range there
# so the rest of the module can use a single name.
try:
    xrange
except NameError:
    xrange = range
# re2 is optional: when the module cannot be imported, re2 is None and
# compilere() falls back to the standard re module.
try:
    import re2
except ImportError:
    re2 = None
46
47import testparseutil
48
49
def compilere(pat, multiline=False):
    """Compile ``pat``, trying re2 first and falling back to ``re``.

    :pat: regular expression source
    :multiline: when true, "(?m)" is prepended to the pattern

    The standard ``re`` module is used when re2 is unavailable or when
    re2 raises ``re2.error`` for the pattern.
    """
    source = ('(?m)' + pat) if multiline else pat
    if re2:
        try:
            return re2.compile(source)
        except re2.error:
            # re2 cannot handle this pattern; use the stdlib engine below
            pass
    return re.compile(source)
59
60
# check "rules depending on implementation of repquote()" in each
# patterns (especially pypats), before changing around repquote()
#
# Characters that get a dedicated replacement code inside quoted text.
# Anything not listed here is classified by _repquoteencodechr() as
# alphabetic ('x'), digit ('n') or other ('o').
_repquotefixedmap = {
    ' ': ' ',
    '\n': '\n',
    '.': 'p',
    ':': 'q',
    '%': '%',
    '\\': 'b',
    '*': 'A',
    '+': 'P',
    '-': 'M',
}
74
75
76def _repquoteencodechr(i):
77    if i > 255:
78        return 'u'
79    c = chr(i)
80    if c in _repquotefixedmap:
81        return _repquotefixedmap[c]
82    if c.isalpha():
83        return 'x'
84    if c.isdigit():
85        return 'n'
86    return 'o'
87
88
# translation table used by repquote(): the character at index i is the
# replacement code for chr(i), for i in 0..255
_repquotett = ''.join(_repquoteencodechr(i) for i in xrange(256))
90
91
def repquote(m):
    """Return the matched string literal with its content masked.

    ``m`` must provide the named groups 'quote' and 'text'.  The text is
    translated through _repquotett and wrapped in the original quote
    characters again.
    """
    masked = m.group('text').translate(_repquotett)
    quote = m.group('quote')
    return ''.join((quote, masked, quote))
96
97
def reppython(m):
    """Mask one comment or string literal matched in Python source.

    When the 'comment' group matched, every character up to the last
    non-whitespace one becomes '#' and trailing whitespace is kept as
    is.  Otherwise the match is a string literal, handled by repquote().
    """
    comment = m.group('comment')
    if not comment:
        return repquote(m)
    visible = len(comment.rstrip())
    return "#" * visible + comment[visible:]
104
105
def repcomment(m):
    """Keep group 1 and replace group 2 by '#' characters of equal length."""
    lead, comment = m.group(1, 2)
    return lead + "#" * len(comment)
108
109
def repccomment(m):
    """Mask the body (group 2) of a C comment and re-append "*/".

    Every non-whitespace character, and a space directly following a
    newline, becomes 'x'; other whitespace is left untouched.
    """
    opener, body = m.group(1), m.group(2)
    masked = re.sub(r"((?<=\n) )|\S", "x", body)
    return opener + masked + "*/"
113
114
def repcallspaces(m):
    """Strip the whitespace that follows each newline inside group 2."""
    head, rest = m.group(1, 2)
    return head + re.sub(r"\n\s+", "\n", rest)
118
119
def repinclude(m):
    """Return group 1 followed by the fixed placeholder "<foo>"."""
    prefix = m.group(1)
    return prefix + "<foo>"
122
123
def rephere(m):
    """Return group 1 followed by group 2 with non-whitespace set to 'x'."""
    tag, body = m.group(1, 2)
    return tag + re.sub(r"\S", "x", body)
127
128
# Rules for 'test script' files (see "checks" below).
#
# Layout shared by every rule table in this file: a two-element list whose
# first element holds the failing rules and whose second element holds the
# warnings.  Each rule is a tuple (regex, message[, ignore-regex]).
testpats = [
    [
        (r'\b(push|pop)d\b', "don't use 'pushd' or 'popd', use 'cd'"),
        (r'\W\$?\(\([^\)\n]*\)\)', "don't use (()) or $(()), use 'expr'"),
        (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"),
        (r'(?<!hg )grep.* -a', "don't use 'grep -a', use in-line python"),
        (r'sed.*-i', "don't use 'sed -i', use a temporary file"),
        (r'\becho\b.*\\n', "don't use 'echo \\n', use printf"),
        (r'echo -n', "don't use 'echo -n', use printf"),
        (r'(^|\|\s*)\bwc\b[^|]*$\n(?!.*\(re\))', "filter wc output"),
        (r'head -c', "don't use 'head -c', use 'dd'"),
        (r'tail -n', "don't use the '-n' option to tail, just use '-<num>'"),
        (r'sha1sum', "don't use sha1sum, use $TESTDIR/md5sum.py"),
        (r'\bls\b.*-\w*R', "don't use 'ls -R', use 'find'"),
        (r'printf.*[^\\]\\([1-9]|0\d)', r"don't use 'printf \NNN', use Python"),
        (r'printf.*[^\\]\\x', "don't use printf \\x, use Python"),
        (r'rm -rf \*', "don't use naked rm -rf, target a directory"),
        (
            r'\[[^\]]+==',
            '[ foo == bar ] is a bashism, use [ foo = bar ] instead',
        ),
        (
            r'(^|\|\s*)grep (-\w\s+)*[^|]*[(|]\w',
            "use egrep for extended grep syntax",
        ),
        (r'(^|\|\s*)e?grep .*\\S', "don't use \\S in regular expression"),
        (r'(?<!!)/bin/', "don't use explicit paths for tools"),
        (r'#!.*/bash', "don't use bash in shebang, use sh"),
        (r'[^\n]\Z', "no trailing newline"),
        (r'export .*=', "don't export and assign at once"),
        (r'^source\b', "don't use 'source', use '.'"),
        (r'touch -d', "don't use 'touch -d', use 'touch -t' instead"),
        (r'\bls +[^|\n-]+ +-', "options to 'ls' must come before filenames"),
        (r'[^>\n]>\s*\$HGRCPATH', "don't overwrite $HGRCPATH, append to it"),
        (r'^stop\(\)', "don't use 'stop' as a shell function name"),
        (r'(\[|\btest\b).*-e ', "don't use 'test -e', use 'test -f'"),
        (r'\[\[\s+[^\]]*\]\]', "don't use '[[ ]]', use '[ ]'"),
        (r'^alias\b.*=', "don't use alias, use a function"),
        (r'if\s*!', "don't use '!' to negate exit status"),
        (r'/dev/u?random', "don't use entropy, use /dev/zero"),
        (r'do\s*true;\s*done', "don't use true as loop body, use sleep 0"),
        (
            r'sed (-e )?\'(\d+|/[^/]*/)i(?!\\\n)',
            "put a backslash-escaped newline after sed 'i' command",
        ),
        (r'^diff *-\w*[uU].*$\n(^  \$ |^$)', "prefix diff -u/-U with cmp"),
        (r'^\s+(if)? diff *-\w*[uU]', "prefix diff -u/-U with cmp"),
        (r'[\s="`\']python\s(?!bindings)', "don't use 'python', use '$PYTHON'"),
        (r'seq ', "don't use 'seq', use $TESTDIR/seq.py"),
        (r'\butil\.Abort\b', "directly use error.Abort"),
        (r'\|&', "don't use |&, use 2>&1"),
        (r'\w =  +\w', "only one space after = allowed"),
        (
            r'\bsed\b.*[^\\]\\n',
            "don't use 'sed ... \\n', use a \\ and a newline",
        ),
        (r'env.*-u', "don't use 'env -u VAR', use 'unset VAR'"),
        (r'cp.* -r ', "don't use 'cp -r', use 'cp -R'"),
        (r'grep.* -[ABC]', "don't use grep's context flags"),
        (
            r'find.*-printf',
            "don't use 'find -printf', it doesn't exist on BSD find(1)",
        ),
        (r'\$RANDOM ', "don't use bash-only $RANDOM to generate random values"),
    ],
    # warnings
    [
        (r'^function', "don't use 'function', use old style"),
        (r'^diff.*-\w*N', "don't use 'diff -N'"),
        (r'\$PWD|\${PWD}', "don't use $PWD, use `pwd`", "no-pwd-check"),
        (r'^([^"\'\n]|("[^"\n]*")|(\'[^\'\n]*\'))*\^', "^ must be quoted"),
        (r'kill (`|\$\()', "don't use kill, use killdaemons.py"),
    ],
]
203
# Filters for 'test script' files: (regex, replacement function) pairs.
# The two entries mask comments (repcomment) and here-document bodies
# (rephere).
testfilters = [
    (r"( *)(#([^!][^\n]*\S)?)", repcomment),
    (r"<<(\S+)((.|\n)*?\n\1)", rephere),
]
208
# regex prefix matching the start of a "  $ " line in a unified test
uprefix = r"^  \$ "
# Rules for 'unified test' (*.t) files, in the usual
# [failures, warnings] layout.  The rules from testpats are appended to
# these lists further down, after being re-anchored to "  $ "/"  > " lines.
utestpats = [
    [
        (r'^(\S.*||  [$>] \S.*)[ \t]\n', "trailing whitespace on non-output"),
        (
            uprefix + r'.*\|\s*sed[^|>\n]*\n',
            "use regex test output patterns instead of sed",
        ),
        (uprefix + r'(true|exit 0)', "explicit zero exit unnecessary"),
        (
            uprefix + r'.*\|\| echo.*(fail|error)',
            "explicit exit code checks unnecessary",
        ),
        (uprefix + r'set -e', "don't use set -e"),
        (uprefix + r'(\s|fi\b|done\b)', "use > for continued lines"),
        (
            uprefix + r'.*:\.\S*/',
            "x:.y in a path does not work on msys, rewrite "
            "as x://.y, or see `hg log -k msys` for alternatives",
            r'-\S+:\.|' '# no-msys',  # -Rxxx
        ),  # in test-pull.t which is skipped on windows
        (
            r'^  [^$>].*27\.0\.0\.1',
            'use $LOCALIP not an explicit loopback address',
        ),
        (
            r'^  (?![>$] ).*\$LOCALIP.*[^)]$',
            'mark $LOCALIP output lines with (glob) to help tests in BSD jails',
        ),
        (
            r'^  (cat|find): .*: \$ENOENT\$',
            'use test -f to test for file existence',
        ),
        (
            r'^  diff -[^ -]*p',
            "don't use (external) diff with -p for portability",
        ),
        (r' readlink ', 'use readlink.py instead of readlink'),
        (
            r'^  [-+][-+][-+] .* [-+]0000 \(glob\)',
            "glob timezone field in diff output for portability",
        ),
        (
            r'^  @@ -[0-9]+ [+][0-9]+,[0-9]+ @@',
            "use '@@ -N* +N,n @@ (glob)' style chunk header for portability",
        ),
        (
            r'^  @@ -[0-9]+,[0-9]+ [+][0-9]+ @@',
            "use '@@ -N,n +N* @@ (glob)' style chunk header for portability",
        ),
        (
            r'^  @@ -[0-9]+ [+][0-9]+ @@',
            "use '@@ -N* +N* @@ (glob)' style chunk header for portability",
        ),
        (
            uprefix + r'hg( +-[^ ]+( +[^ ]+)?)* +extdiff'
            r'( +(-[^ po-]+|--(?!program|option)[^ ]+|[^-][^ ]*))*$',
            "use $RUNTESTDIR/pdiff via extdiff (or -o/-p for false-positives)",
        ),
    ],
    # warnings
    [
        (
            r'^  (?!.*\$LOCALIP)[^*?/\n]* \(glob\)$',
            "glob match with no glob string (?, *, /, and $LOCALIP)",
        ),
    ],
]
277
# transform plain test rules to unified test's
#
# Every rule in testpats is re-anchored so that it only matches on lines
# starting with "  $ " or "  > " and is appended to the matching list of
# utestpats.  An optional third tuple element (ignore pattern) is carried
# over unchanged.
for i in [0, 1]:
    for tp in testpats[i]:
        p = tp[0]
        m = tp[1]
        if p.startswith('^'):
            # an anchored rule stays anchored directly after the prefix
            p = "^  [$>] (%s)" % p[1:]
        else:
            # an unanchored rule may match anywhere after the prefix
            p = "^  [$>] .*(%s)" % p
        utestpats[i].append((p, m) + tp[2:])

# don't transform the following rules:
# "  > \t" and "  \t" should be allowed in unified tests
# (they are appended after the loop above so that the plain-test variant
# is not copied into utestpats)
testpats[0].append((r'^( *)\t', "don't use tabs to indent"))
utestpats[0].append((r'^( ?)\t', "don't use tabs to indent"))
293
# Filters for 'unified test' files: (regex, replacement function) pairs.
# The here-document pattern expects the closing delimiter on a "  > " line.
utestfilters = [
    (r"<<(\S+)((.|\n)*?\n  > \1)", rephere),
    (r"( +)(#([^!][^\n]*\S)?)", repcomment),
]
298
# common patterns to check *.py
#
# Shared by pypats (normal *.py files) and embeddedpypats (python code
# embedded in test scripts).  Layout: [failures, warnings]; each rule is a
# tuple (regex, message[, ignore-regex]).
commonpypats = [
    [
        (r'\\$', 'Use () to wrap long lines in Python, not \\'),
        (
            r'^\s*def\s*\w+\s*\(.*,\s*\(',
            "tuple parameter unpacking not available in Python 3+",
        ),
        (
            r'lambda\s*\(.*,.*\)',
            "tuple parameter unpacking not available in Python 3+",
        ),
        (r'(?<!def)\s+(cmp)\(', "cmp is not available in Python 3+"),
        (r'(?<!\.)\breduce\s*\(.*', "reduce is not available in Python 3+"),
        (
            r'\bdict\(.*=',
            'dict() is different in Py2 and 3 and is slower than {}',
            'dict-from-generator',
        ),
        (r'\.has_key\b', "dict.has_key is not available in Python 3+"),
        (r'\s<>\s', '<> operator is not available in Python 3+, use !='),
        (r'^\s*\t', "don't use tabs"),
        (r'\S;\s*\n', "semicolon"),
        (r'[^_]_\([ \t\n]*(?:"[^"]+"[ \t\n+]*)+%', "don't use % inside _()"),
        (r"[^_]_\([ \t\n]*(?:'[^']+'[ \t\n+]*)+%", "don't use % inside _()"),
        (r'(\w|\)),\w', "missing whitespace after ,"),
        (r'(\w|\))[+/*\-<>]\w', "missing whitespace in expression"),
        (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
        (
            (
                # a line ending with a colon, potentially with trailing comments
                r':([ \t]*#[^\n]*)?\n'
                # one that is not a pass and not only a comment
                r'(?P<indent>[ \t]+)[^#][^\n]+\n'
                # more lines at the same indent level
                r'((?P=indent)[^\n]+\n)*'
                # a pass at the same indent level, which is bogus
                r'(?P=indent)pass[ \t\n#]'
            ),
            'omit superfluous pass',
        ),
        (r'[^\n]\Z', "no trailing newline"),
        (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
        (
            r'^\s*(if|while|def|class|except|try)\s[^[\n]*:\s*[^\\n]#\s]+',
            "linebreak after :",
        ),
        (
            r'class\s[^( \n]+:',
            "old-style class, use class foo(object)",
            r'#.*old-style',
        ),
        (
            r'class\s[^( \n]+\(\):',
            "class foo() creates old style object, use class foo(object)",
            r'#.*old-style',
        ),
        (
            r'\b(%s)\('
            % '|'.join(k for k in keyword.kwlist if k not in ('print', 'exec')),
            "Python keyword is not a function",
        ),
        #    (r'class\s[A-Z][^\(]*\((?!Exception)',
        #     "don't capitalize non-exception classes"),
        #    (r'in range\(', "use xrange"),
        #    (r'^\s*print\s+', "avoid using print in core and extensions"),
        (r'[\x80-\xff]', "non-ASCII character literal"),
        # NOTE(review): ("\') is a group matching the two-character
        # sequence "' rather than a single quote character; a character
        # class ["\'] may have been intended.  Left as is because changing
        # it would make the rule fire on code that passes today - confirm
        # before touching.
        (r'("\')\.format\(', "str.format() has no bytes counterpart, use %"),
        (
            r'([\(\[][ \t]\S)|(\S[ \t][\)\]])',
            "gratuitous whitespace in () or []",
        ),
        #    (r'\s\s=', "gratuitous whitespace before ="),
        (
            r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
            "missing whitespace around operator",
        ),
        (
            r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\s',
            "missing whitespace around operator",
        ),
        (
            r'\s(\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
            "missing whitespace around operator",
        ),
        (r'[^^+=*/!<>&| %-](\s=|=\s)[^= ]', "wrong whitespace around ="),
        (
            r'\([^()]*( =[^=]|[^<>!=]= )',
            "no whitespace around = for named parameters",
        ),
        (
            r'raise [^,(]+, (\([^\)]+\)|[^,\(\)]+)$',
            "don't use old-style two-argument raise, use Exception(message)",
        ),
        (r' is\s+(not\s+)?["\'0-9-]', "object comparison with literal"),
        (
            r' [=!]=\s+(True|False|None)',
            "comparison with singleton, use 'is' or 'is not' instead",
        ),
        (
            r'^\s*(while|if) [01]:',
            "use True/False for constant Boolean expression",
        ),
        (r'^\s*if False(:| +and)', 'Remove code instead of using `if False`'),
        (
            r'(?:(?<!def)\s+|\()hasattr\(',
            'hasattr(foo, bar) is broken on py2, use util.safehasattr(foo, bar) '
            'instead',
            r'#.*hasattr-py3-only',
        ),
        (r'opener\([^)]*\).read\(', "use opener.read() instead"),
        (r'opener\([^)]*\).write\(', "use opener.write() instead"),
        (r'(?i)descend[e]nt', "the proper spelling is descendAnt"),
        (r'\.debug\(\_', "don't mark debug messages for translation"),
        (r'\.strip\(\)\.split\(\)', "no need to strip before splitting"),
        (r'^\s*except\s*:', "naked except clause", r'#.*re-raises'),
        (
            r'^\s*except\s([^\(,]+|\([^\)]+\))\s*,',
            'legacy exception syntax; use "as" instead of ","',
        ),
        (r'release\(.*wlock, .*lock\)', "wrong lock release order"),
        (r'\bdef\s+__bool__\b', "__bool__ should be __nonzero__ in Python 2"),
        (
            r'os\.path\.join\(.*, *(""|\'\')\)',
            "use pathutil.normasprefix(path) instead of os.path.join(path, '')",
        ),
        (r'\s0[0-7]+\b', 'legacy octal syntax; use "0o" prefix instead of "0"'),
        # XXX only catch mutable arguments on the first line of the definition
        (r'def.*[( ]\w+=\{\}', "don't use mutable default arguments"),
        (r'\butil\.Abort\b', "directly use error.Abort"),
        (
            r'^@(\w*\.)?cachefunc',
            "module-level @cachefunc is risky, please avoid",
        ),
        (
            r'^import Queue',
            "don't use Queue, use pycompat.queue.Queue + "
            "pycompat.queue.Empty",
        ),
        (
            r'^import cStringIO',
            "don't use cStringIO.StringIO, use util.stringio",
        ),
        (r'^import urllib', "don't use urllib, use util.urlreq/util.urlerr"),
        (
            r'^import SocketServer',
            "don't use SocketServer, use util.socketserver",
        ),
        (r'^import urlparse', "don't use urlparse, use util.urlreq"),
        (r'^import xmlrpclib', "don't use xmlrpclib, use util.xmlrpclib"),
        (r'^import cPickle', "don't use cPickle, use util.pickle"),
        (r'^import pickle', "don't use pickle, use util.pickle"),
        (r'^import httplib', "don't use httplib, use util.httplib"),
        (r'^import BaseHTTPServer', "use util.httpserver instead"),
        (
            r'^(from|import) mercurial\.(cext|pure|cffi)',
            "use mercurial.policy.importmod instead",
        ),
        (r'\.next\(\)', "don't use .next(), use next(...)"),
        (
            r'([a-z]*).revision\(\1\.node\(',
            "don't convert rev to node before passing to revision(nodeorrev)",
        ),
        (r'platform\.system\(\)', "don't use platform.system(), use pycompat"),
    ],
    # warnings
    [],
]
467
# patterns to check normal *.py files
#
# Rules specific to normal *.py files, followed by everything from
# commonpypats.  Layout: [failures, warnings].
pypats = [
    [
        # Ideally, these should be placed in "commonpypats" for
        # consistency of coding rules in Mercurial source tree.
        # But on the other hand, these are not so seriously required for
        # python code fragments embedded in test scripts. Fixing test
        # scripts for these patterns requires many changes, and has less
        # profit than effort.
        (r'raise Exception', "don't raise generic exceptions"),
        (r'[\s\(](open|file)\([^)]*\)\.read\(', "use util.readfile() instead"),
        (
            r'[\s\(](open|file)\([^)]*\)\.write\(',
            "use util.writefile() instead",
        ),
        (
            r'^[\s\(]*(open(er)?|file)\([^)]*\)(?!\.close\(\))',
            "always assign an opened file to a variable, and close it afterwards",
        ),
        (
            r'[\s\(](open|file)\([^)]*\)\.(?!close\(\))',
            "always assign an opened file to a variable, and close it afterwards",
        ),
        (r':\n(    )*( ){1,3}[^ ]', "must indent 4 spaces"),
        (r'^import atexit', "don't use atexit, use ui.atexit"),
        # rules depending on implementation of repquote()
        # (the letters below are the replacement codes produced by
        # _repquoteencodechr() and _repquotefixedmap)
        (
            r' x+[xpqo%APM][\'"]\n\s+[\'"]x',
            'string join across lines with no space',
        ),
        (
            r'''(?x)ui\.(status|progress|write|note|warn)\(
         [ \t\n#]*
         (?# any strings/comments might precede a string, which
           # contains translatable message)
         b?((['"]|\'\'\'|""")[ \npq%bAPMxno]*(['"]|\'\'\'|""")[ \t\n#]+)*
         (?# sequence consisting of below might precede translatable message
           # - formatting string: "% 10s", "%05d", "% -3.2f", "%*s", "%%" ...
           # - escaped character: "\\", "\n", "\0" ...
           # - character other than '%', 'b' as '\', and 'x' as alphabet)
         (['"]|\'\'\'|""")
         ((%([ n]?[PM]?([np]+|A))?x)|%%|b[bnx]|[ \nnpqAPMo])*x
         (?# this regexp can't use [^...] style,
           # because _preparepats forcibly adds "\n" into [^...],
           # even though this regexp wants match it against "\n")''',
            "missing _() in ui message (use () to hide false-positives)",
        ),
    ]
    + commonpypats[0],
    # warnings
    [
        # rules depending on implementation of repquote()
        (r'(^| )pp +xxxxqq[ \n][^\n]', "add two newlines after '.. note::'"),
    ]
    + commonpypats[1],
]
524
# patterns to check *.py for embedded ones in test script
#
# "[] + ..." builds new lists, so the entries compiled in place by
# _preparepats() do not replace those in the commonpypats lists.
embeddedpypats = [
    [] + commonpypats[0],
    # warnings
    [] + commonpypats[1],
]
531
# common filters to convert *.py
#
# A single verbose-mode regex matching either a comment (group 'comment')
# or a string literal (groups 'quote' and 'text'); reppython() masks
# whichever one matched.
commonpyfilters = [
    (
        r"""(?msx)(?P<comment>\#.*?$)|
         ((?P<quote>('''|\"\"\"|(?<!')'(?!')|(?<!")"(?!")))
          (?P<text>(([^\\]|\\.)*?))
          (?P=quote))""",
        reppython,
    ),
]
542
# pattern only for mercurial and extensions
# (used by the 'core files' entry of "checks"; layout: [failures, warnings])
core_py_pats = [
    [
        # Windows tend to get confused about capitalization of the drive letter
        #
        # see mercurial.windows.abspath for details
        (
            r'os\.path\.abspath',
            "use util.abspath instead (windows)",
            r'#.*re-exports',
        ),
    ],
    # warnings
    [],
]
558
# filters to convert normal *.py files
# (a new list, so it can be modified independently of commonpyfilters)
pyfilters = [] + commonpyfilters
561
# non-filter patterns
#
# These rules are registered in "checks" with an empty filter list, so
# they are matched against the unfiltered file content; they need to see
# the string literals that pyfilters would mask.
pynfpats = [
    [
        (r'pycompat\.osname\s*[=!]=\s*[\'"]nt[\'"]', "use pycompat.iswindows"),
        (r'pycompat\.osname\s*[=!]=\s*[\'"]posix[\'"]', "use pycompat.isposix"),
        (
            r'pycompat\.sysplatform\s*[!=]=\s*[\'"]darwin[\'"]',
            "use pycompat.isdarwin",
        ),
    ],
    # warnings
    [],
]
575
# filters to convert *.py for embedded ones in test script
# (a new list, so it can be modified independently of commonpyfilters)
embeddedpyfilters = [] + commonpyfilters
578
# extension non-filter patterns
# (registered in "checks" for paths matching hgext, with no filters)
pyextnfpats = [
    [(r'^"""\n?[A-Z]', "don't capitalize docstring title")],
    # warnings
    [],
]
585
# no filtering is done for *.txt files
txtfilters = []

# rules for *.txt files; layout: [failures, warnings]
txtpats = [
    [
        (r'\s$', 'trailing whitespace'),
        ('.. note::[ \n][^\n]', 'add two newlines after note::'),
    ],
    [],
]
595
# rules for C sources and headers (*.c, *.h); layout: [failures, warnings]
cpats = [
    [
        (r'//', "don't use //-style comments"),
        (r'\S\t', "don't use tabs except for indent"),
        (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
        (r'(while|if|do|for)\(', "use space after while/if/do/for"),
        (r'return\(', "return is not a function"),
        (r' ;', "no space before ;"),
        (r'[^;] \)', "no space before )"),
        (r'[)][{]', "space between ) and {"),
        (r'\w+\* \w+', "use int *foo, not int* foo"),
        (r'\W\([^\)]+\) \w+', "use (int)foo, not (int) foo"),
        (r'\w+ (\+\+|--)', "use foo++, not foo ++"),
        (r'\w,\w', "missing whitespace after ,"),
        (r'^[^#]\w[+/*]\w', "missing whitespace in expression"),
        (r'\w\s=\s\s+\w', "gratuitous whitespace after ="),
        (r'^#\s+\w', "use #foo, not # foo"),
        (r'[^\n]\Z', "no trailing newline"),
        (r'^\s*#import\b', "use only #include in standard C code"),
        (r'strcpy\(', "don't use strcpy, use strlcpy or memcpy"),
        (r'strcat\(', "don't use strcat"),
        # rules depending on implementation of repquote()
    ],
    # warnings
    [
        # rules depending on implementation of repquote()
    ],
]
624
# Filters for C files: (regex, replacement function) pairs handling, in
# this order, /* */ comments, double-quoted strings, #include <...>
# targets and whitespace after newlines inside parentheses.
cfilters = [
    (r'(/\*)(((\*(?!/))|[^*])*)\*/', repccomment),
    (r'''(?P<quote>(?<!")")(?P<text>([^"]|\\")+)"(?!")''', repquote),
    (r'''(#\s*include\s+<)([^>]+)>''', repinclude),
    (r'(\()([^)]+\))', repcallspaces),
]
631
# layering rule applied to mercurial/util.py only (see "checks")
inutilpats = [
    [
        (r'\bui\.', "don't use ui in util"),
    ],
    # warnings
    [],
]

# layering rule applied to mercurial/revlog.py only (see "checks")
inrevlogpats = [
    [
        (r'\brepo\.', "don't use repo in revlog"),
    ],
    # warnings
    [],
]
647
# no filtering is done for web templates
webtemplatefilters = []

# rules for mercurial/templates/*.tmpl; there are no failing rules, the
# only rule lives in the second (warnings) list
webtemplatepats = [
    [],
    [
        (
            r'{desc(\|(?!websub|firstline)[^\|]*)+}',
            'follow desc keyword with either firstline or websub',
        ),
    ],
]
659
# no filtering is done for the rules that apply to every file
allfilesfilters = []

# rules applied to every file whose name does not end in ".po" (see
# "checks"); layout: [failures, warnings]
allfilespats = [
    [
        (
            r'(http|https)://[a-zA-Z0-9./]*selenic.com/',
            'use mercurial-scm.org domain URL',
        ),
        (
            r'mercurial@selenic\.com',
            'use mercurial-scm.org domain for mercurial ML address',
        ),
        (
            r'mercurial-devel@selenic\.com',
            'use mercurial-scm.org domain for mercurial-devel ML address',
        ),
    ],
    # warnings
    [],
]
680
# Python 3 portability rules, used by the 'python 3' entry of "checks"
# (files under hgext/ or mercurial/, minus a few excluded module names).
# Layout: [failures, warnings].
py3pats = [
    [
        (
            r'os\.environ',
            "use encoding.environ instead (py3)",
            r'#.*re-exports',
        ),
        (r'os\.name', "use pycompat.osname instead (py3)"),
        (r'os\.getcwd', "use encoding.getcwd instead (py3)", r'#.*re-exports'),
        (r'os\.sep', "use pycompat.ossep instead (py3)"),
        (r'os\.pathsep', "use pycompat.ospathsep instead (py3)"),
        (r'os\.altsep', "use pycompat.osaltsep instead (py3)"),
        (r'sys\.platform', "use pycompat.sysplatform instead (py3)"),
        (r'getopt\.getopt', "use pycompat.getoptb instead (py3)"),
        (r'os\.getenv', "use encoding.environ.get instead"),
        (r'os\.setenv', "modifying the environ dict is not preferred"),
        (r'(?<!pycompat\.)xrange', "use pycompat.xrange instead (py3)"),
    ],
    # warnings
    [],
]
702
# (desc,
#  regex matched against the start of the file path,
#  "magic" regex searched in the file content ('' to disable),
#  list of patterns to convert target files,
#  list of patterns to detect errors/warnings)
#
# checkfile() runs an entry when the path regex matches or, if a magic
# regex is given, when that regex is found in the content.  The filter and
# rule lists are compiled in place by _preparepats().
checks = [
    ('python', r'.*\.(py|cgi)$', r'^#!.*python', pyfilters, pypats),
    ('python', r'.*\.(py|cgi)$', r'^#!.*python', [], pynfpats),
    ('python', r'.*hgext.*\.py$', '', [], pyextnfpats),
    (
        'python 3',
        r'.*(hgext|mercurial)/(?!demandimport|policy|pycompat).*\.py',
        '',
        pyfilters,
        py3pats,
    ),
    (
        'core files',
        r'.*(hgext|mercurial)/(?!demandimport|policy|pycompat).*\.py',
        '',
        pyfilters,
        core_py_pats,
    ),
    ('test script', r'(.*/)?test-[^.~]*$', '', testfilters, testpats),
    ('c', r'.*\.[ch]$', '', cfilters, cpats),
    ('unified test', r'.*\.t$', '', utestfilters, utestpats),
    (
        'layering violation repo in revlog',
        r'mercurial/revlog\.py',
        '',
        pyfilters,
        inrevlogpats,
    ),
    (
        'layering violation ui in util',
        r'mercurial/util\.py',
        '',
        pyfilters,
        inutilpats,
    ),
    ('txt', r'.*\.txt$', '', txtfilters, txtpats),
    (
        'web template',
        r'mercurial/templates/.*\.tmpl',
        '',
        webtemplatefilters,
        webtemplatepats,
    ),
    ('all except for .po', r'.*(?<!\.po)$', '', allfilesfilters, allfilespats),
]
748
# (desc,
#  func to pick up embedded code fragments,
#  list of patterns to convert target files
#  list of patterns to detect errors/warnings)
#
# checkfile() consults these entries only for files whose name ends
# in '.t'.
embeddedchecks = [
    (
        'embedded python',
        testparseutil.pyembedded,
        embeddedpyfilters,
        embeddedpypats,
    )
]
761
762
def _preparepats():
    """Compile, in place, every rule and filter regex of the check tables.

    All entries of ``checks`` and ``embeddedchecks`` are visited.  Rule
    regexes are adjusted for multi-line searching before being compiled
    with re.MULTILINE; filter regexes are compiled unchanged.
    """
    # (search, replacement) fix-ups applied in this order to rule sources
    fixups = (
        # \s doesn't match \n (done in two steps)
        # first, \s that appears in a set already
        (r'\[\\s', r'[ \\t'),
        # then the other \s instances
        (r'(?<!(\\|\[))\\s', r'[ \\t]'),
        # [^...] doesn't match newline
        (r'(?<!\\)\[\^', r'[^\\n'),
    )

    def preparefailandwarn(failandwarn):
        for rules in failandwarn:
            for idx, rule in enumerate(rules):
                source = rule[0]
                for search, replacement in fixups:
                    source = re.sub(search, replacement, source)
                compiled = re.compile(source, re.MULTILINE)
                rules[idx] = (compiled,) + rule[1:]

    def preparefilters(filters):
        for idx, entry in enumerate(filters):
            filters[idx] = (re.compile(entry[0]), entry[1])

    for table in (checks, embeddedchecks):
        for check in table:
            # the rule tables are the last element, the filters the one
            # before it, in both kinds of check entries
            preparefailandwarn(check[-1])
            preparefilters(check[-2])
790
791
class norepeatlogger(object):
    """Reporter that prints a faulty line once for consecutive errors."""

    def __init__(self):
        # (fname, lineno, line) of the most recently printed header
        self._lastseen = None

    def log(self, fname, lineno, line, msg, blame):
        """print an error related to a given line of a given file.

        The location header and the faulty line are printed only when
        they differ from those of the previous call, so several errors
        on the same line share one header.

        :fname: filename
        :lineno: line number
        :line: actual content of the line
        :msg: error message
        :blame: when true, shown in parentheses after the line number
        """
        key = (fname, lineno, line)
        if self._lastseen != key:
            if not blame:
                header = "%s:%d:" % (fname, lineno)
            else:
                header = "%s:%d (%s):" % (fname, lineno, blame)
            print(header)
            print(" > %s" % line)
            self._lastseen = key
        print(" " + msg)
816
817
# shared logger instance; its log method is checkfile()'s default logfunc
_defaultlogger = norepeatlogger()
819
820
def getblame(f):
    """Return annotate information for every line of file ``f``.

    Runs ``hg annotate -un`` on the file and returns a list with one
    ``(line, user, rev)`` tuple per output line, where ``line`` is the
    text after the first ':' without its first and last character.

    :f: filepath, interpolated as is into the shell command
    """
    lines = []
    # the pipe is used as a context manager so that it is closed as soon
    # as the output has been consumed, even if parsing a line fails
    with os.popen('hg annotate -un %s' % f) as pipe:
        for l in pipe:
            start, line = l.split(':', 1)
            user, rev = start.split()
            lines.append((line[1:-1], user, rev))
    return lines
828
829
830def checkfile(
831    f,
832    logfunc=_defaultlogger.log,
833    maxerr=None,
834    warnings=False,
835    blame=False,
836    debug=False,
837    lineno=True,
838):
839    """checks style and portability of a given file
840
841    :f: filepath
842    :logfunc: function used to report error
843              logfunc(filename, linenumber, linecontent, errormessage)
844    :maxerr: number of error to display before aborting.
845             Set to false (default) to report all errors
846
847    return True if no error is found, False otherwise.
848    """
849    result = True
850
851    try:
852        with opentext(f) as fp:
853            try:
854                pre = fp.read()
855            except UnicodeDecodeError as e:
856                print("%s while reading %s" % (e, f))
857                return result
858    except IOError as e:
859        print("Skipping %s, %s" % (f, str(e).split(':', 1)[0]))
860        return result
861
862    # context information shared while single checkfile() invocation
863    context = {'blamecache': None}
864
865    for name, match, magic, filters, pats in checks:
866        if debug:
867            print(name, f)
868        if not (re.match(match, f) or (magic and re.search(magic, pre))):
869            if debug:
870                print(
871                    "Skipping %s for %s it doesn't match %s" % (name, match, f)
872                )
873            continue
874        if "no-" "check-code" in pre:
875            # If you're looking at this line, it's because a file has:
876            # no- check- code
877            # but the reason to output skipping is to make life for
878            # tests easier. So, instead of writing it with a normal
879            # spelling, we write it with the expected spelling from
880            # tests/test-check-code.t
881            print("Skipping %s it has no-che?k-code (glob)" % f)
882            return "Skip"  # skip checking this file
883
884        fc = _checkfiledata(
885            name,
886            f,
887            pre,
888            filters,
889            pats,
890            context,
891            logfunc,
892            maxerr,
893            warnings,
894            blame,
895            debug,
896            lineno,
897        )
898        if fc:
899            result = False
900
901    if f.endswith('.t') and "no-" "check-code" not in pre:
902        if debug:
903            print("Checking embedded code in %s" % f)
904
905        prelines = pre.splitlines()
906        embeddederros = []
907        for name, embedded, filters, pats in embeddedchecks:
908            # "reset curmax at each repetition" treats maxerr as "max
            # number of errors in an actual file per entry of
            # (embedded)checks"
911            curmaxerr = maxerr
912
913            for found in embedded(f, prelines, embeddederros):
914                filename, starts, ends, code = found
915                fc = _checkfiledata(
916                    name,
917                    f,
918                    code,
919                    filters,
920                    pats,
921                    context,
922                    logfunc,
923                    curmaxerr,
924                    warnings,
925                    blame,
926                    debug,
927                    lineno,
928                    offset=starts - 1,
929                )
930                if fc:
931                    result = False
932                    if curmaxerr:
933                        if fc >= curmaxerr:
934                            break
935                        curmaxerr -= fc
936
937    return result
938
939
def _checkfiledata(
    name,
    f,
    filedata,
    filters,
    pats,
    context,
    logfunc,
    maxerr,
    warnings,
    blame,
    debug,
    lineno,
    offset=None,
):
    """Execute actual error check for file data

    :name: of the checking category
    :f: filepath
    :filedata: content of a file
    :filters: (pattern, replacement) pairs applied in order with re.sub()
              before checking. Patterns are matched against the filtered
              text, but reported lines are taken from the unfiltered text
    :pats: a pair (errors, warnings) of pattern lists. Each entry is
           (compiledpattern, message) or
           (compiledpattern, message, ignore); a match is dropped when
           "ignore" is found in the (unfiltered) line it is reported on
    :context: a dict of information shared while single checkfile() invocation
              Valid keys: 'blamecache'.
    :logfunc: function used to report error
              logfunc(filename, linenumber, linecontent, errormessage,
                      blamedata)
    :maxerr: number of error to display before aborting, or False to
             report all errors
    :warnings: whether warning level checks should be applied
    :blame: whether blame information should be displayed at error reporting
    :debug: whether debug information should be displayed
    :lineno: whether lineno should be displayed at error reporting
    :offset: line number offset of 'filedata' in 'f' for checking
             an embedded code fragment, or None (offset=0 is different
             from offset=None)

    returns number of errors reported via logfunc (at most maxerr, if
    maxerr is set).
    """
    blamecache = context['blamecache']
    lineoffset = 0 if offset is None else offset

    pre = post = filedata
    for p, r in filters:
        post = re.sub(p, r, post)
    # print(post)  # uncomment to show filtered version

    nerrs = len(pats[0])  # the first nerrs patterns are errors
    if warnings:
        pats = pats[0] + pats[1]
    else:
        pats = pats[0]

    if debug:
        print("Checking %s for %s" % (name, f))

    prelines = None  # split lazily, only once something matches
    errors = []
    for i, pat in enumerate(pats):
        if len(pat) == 3:
            p, msg, ignore = pat
        else:
            p, msg = pat
            ignore = None
        if i >= nerrs:
            msg = "warning: " + msg

        # finditer() yields matches in increasing position, so the
        # offset-to-line scan below resumes where the previous match
        # of this pattern left off.
        pos = 0
        n = 0
        for m in p.finditer(post):
            if prelines is None:
                prelines = pre.splitlines()
                postlines = post.splitlines(True)

            start = m.start()
            while n < len(postlines):
                step = len(postlines[n])
                if pos + step > start:
                    break
                pos += step
                n += 1

            # A match starting at the very end of the data (right after
            # the final newline, or anywhere in empty data) leaves "n"
            # past the last line. Report it on the last line (or on an
            # empty first line) instead of raising IndexError. "n"
            # itself is left alone to keep the scan state consistent.
            if n < len(prelines):
                lidx = n
            else:
                lidx = max(len(prelines) - 1, 0)
            l = prelines[lidx] if prelines else ''

            if ignore and re.search(ignore, l, re.MULTILINE):
                if debug:
                    # NOTE: this is the 0-based line index, unlike the
                    # 1-based line number used for reported errors
                    print(
                        "Skipping %s for %s:%s (ignore pattern)"
                        % (name, f, (lidx + lineoffset))
                    )
                continue
            bd = ""
            if blame:
                bd = 'working directory'
                if blamecache is None:
                    # computed once and shared through "context"
                    blamecache = getblame(f)
                    context['blamecache'] = blamecache
                if (lidx + lineoffset) < len(blamecache):
                    bl, bu, br = blamecache[(lidx + lineoffset)]
                    if offset is None and bl == l:
                        bd = '%s@%s' % (bu, br)
                    elif offset is not None and bl.endswith(l):
                        # "offset is not None" means "checking
                        # embedded code fragment". In this case,
                        # "l" does not have information about the
                        # beginning of an *original* line in the
                        # file (e.g. '  > ').
                        # Therefore, use "str.endswith()", and
                        # show "maybe" for a little loose
                        # examination.
                        bd = '%s@%s, maybe' % (bu, br)

            errors.append(
                (f, lineno and (lidx + lineoffset + 1), l, msg, bd)
            )

    # report in (file, line number, line, message) order rather than
    # grouped by pattern
    errors.sort()
    fc = 0
    for e in errors:
        logfunc(*e)
        fc += 1
        if maxerr and fc >= maxerr:
            print(" (too many errors, giving up)")
            break

    return fc
1066
1067
def main():
    """Command line entry point: parse options, then check each file

    The files to check are the positional arguments. Without any
    argument, every name matching "*" in the current directory is
    checked; a single "-" argument reads the list from stdin, one path
    per line.

    returns 0 if checkfile() returned a true value for every file,
    1 otherwise.
    """
    parser = optparse.OptionParser("%prog [options] [files | -]")
    addopt = parser.add_option
    addopt(
        "-w",
        "--warnings",
        action="store_true",
        default=False,
        help="include warning-level checks",
    )
    addopt(
        "-p",
        "--per-file",
        type="int",
        default=15,
        help="max warnings per file",
    )
    addopt(
        "-b",
        "--blame",
        action="store_true",
        default=False,
        help="use annotate to generate blame info",
    )
    addopt(
        "",
        "--debug",
        action="store_true",
        default=False,
        help="show debug information",
    )
    addopt(
        "",
        "--nolineno",
        action="store_false",
        dest='lineno',
        default=True,
        help="don't show line numbers",
    )
    options, args = parser.parse_args()

    if args == ['-']:
        # read file list from stdin
        targets = sys.stdin.read().splitlines()
    elif args:
        targets = args
    else:
        targets = glob.glob("*")

    _preparepats()

    checkopts = {
        'maxerr': options.per_file,
        'warnings': options.warnings,
        'blame': options.blame,
        'debug': options.debug,
        'lineno': options.lineno,
    }

    # every file is checked even after a failure; only the exit status
    # remembers that something went wrong
    status = 0
    for f in targets:
        if not checkfile(f, **checkopts):
            status = 1
    return status
1123
1124
# Run the checker only when executed as a script; the value returned by
# main() (0, or 1 if any file failed its checks) is the exit status.
if __name__ == "__main__":
    sys.exit(main())
1127