1#!/usr/bin/env python
2# pycodestyle.py - Check Python source code formatting, according to
3# PEP 8
4#
5# Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
6# Copyright (C) 2009-2014 Florent Xicluna <florent.xicluna@gmail.com>
7# Copyright (C) 2014-2016 Ian Lee <ianlee1521@gmail.com>
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation files
11# (the "Software"), to deal in the Software without restriction,
12# including without limitation the rights to use, copy, modify, merge,
13# publish, distribute, sublicense, and/or sell copies of the Software,
14# and to permit persons to whom the Software is furnished to do so,
15# subject to the following conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
24# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
25# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27# SOFTWARE.
28r"""
29Check Python source code formatting, according to PEP 8.
30
31For usage and a list of options, try this:
32$ python pycodestyle.py -h
33
34This program and its regression test suite live here:
35https://github.com/pycqa/pycodestyle
36
37Groups of errors and warnings:
38E errors
39W warnings
40100 indentation
41200 whitespace
42300 blank lines
43400 imports
44500 line length
45600 deprecation
46700 statements
47900 syntax error
48"""
49from __future__ import with_statement
50
51import bisect
52import inspect
53import keyword
54import os
55import re
56import sys
57import time
58import tokenize
59import warnings
60
try:
    from functools import lru_cache
except ImportError:
    def lru_cache(maxsize=128):  # noqa as it's a fake implementation.
        """No-op stand-in for functools.lru_cache on old interpreters.

        Caching here is purely a performance optimization, so when
        functools.lru_cache is unavailable (Python < 3.2) the decorator
        simply returns the function unchanged.
        """
        return lambda function: function
70
71from fnmatch import fnmatch
72from optparse import OptionParser
73
74try:
75    from configparser import RawConfigParser
76    from io import TextIOWrapper
77except ImportError:
78    from ConfigParser import RawConfigParser
79
# This is a performance hack -- see https://bugs.python.org/issue43014.
# Before Python 3.10, memoize tokenize._compile (when it exists) so that
# repeated tokenization does not pay regex recompilation costs; 3.10+
# no longer needs the wrapper.
if (
        sys.version_info < (3, 10) and
        callable(getattr(tokenize, '_compile', None))
):  # pragma: no cover (<py310)
    tokenize._compile = lru_cache()(tokenize._compile)  # type: ignore
86
__version__ = '2.8.0'

# Comma-separated glob patterns of paths skipped by default.
DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__,.tox'
# Error codes ignored unless overridden by configuration.
DEFAULT_IGNORE = 'E121,E123,E126,E226,E24,E704,W503,W504'
try:
    if sys.platform == 'win32':
        USER_CONFIG = os.path.expanduser(r'~\.pycodestyle')
    else:
        # Follow the XDG base-directory convention on non-Windows.
        USER_CONFIG = os.path.join(
            os.getenv('XDG_CONFIG_HOME') or os.path.expanduser('~/.config'),
            'pycodestyle'
        )
except ImportError:
    # NOTE(review): os/os.path are already imported at module top, so an
    # ImportError here looks unreachable -- presumably a historical
    # guard; confirm before removing.
    USER_CONFIG = None

# Per-project configuration files searched for a [pycodestyle] section.
PROJECT_CONFIG = ('setup.cfg', 'tox.ini')
TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite')
MAX_LINE_LENGTH = 79
# Number of blank lines between various code parts.
BLANK_LINES_CONFIG = {
    # Top level class and function.
    'top_level': 2,
    # Methods and nested class and function.
    'method': 1,
}
MAX_DOC_LENGTH = 72
INDENT_SIZE = 4
# Output templates selectable via --format.
REPORT_FORMAT = {
    'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s',
    'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s',
}
118
# compile() flag requesting an AST (matches ast.PyCF_ONLY_AST).
PyCF_ONLY_AST = 1024
SINGLETONS = frozenset(['False', 'None', 'True'])
KEYWORDS = frozenset(keyword.kwlist + ['print', 'async']) - SINGLETONS
UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-', '@'])
WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%'])
# Warn for -> function annotation operator in py3.5+ (issue 803)
FUNCTION_RETURN_ANNOTATION_OP = ['->'] if sys.version_info >= (3, 5) else []
ASSIGNMENT_EXPRESSION_OP = [':='] if sys.version_info >= (3, 8) else []
# Operators that must be surrounded by whitespace (E225 family).
WS_NEEDED_OPERATORS = frozenset([
    '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>',
    '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '=',
    'and', 'in', 'is', 'or'] +
    FUNCTION_RETURN_ANNOTATION_OP +
    ASSIGNMENT_EXPRESSION_OP)
WHITESPACE = frozenset(' \t')
NEWLINE = frozenset([tokenize.NL, tokenize.NEWLINE])
SKIP_TOKENS = NEWLINE.union([tokenize.INDENT, tokenize.DEDENT])
# ERRORTOKEN is triggered by backticks in Python 3
SKIP_COMMENTS = SKIP_TOKENS.union([tokenize.COMMENT, tokenize.ERRORTOKEN])
BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines']

# Leading run of spaces/tabs on a physical line.
INDENT_REGEX = re.compile(r'([ \t]*)')
RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,')
RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,.*,\s*\w+\s*$')
# Error/warning codes (e.g. E501) harvested from check docstrings.
ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b')
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[\[({][ \t]|[ \t][\]}),;:](?!=)')
WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?:  |\t)')
COMPARE_SINGLETON_REGEX = re.compile(r'(\bNone|\bFalse|\bTrue)?\s*([=!]=)'
                                     r'\s*(?(1)|(None|False|True))\b')
COMPARE_NEGATIVE_REGEX = re.compile(r'\b(?<!is\s)(not)\s+[^][)(}{ ]+\s+'
                                    r'(in|is)\s')
COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s+type(?:s.\w+Type'
                                r'|\s*\(\s*([^)]*[^ )])\s*\))')
KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
LAMBDA_REGEX = re.compile(r'\blambda\b')
# Unified-diff hunk header, capturing start line and length.
HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')
STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\b')
STARTSWITH_TOP_LEVEL_REGEX = re.compile(r'^(async\s+def\s+|def\s+|class\s+|@)')
STARTSWITH_INDENT_STATEMENT_REGEX = re.compile(
    r'^\s*({0})\b'.format('|'.join(s.replace(' ', r'\s+') for s in (
        'def', 'async def',
        'for', 'async for',
        'if', 'elif', 'else',
        'try', 'except', 'finally',
        'with', 'async with',
        'class',
        'while',
    )))
)
DUNDER_REGEX = re.compile(r"^__([^\s]+)__(?::\s*[a-zA-Z.0-9_\[\]\"]+)? = ")
BLANK_EXCEPT_REGEX = re.compile(r"except\s*:")

# Registry of check callables, keyed by the kind of input they inspect;
# populated by register_check().
_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}
175
176
177def _get_parameters(function):
178    if sys.version_info >= (3, 3):
179        return [parameter.name
180                for parameter
181                in inspect.signature(function).parameters.values()
182                if parameter.kind == parameter.POSITIONAL_OR_KEYWORD]
183    else:
184        return inspect.getargspec(function)[0]
185
186
def register_check(check, codes=None):
    """Register a new check object.

    ``check`` is either a plain function (a physical- or logical-line
    check, recognized by the name of its first parameter) or a class
    whose constructor takes ``(self, tree, ...)``.  Returns ``check``
    unchanged so this is usable as a decorator.
    """
    def _record(kind, check_codes, args):
        # Merge codes when the same check is registered twice.
        entry = _checks[kind].get(check)
        if entry is not None:
            entry[0].extend(check_codes or [])
        else:
            _checks[kind][check] = (check_codes or [''], args)

    if inspect.isfunction(check):
        params = _get_parameters(check)
        if params and params[0] in ('physical_line', 'logical_line'):
            if codes is None:
                # Harvest error codes from the docstring (e.g. "E501").
                codes = ERRORCODE_REGEX.findall(check.__doc__ or '')
            _record(params[0], codes, params)
    elif inspect.isclass(check):
        if _get_parameters(check.__init__)[:2] == ['self', 'tree']:
            _record('tree', codes, None)
    return check
204
205
206########################################################################
207# Plugins (check functions) for physical lines
208########################################################################
209
210@register_check
def tabs_or_spaces(physical_line, indent_char):
    r"""Never mix tabs and spaces.

    The most popular way of indenting Python is with spaces only.  The
    second-most popular way is with tabs only.  Code indented with a
    mixture of tabs and spaces should be converted to using spaces
    exclusively.  When invoking the Python command line interpreter with
    the -t option, it issues warnings about code that illegally mixes
    tabs and spaces.  When using -tt these warnings become errors.
    These options are highly recommended!

    Okay: if a == 0:\n    a = 1\n    b = 1
    E101: if a == 0:\n        a = 1\n\tb = 1
    """
    indent = INDENT_REGEX.match(physical_line).group(1)
    # Report the first indentation character that differs from the
    # file's dominant indent character.
    offset = next(
        (i for i, char in enumerate(indent) if char != indent_char),
        None,
    )
    if offset is not None:
        return offset, "E101 indentation contains mixed spaces and tabs"
229
230
231@register_check
def tabs_obsolete(physical_line):
    r"""On new projects, spaces-only are strongly recommended over tabs.

    Okay: if True:\n    return
    W191: if True:\n\treturn
    """
    indent = INDENT_REGEX.match(physical_line).group(1)
    tab_offset = indent.find('\t')
    if tab_offset != -1:
        return tab_offset, "W191 indentation contains tabs"
241
242
243@register_check
def trailing_whitespace(physical_line):
    r"""Trailing whitespace is superfluous.

    The warning returned varies on whether the line itself is blank,
    for easier filtering for those who want to indent their blank lines.

    Okay: spam(1)\n#
    W291: spam(1) \n#
    W293: class Foo(object):\n    \n    bang = 12
    """
    # Strip the line terminator characters one at a time (newline,
    # carriage return, form feed) so they do not count as trailing
    # whitespace themselves.
    for terminator in ('\n', '\r', '\x0c'):
        physical_line = physical_line.rstrip(terminator)
    stripped = physical_line.rstrip(' \t\v')
    if stripped != physical_line:
        if not stripped:
            return 0, "W293 blank line contains whitespace"
        return len(stripped), "W291 trailing whitespace"
263
264
265@register_check
def trailing_blank_lines(physical_line, lines, line_number, total_lines):
    r"""Trailing blank lines are superfluous.

    Okay: spam(1)
    W391: spam(1)\n

    However the last line should end with a new line (warning W292).
    """
    # Only the very last physical line of the file is interesting.
    if line_number != total_lines:
        return
    stripped = physical_line.rstrip('\r\n')
    if physical_line and not stripped:
        # The last line consists solely of a newline.
        return 0, "W391 blank line at end of file"
    if stripped == physical_line:
        # Nothing was stripped, so the file lacks a final newline.
        return len(lines[-1]), "W292 no newline at end of file"
280
281
282@register_check
def maximum_line_length(physical_line, max_line_length, multiline,
                        line_number, noqa):
    r"""Limit all lines to a maximum of 79 characters.

    There are still many devices around that are limited to 80 character
    lines; plus, limiting windows to 80 characters makes it possible to
    have several windows side-by-side.  The default wrapping on such
    devices looks ugly.  Therefore, please limit all lines to a maximum
    of 79 characters. For flowing long blocks of text (docstrings or
    comments), limiting the length to 72 characters is recommended.

    Reports error E501.
    """
    line = physical_line.rstrip()
    length = len(line)
    if noqa or length <= max_line_length:
        return
    # Special case: ignore long shebang lines.
    if line_number == 1 and line.startswith('#!'):
        return
    # Special case for long URLs in multi-line docstrings or comments,
    # but still report the error when the 72 first chars are whitespaces.
    chunks = line.split()
    single_long_token = (
        (len(chunks) == 1 and multiline) or
        (len(chunks) == 2 and chunks[0] == '#')
    )
    if single_long_token and \
            len(line) - len(chunks[-1]) < max_line_length - 7:
        return
    if hasattr(line, 'decode'):   # Python 2
        # The line could contain multi-byte characters.
        try:
            length = len(line.decode('utf-8'))
        except UnicodeError:
            pass
    if length > max_line_length:
        return (max_line_length, "E501 line too long "
                "(%d > %d characters)" % (length, max_line_length))
319
320
321########################################################################
322# Plugins (check functions) for logical lines
323########################################################################
324
325
def _is_one_liner(logical_line, indent_level, lines, line_number):
    """Return True if *logical_line* begins a one-line definition.

    A definition counts as a one-liner when the first non-blank line
    after its def/class line is not indented deeper than the definition
    itself.  Used to allow groups of one-liners without blank lines
    between them.
    """
    if not STARTSWITH_TOP_LEVEL_REGEX.match(logical_line):
        return False

    line_idx = line_number - 1

    if line_idx < 1:
        prev_indent = 0
    else:
        prev_indent = expand_indent(lines[line_idx - 1])

    # A more deeply indented previous line means we are inside some
    # enclosing construct, not at the start of a definition group.
    if prev_indent > indent_level:
        return False

    # Advance past decorator lines ('@...') to the def/class line.
    while line_idx < len(lines):
        line = lines[line_idx].strip()
        if not line.startswith('@') and STARTSWITH_TOP_LEVEL_REGEX.match(line):
            break
        else:
            line_idx += 1
    else:
        return False  # invalid syntax: EOF while searching for def/class

    # Find the next non-blank line after the definition line.
    next_idx = line_idx + 1
    while next_idx < len(lines):
        if lines[next_idx].strip():
            break
        else:
            next_idx += 1
    else:
        return True  # line is last in the file

    return expand_indent(lines[next_idx]) <= indent_level
359
360
361@register_check
def blank_lines(logical_line, blank_lines, indent_level, line_number,
                blank_before, previous_logical,
                previous_unindented_logical_line, previous_indent_level,
                lines):
    r"""Separate top-level function and class definitions with two blank
    lines.

    Method definitions inside a class are separated by a single blank
    line.

    Extra blank lines may be used (sparingly) to separate groups of
    related functions.  Blank lines may be omitted between a bunch of
    related one-liners (e.g. a set of dummy implementations).

    Use blank lines in functions, sparingly, to indicate logical
    sections.

    Okay: def a():\n    pass\n\n\ndef b():\n    pass
    Okay: def a():\n    pass\n\n\nasync def b():\n    pass
    Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass
    Okay: default = 1\nfoo = 1
    Okay: classify = 1\nfoo = 1

    E301: class Foo:\n    b = 0\n    def bar():\n        pass
    E302: def a():\n    pass\n\ndef b(n):\n    pass
    E302: def a():\n    pass\n\nasync def b(n):\n    pass
    E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
    E303: def a():\n\n\n\n    pass
    E304: @decorator\n\ndef a():\n    pass
    E305: def a():\n    pass\na()
    E306: def a():\n    def b():\n        pass\n    def c():\n        pass
    """  # noqa
    top_level_lines = BLANK_LINES_CONFIG['top_level']
    method_lines = BLANK_LINES_CONFIG['method']

    if not previous_logical and blank_before < top_level_lines:
        return  # Don't expect blank lines before the first line
    if previous_logical.startswith('@'):
        # A decorator must be immediately followed by what it decorates.
        if blank_lines:
            yield 0, "E304 blank lines found after function decorator"
    elif (blank_lines > top_level_lines or
            (indent_level and blank_lines == method_lines + 1)
          ):
        yield 0, "E303 too many blank lines (%d)" % blank_lines
    elif STARTSWITH_TOP_LEVEL_REGEX.match(logical_line):
        # allow a group of one-liners
        if (
            _is_one_liner(logical_line, indent_level, lines, line_number) and
            blank_before == 0
        ):
            return
        if indent_level:
            # Indented definition: distinguish a nested def (E306) from
            # a method missing its separating blank line (E301).
            if not (blank_before == method_lines or
                    previous_indent_level < indent_level or
                    DOCSTRING_REGEX.match(previous_logical)
                    ):
                ancestor_level = indent_level
                nested = False
                # Search backwards for a def ancestor or tree root
                # (top level).
                for line in lines[line_number - top_level_lines::-1]:
                    if line.strip() and expand_indent(line) < ancestor_level:
                        ancestor_level = expand_indent(line)
                        nested = STARTSWITH_DEF_REGEX.match(line.lstrip())
                        if nested or ancestor_level == 0:
                            break
                if nested:
                    yield 0, "E306 expected %s blank line before a " \
                        "nested definition, found 0" % (method_lines,)
                else:
                    yield 0, "E301 expected %s blank line, found 0" % (
                        method_lines,)
        elif blank_before != top_level_lines:
            yield 0, "E302 expected %s blank lines, found %d" % (
                top_level_lines, blank_before)
    elif (logical_line and
            not indent_level and
            blank_before != top_level_lines and
            previous_unindented_logical_line.startswith(('def ', 'class '))
          ):
        yield 0, "E305 expected %s blank lines after " \
            "class or function definition, found %d" % (
                top_level_lines, blank_before)
445
446
447@register_check
def extraneous_whitespace(logical_line):
    r"""Avoid extraneous whitespace.

    Avoid extraneous whitespace in these situations:
    - Immediately inside parentheses, brackets or braces.
    - Immediately before a comma, semicolon, or colon.

    Okay: spam(ham[1], {eggs: 2})
    E201: spam( ham[1], {eggs: 2})
    E201: spam(ham[ 1], {eggs: 2})
    E201: spam(ham[1], { eggs: 2})
    E202: spam(ham[1], {eggs: 2} )
    E202: spam(ham[1 ], {eggs: 2})
    E202: spam(ham[1], {eggs: 2 })

    E203: if x == 4: print x, y; x, y = y , x
    E203: if x == 4: print x, y ; x, y = y, x
    E203: if x == 4 : print x, y; x, y = y, x
    """
    for m in EXTRANEOUS_WHITESPACE_REGEX.finditer(logical_line):
        text = m.group()
        offset = m.start()
        char = text.strip()
        if text[-1].isspace():
            # Whitespace immediately after an opening bracket.
            yield offset + 1, "E201 whitespace after '%s'" % char
        elif logical_line[offset - 1] != ',':
            # Whitespace before a closer (E202) or before a comma,
            # semicolon or colon (E203); skipped after a comma.
            code = 'E203' if char in ',;:' else 'E202'
            yield offset, "%s whitespace before '%s'" % (code, char)
478
479
480@register_check
def whitespace_around_keywords(logical_line):
    r"""Avoid extraneous whitespace around keywords.

    Okay: True and False
    E271: True and  False
    E272: True  and False
    E273: True and\tFalse
    E274: True\tand False
    """
    for match in KEYWORD_REGEX.finditer(logical_line):
        before, after = match.groups()
        # Check the whitespace on each side of the keyword: group 1 is
        # before it, group 2 is after it.
        for group, ws, tab_msg, spaces_msg in (
            (1, before, "E274 tab before keyword",
             "E272 multiple spaces before keyword"),
            (2, after, "E273 tab after keyword",
             "E271 multiple spaces after keyword"),
        ):
            if '\t' in ws:
                yield match.start(group), tab_msg
            elif len(ws) > 1:
                yield match.start(group), spaces_msg
502
503
504@register_check
def missing_whitespace_after_import_keyword(logical_line):
    r"""Multiple imports in form from x import (a, b, c) should have
    space between import statement and parenthesised name list.

    Okay: from foo import (bar, baz)
    E275: from foo import(bar, baz)
    E275: from importable.module import(bar, baz)
    """
    if not logical_line.startswith('from '):
        return
    indicator = ' import('
    found = logical_line.find(indicator)
    if found > -1:
        # Report at the opening parenthesis, right after 'import'.
        pos = found + len(indicator) - 1
        yield pos, "E275 missing whitespace after keyword"
520
521
522@register_check
def missing_whitespace(logical_line):
    r"""Each comma, semicolon or colon should be followed by whitespace.

    Okay: [a, b]
    Okay: (3,)
    Okay: a[1:4]
    Okay: a[:4]
    Okay: a[1:]
    Okay: a[1:4:2]
    E231: ['a','b']
    E231: foo(bar,baz)
    E231: [{'a':'b'}]
    """
    line = logical_line
    for index, (char, next_char) in enumerate(zip(line, line[1:])):
        if char not in ',;:' or next_char in WHITESPACE:
            continue
        before = line[:index]
        if char == ':' and before.count('[') > before.count(']') and \
                before.rfind('{') < before.rfind('['):
            continue  # Slice syntax, no space required
        if char == ',' and next_char == ')':
            continue  # Allow tuple with only one element: (3,)
        if char == ':' and next_char == '=' and sys.version_info >= (3, 8):
            continue  # Allow assignment expression
        yield index, "E231 missing whitespace after '%s'" % char
550
551
552@register_check
def indentation(logical_line, previous_logical, indent_char,
                indent_level, previous_indent_level,
                indent_size):
    r"""Use indent_size (PEP8 says 4) spaces per indentation level.

    For really old code that you don't want to mess up, you can continue
    to use 8-space tabs.

    Okay: a = 1
    Okay: if a == 0:\n    a = 1
    E111:   a = 1
    E114:   # a = 1

    Okay: for item in items:\n    pass
    E112: for item in items:\npass
    E115: for item in items:\n# Hi\n    pass

    Okay: a = 1\nb = 2
    E113: a = 1\n    b = 2
    E116: a = 1\n    # b = 2
    """
    # Comment-only lines report the E114/E115/E116 variants, which are
    # the E111/E112/E113 codes shifted by 3 with a " (comment)" suffix.
    comment = not logical_line
    code_offset = 3 if comment else 0
    suffix = " (comment)" if comment else ""

    if indent_level % indent_size:
        yield 0, "E11%d %s%s" % (
            1 + code_offset,
            "indentation is not a multiple of " + str(indent_size),
            suffix,
        )
    indent_expect = previous_logical.endswith(':')
    if indent_expect and indent_level <= previous_indent_level:
        yield 0, "E11%d %s%s" % (
            2 + code_offset, "expected an indented block", suffix)
    elif not indent_expect and indent_level > previous_indent_level:
        yield 0, "E11%d %s%s" % (
            3 + code_offset, "unexpected indentation", suffix)

    if indent_expect:
        # Tab-indented files conventionally indent 8 columns per level.
        expected_indent_amount = 8 if indent_char == '\t' else 4
        if indent_level > previous_indent_level + expected_indent_amount:
            yield 0, "E11%d %s%s" % (7, 'over-indented', suffix)
592
593
594@register_check
def continued_indentation(logical_line, tokens, indent_level, hang_closing,
                          indent_char, indent_size, noqa, verbose):
    r"""Continuation lines indentation.

    Continuation lines should align wrapped elements either vertically
    using Python's implicit line joining inside parentheses, brackets
    and braces, or using a hanging indent.

    When using a hanging indent these considerations should be applied:
    - there should be no arguments on the first line, and
    - further indentation should be used to clearly distinguish itself
      as a continuation line.

    Okay: a = (\n)
    E123: a = (\n    )

    Okay: a = (\n    42)
    E121: a = (\n   42)
    E122: a = (\n42)
    E123: a = (\n    42\n    )
    E124: a = (24,\n     42\n)
    E125: if (\n    b):\n    pass
    E126: a = (\n        42)
    E127: a = (24,\n      42)
    E128: a = (24,\n    42)
    E129: if (a or\n    b):\n    pass
    E131: a = (\n    42\n 24)
    """
    # Each token is a 5-tuple: (type, text, start, end, physical_line),
    # where start/end are (row, col) pairs.
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    if noqa or nrows == 1:
        return

    # indent_next tells us whether the next block is indented; assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line; in turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    row = depth = 0
    # With tab indentation, a hanging indent of one or two levels is
    # accepted; with spaces, exactly one level.
    valid_hangs = (indent_size,) if indent_char != '\t' \
        else (indent_size, indent_size * 2)
    # remember how many brackets were opened on each line
    parens = [0] * nrows
    # relative indents of physical lines
    rel_indent = [0] * nrows
    # for each depth, collect a list of opening rows
    open_rows = [[0]]
    # for each depth, memorize the hanging indentation
    hangs = [None]
    # visual indents
    indent_chances = {}
    last_indent = tokens[0][2]
    visual_indent = None
    last_token_multiline = False
    # for each depth, memorize the visual indent column
    indent = [last_indent[1]]
    if verbose >= 3:
        print(">>> " + tokens[0][4].rstrip())

    for token_type, text, start, end, line in tokens:

        newline = row < start[0] - first_row
        if newline:
            row = start[0] - first_row
            newline = not last_token_multiline and token_type not in NEWLINE

        if newline:
            # this is the beginning of a continuation line.
            last_indent = start
            if verbose >= 3:
                print("... " + line.rstrip())

            # record the initial indent.
            rel_indent[row] = expand_indent(line) - indent_level

            # identify closing bracket
            close_bracket = (token_type == tokenize.OP and text in ']})')

            # is the indent relative to an opening bracket line?
            for open_row in reversed(open_rows[depth]):
                hang = rel_indent[row] - rel_indent[open_row]
                hanging_indent = hang in valid_hangs
                if hanging_indent:
                    break
            if hangs[depth]:
                # A hanging indent was already established at this
                # depth; subsequent lines must match it exactly.
                hanging_indent = (hang == hangs[depth])
            # is there any chance of visual indent?
            visual_indent = (not close_bracket and hang > 0 and
                             indent_chances.get(start[1]))

            if close_bracket and indent[depth]:
                # closing bracket for visual indent
                if start[1] != indent[depth]:
                    yield (start, "E124 closing bracket does not match "
                           "visual indentation")
            elif close_bracket and not hang:
                # closing bracket matches indentation of opening
                # bracket's line
                if hang_closing:
                    yield start, "E133 closing bracket is missing indentation"
            elif indent[depth] and start[1] < indent[depth]:
                if visual_indent is not True:
                    # visual indent is broken
                    yield (start, "E128 continuation line "
                           "under-indented for visual indent")
            elif hanging_indent or (indent_next and
                                    rel_indent[row] == 2 * indent_size):
                # hanging indent is verified
                if close_bracket and not hang_closing:
                    yield (start, "E123 closing bracket does not match "
                           "indentation of opening bracket's line")
                hangs[depth] = hang
            elif visual_indent is True:
                # visual indent is verified
                indent[depth] = start[1]
            elif visual_indent in (text, str):
                # ignore token lined up with matching one from a
                # previous line
                pass
            else:
                # indent is broken
                if hang <= 0:
                    error = "E122", "missing indentation or outdented"
                elif indent[depth]:
                    error = "E127", "over-indented for visual indent"
                elif not close_bracket and hangs[depth]:
                    error = "E131", "unaligned for hanging indent"
                else:
                    hangs[depth] = hang
                    if hang > indent_size:
                        error = "E126", "over-indented for hanging indent"
                    else:
                        error = "E121", "under-indented for hanging indent"
                yield start, "%s continuation line %s" % error

        # look for visual indenting
        if (parens[row] and
                token_type not in (tokenize.NL, tokenize.COMMENT) and
                not indent[depth]):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
            if verbose >= 4:
                print("bracket depth %s indent to %s" % (depth, start[1]))
        # deal with implicit string concatenation
        elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
              text in ('u', 'ur', 'b', 'br')):
            indent_chances[start[1]] = str
        # visual indent after assert/raise/with
        elif not row and not depth and text in ["assert", "raise", "with"]:
            indent_chances[end[1] + 1] = True
        # special case for the "if" statement because len("if (") == 4
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True
        elif text == ':' and line[end[1]:].isspace():
            open_rows[depth].append(row)

        # keep track of bracket depth
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
                indent.append(0)
                hangs.append(None)
                if len(open_rows) == depth:
                    open_rows.append([])
                open_rows[depth].append(row)
                parens[row] += 1
                if verbose >= 4:
                    print("bracket depth %s seen, col %s, visual min = %s" %
                          (depth, start[1], indent[depth]))
            elif text in ')]}' and depth > 0:
                # parent indents should not be more than this one
                prev_indent = indent.pop() or last_indent[1]
                hangs.pop()
                for d in range(depth):
                    if indent[d] > prev_indent:
                        indent[d] = 0
                for ind in list(indent_chances):
                    if ind >= prev_indent:
                        del indent_chances[ind]
                del open_rows[depth + 1:]
                depth -= 1
                if depth:
                    indent_chances[indent[depth]] = True
                for idx in range(row, -1, -1):
                    if parens[idx]:
                        parens[idx] -= 1
                        break
            assert len(indent) == depth + 1
            if start[1] not in indent_chances:
                # allow lining up tokens
                indent_chances[start[1]] = text

        last_token_multiline = (start[0] != end[0])
        if last_token_multiline:
            rel_indent[end[0] - first_row] = rel_indent[row]

    # 'line' and 'start' here retain their values from the final token
    # of the loop above.
    if indent_next and expand_indent(line) == indent_level + indent_size:
        pos = (start[0], indent[0] + indent_size)
        if visual_indent:
            code = "E129 visually indented line"
        else:
            code = "E125 continuation line"
        yield pos, "%s with same indent as next logical line" % code
799
800
801@register_check
def whitespace_before_parameters(logical_line, tokens):
    r"""Avoid extraneous whitespace.

    Avoid extraneous whitespace in the following situations:
    - before the open parenthesis that starts the argument list of a
      function call.
    - before the open parenthesis that starts an indexing or slicing.

    Okay: spam(1)
    E211: spam (1)

    Okay: dict['key'] = list[index]
    E211: dict ['key'] = list[index]
    E211: dict['key'] = list [index]
    """
    prev_type, prev_text, __, prev_end, __ = tokens[0]
    for index, token in enumerate(tokens[1:], start=1):
        token_type, text, start, end, __ = token
        # 'match' and 'case' are only soft keywords (3.9+), so they get
        # the same exemption as hard keywords below.
        is_soft_keyword = (sys.version_info >= (3, 9) and
                           keyword.issoftkeyword(prev_text))
        if (token_type == tokenize.OP and
                text in '([' and
                start != prev_end and
                (prev_type == tokenize.NAME or prev_text in '}])') and
                # Syntax "class A (B):" is allowed, but avoid it
                (index < 2 or tokens[index - 2][1] != 'class') and
                # Allow "return (a.foo for a in range(5))"
                not keyword.iskeyword(prev_text) and
                not is_soft_keyword):
            yield prev_end, "E211 whitespace before '%s'" % text
        prev_type, prev_text, prev_end = token_type, text, end
839
840
@register_check
def whitespace_around_operator(logical_line):
    r"""Avoid extraneous whitespace around an operator.

    Okay: a = 12 + 3
    E221: a = 4  + 5
    E222: a = 4 +  5
    E223: a = 4\t+ 5
    E224: a = 4 +\t5
    """
    # Group 1 is the whitespace before the operator, group 2 the
    # whitespace after it; check both sides of every match.
    sides = (
        (1, "E223 tab before operator", "E221 multiple spaces before operator"),
        (2, "E224 tab after operator", "E222 multiple spaces after operator"),
    )
    for match in OPERATOR_REGEX.finditer(logical_line):
        for group, tab_message, spaces_message in sides:
            whitespace = match.group(group)
            if '\t' in whitespace:
                yield match.start(group), tab_message
            elif len(whitespace) > 1:
                yield match.start(group), spaces_message
863
864
@register_check
def missing_whitespace_around_operator(logical_line, tokens):
    r"""Surround operators with a single space on either side.

    - Always surround these binary operators with a single space on
      either side: assignment (=), augmented assignment (+=, -= etc.),
      comparisons (==, <, >, !=, <=, >=, in, not in, is, is not),
      Booleans (and, or, not).

    - If operators with different priorities are used, consider adding
      whitespace around the operators with the lowest priorities.

    Okay: i = i + 1
    Okay: submitted += 1
    Okay: x = x * 2 - 1
    Okay: hypot2 = x * x + y * y
    Okay: c = (a + b) * (a - b)
    Okay: foo(bar, key='word', *args, **kwargs)
    Okay: alpha[:-i]

    E225: i=i+1
    E225: submitted +=1
    E225: x = x /2 - 1
    E225: z = x **y
    E225: z = 1and 1
    E226: c = (a+b) * (a-b)
    E226: hypot2 = x*x + y*y
    E227: c = a|b
    E228: msg = fmt%(errno, errmsg)
    """
    # parens tracks open '('/'lambda' so that '=' inside a call or a
    # lambda parameter list is treated as a keyword/default '='.
    parens = 0
    # need_space is tri-state:
    #   False -> no operator is waiting on its trailing whitespace
    #   True  -> the previous token requires whitespace (WS_NEEDED)
    #   (offset, had_space) -> the previous token allows optional
    #       whitespace; had_space records whether a space preceded it so
    #       the trailing side can be required to match.
    need_space = False
    prev_type = tokenize.OP
    prev_text = prev_end = None
    operator_types = (tokenize.OP, tokenize.NAME)
    for token_type, text, start, end, line in tokens:
        if token_type in SKIP_COMMENTS:
            continue
        if text in ('(', 'lambda'):
            parens += 1
        elif text == ')':
            parens -= 1
        if need_space:
            if start != prev_end:
                # Found a (probably) needed space
                if need_space is not True and not need_space[1]:
                    # Optional-whitespace operator with a trailing space
                    # but no leading one: asymmetric, report E225.
                    yield (need_space[0],
                           "E225 missing whitespace around operator")
                need_space = False
            elif text == '>' and prev_text in ('<', '-'):
                # Tolerate the "<>" operator, even if running Python 3
                # Deal with Python 3's annotated return value "->"
                pass
            elif (
                    # def f(a, /, b):
                    #           ^
                    # def f(a, b, /):
                    #              ^
                    # f = lambda a, /:
                    #                ^
                    prev_text == '/' and text in {',', ')', ':'} or
                    # def f(a, b, /):
                    #               ^
                    prev_text == ')' and text == ':'
            ):
                # Tolerate the "/" operator in function definition
                # For more info see PEP570
                pass
            else:
                if need_space is True or need_space[1]:
                    # A needed trailing space was not found
                    yield prev_end, "E225 missing whitespace around operator"
                elif prev_text != '**':
                    # Optional-whitespace operator written tight on both
                    # sides: classify by operator family.
                    code, optype = 'E226', 'arithmetic'
                    if prev_text == '%':
                        code, optype = 'E228', 'modulo'
                    elif prev_text not in ARITHMETIC_OP:
                        code, optype = 'E227', 'bitwise or shift'
                    yield (need_space[0], "%s missing whitespace "
                           "around %s operator" % (code, optype))
                need_space = False
        elif token_type in operator_types and prev_end is not None:
            if text == '=' and parens:
                # Allow keyword args or defaults: foo(bar=None).
                pass
            elif text in WS_NEEDED_OPERATORS:
                need_space = True
            elif text in UNARY_OPERATORS:
                # Check if the operator is used as a binary operator
                # Allow unary operators: -123, -x, +1.
                # Allow argument unpacking: foo(*args, **kwargs).
                if prev_type == tokenize.OP and prev_text in '}])' or (
                    prev_type != tokenize.OP and
                    prev_text not in KEYWORDS and (
                        sys.version_info < (3, 9) or
                        not keyword.issoftkeyword(prev_text)
                    )
                ):
                    need_space = None
            elif text in WS_OPTIONAL_OPERATORS:
                need_space = None

            if need_space is None:
                # Surrounding space is optional, but ensure that
                # trailing space matches opening space
                need_space = (prev_end, start != prev_end)
            elif need_space and start == prev_end:
                # A needed opening space was not found
                yield prev_end, "E225 missing whitespace around operator"
                need_space = False
        prev_type = token_type
        prev_text = text
        prev_end = end
978
979
@register_check
def whitespace_around_comma(logical_line):
    r"""Avoid extraneous whitespace after a comma or a colon.

    Note: these checks are disabled by default

    Okay: a = (1, 2)
    E241: a = (1,  2)
    E242: a = (1,\t2)
    """
    for match in WHITESPACE_AFTER_COMMA_REGEX.finditer(logical_line):
        matched = match.group()
        # Report just past the separator character itself.
        offset = match.start() + 1
        prefix = ("E242 tab after" if '\t' in matched
                  else "E241 multiple spaces after")
        yield offset, "%s '%s'" % (prefix, matched[0])
997
998
@register_check
def whitespace_around_named_parameter_equals(logical_line, tokens):
    r"""Don't use spaces around the '=' sign in function arguments.

    Don't use spaces around the '=' sign when used to indicate a
    keyword argument or a default parameter value, except when
    using a type annotation.

    Okay: def complex(real, imag=0.0):
    Okay: return magic(r=real, i=imag)
    Okay: boolean(a == b)
    Okay: boolean(a != b)
    Okay: boolean(a <= b)
    Okay: boolean(a >= b)
    Okay: def foo(arg: int = 42):
    Okay: async def foo(arg: int = 42):

    E251: def complex(real, imag = 0.0):
    E251: return magic(r = real, i = imag)
    E252: def complex(real, image: float=0.0):
    """
    parens = 0  # current '('/'[' nesting depth
    # The '=' handler below checks the opening side immediately; these
    # two flags defer the closing-side check to the next token, whose
    # start position is not known yet.
    no_space = False        # next token must touch the '=' (else E251)
    require_space = False   # next token must not touch the '=' (else E252)
    prev_end = None
    annotated_func_arg = False  # inside an annotated parameter of a 'def'
    in_def = bool(STARTSWITH_DEF_REGEX.match(logical_line))

    message = "E251 unexpected spaces around keyword / parameter equals"
    missing_message = "E252 missing whitespace around parameter equals"

    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.NL:
            continue
        if no_space:
            no_space = False
            if start != prev_end:
                yield (prev_end, message)
        if require_space:
            require_space = False
            if start == prev_end:
                yield (prev_end, missing_message)
        if token_type == tokenize.OP:
            if text in '([':
                parens += 1
            elif text in ')]':
                parens -= 1
            elif in_def and text == ':' and parens == 1:
                # ':' at depth 1 of a 'def' starts a type annotation.
                annotated_func_arg = True
            elif parens == 1 and text == ',':
                # Next parameter: annotation state resets.
                annotated_func_arg = False
            elif parens and text == '=':
                if annotated_func_arg and parens == 1:
                    # Annotated parameter default: spaces are required.
                    require_space = True
                    if start == prev_end:
                        yield (prev_end, missing_message)
                else:
                    # Plain keyword/default '=': spaces are forbidden.
                    no_space = True
                    if start != prev_end:
                        yield (prev_end, message)
            if not parens:
                annotated_func_arg = False

        prev_end = end
1063
1064
@register_check
def whitespace_before_comment(logical_line, tokens):
    r"""Separate inline comments by at least two spaces.

    An inline comment is a comment on the same line as a statement.
    Inline comments should be separated by at least two spaces from the
    statement. They should start with a # and a single space.

    Each line of a block comment starts with a # and a single space
    (unless it is indented text inside the comment).

    Okay: x = x + 1  # Increment x
    Okay: x = x + 1    # Increment x
    Okay: # Block comment
    E261: x = x + 1 # Increment x
    E262: x = x + 1  #Increment x
    E262: x = x + 1  #  Increment x
    E265: #Block comment
    E266: ### Block comment
    """
    prev_end = (0, 0)  # end of the last non-comment, non-NL token
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.COMMENT:
            # Any code before the '#' on the physical line makes this an
            # inline comment rather than a block comment.
            inline_comment = line[:start[1]].strip()
            if inline_comment:
                if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
                    yield (prev_end,
                           "E261 at least two spaces before inline comment")
            # Split the comment into its leading symbol run and the text
            # after the first space (if any).
            symbol, sp, comment = text.partition(' ')
            # bad_prefix is falsy for a plain '#'/'#:' prefix; otherwise
            # it holds the first character after the leading '#'s, or
            # '#' itself when the comment is nothing but hashes.
            bad_prefix = symbol not in '#:' and (symbol.lstrip('#')[:1] or '#')
            if inline_comment:
                if bad_prefix or comment[:1] in WHITESPACE:
                    yield start, "E262 inline comment should start with '# '"
            elif bad_prefix and (bad_prefix != '!' or start[0] > 1):
                # '#!' is tolerated only as a shebang on the first line.
                if bad_prefix != '#':
                    yield start, "E265 block comment should start with '# '"
                elif comment:
                    yield start, "E266 too many leading '#' for block comment"
        elif token_type != tokenize.NL:
            prev_end = end
1105
1106
1107@register_check
def imports_on_separate_lines(logical_line):
    r"""Place imports on separate lines.

    Okay: import os\nimport sys
    E401: import sys, os

    Okay: from subprocess import Popen, PIPE
    Okay: from myclas import MyClass
    Okay: from foo.bar.yourclass import YourClass
    Okay: import myclass
    Okay: import foo.bar.yourclass
    """
    if not logical_line.startswith('import '):
        return
    comma = logical_line.find(',')
    if comma > -1 and ';' not in logical_line[:comma]:
        # A ';' before the comma means the comma belongs to a later
        # statement, which is reported separately (E702).
        yield comma, "E401 multiple imports on one line"
1125
1126
@register_check
def module_imports_on_top_of_file(
        logical_line, indent_level, checker_state, noqa):
    r"""Place imports at the top of the file.

    Always put imports at the top of the file, just after any module
    comments and docstrings, and before module globals and constants.

    Okay: import os
    Okay: # this is a comment\nimport os
    Okay: '''this is a module docstring'''\nimport os
    Okay: r'''this is a module docstring'''\nimport os
    Okay:
    try:\n\timport x\nexcept ImportError:\n\tpass\nelse:\n\tpass\nimport y
    Okay:
    try:\n\timport x\nexcept ImportError:\n\tpass\nfinally:\n\tpass\nimport y
    E402: a=1\nimport os
    E402: 'One string'\n"Two string"\nimport os
    E402: a=1\nfrom sys import x

    Okay: if x:\n    import os
    """  # noqa
    def _looks_like_string(text):
        # Strip an optional u/b prefix, then an optional r prefix,
        # before checking for a quote character.
        if text[0] in 'uUbB':
            text = text[1:]
        if text and text[0] in 'rR':
            text = text[1:]
        return text and text[0] in '"\''

    allowed_keywords = (
        'try', 'except', 'else', 'finally', 'with', 'if', 'elif')

    # Indented imports (conditional/function scope), blank lines and
    # noqa'd lines are all exempt.
    if indent_level or not logical_line or noqa:
        return

    line = logical_line
    if line.startswith(('import ', 'from ')):
        if checker_state.get('seen_non_imports', False):
            yield 0, "E402 module level import not at top of file"
    elif re.match(DUNDER_REGEX, line):
        # Module-level dunder assignments are conventionally allowed
        # before imports.
        pass
    elif line.startswith(allowed_keywords):
        # Allow certain keywords intermixed with imports in order to
        # support conditional or filtered importing
        pass
    elif _looks_like_string(line):
        # The first literal is a docstring, allow it. Otherwise, report
        # error.
        if checker_state.get('seen_docstring', False):
            checker_state['seen_non_imports'] = True
        else:
            checker_state['seen_docstring'] = True
    else:
        checker_state['seen_non_imports'] = True
1184
1185
@register_check
def compound_statements(logical_line):
    r"""Compound statements (on the same line) are generally
    discouraged.

    While sometimes it's okay to put an if/for/while with a small body
    on the same line, never do this for multi-clause statements.
    Also avoid folding such long lines!

    Always use a def statement instead of an assignment statement that
    binds a lambda expression directly to a name.

    Okay: if foo == 'blah':\n    do_blah_thing()
    Okay: do_one()
    Okay: do_two()
    Okay: do_three()

    E701: if foo == 'blah': do_blah_thing()
    E701: for x in lst: total += x
    E701: while t < 10: t = delay()
    E701: if foo == 'blah': do_blah_thing()
    E701: else: do_non_blah_thing()
    E701: try: something()
    E701: finally: cleanup()
    E701: if foo == 'blah': one(); two(); three()
    E702: do_one(); do_two(); do_three()
    E703: do_four();  # useless semicolon
    E704: def f(x): return 2*x
    E731: f = lambda x: 2*x
    """
    line = logical_line
    last_char = len(line) - 1
    found = line.find(':')
    prev_found = 0
    # Running counts of bracket characters seen so far, used to decide
    # whether a ':' is at statement level or inside a bracketed context.
    counts = {char: 0 for char in '{}[]()'}
    # Examine every ':' that is not the final character of the line.
    while -1 < found < last_char:
        update_counts(line[prev_found:found], counts)
        if ((counts['{'] <= counts['}'] and   # {'a': 1} (dict)
             counts['['] <= counts[']'] and   # [1:2] (slice)
             counts['('] <= counts[')']) and  # (annotation)
            not (sys.version_info >= (3, 8) and
                 line[found + 1] == '=')):  # assignment expression
            lambda_kw = LAMBDA_REGEX.search(line, 0, found)
            if lambda_kw:
                before = line[:lambda_kw.start()].rstrip()
                # Only flag 'name = lambda ...', not e.g. a lambda used
                # as an argument or within a larger expression.
                if before[-1:] == '=' and isidentifier(before[:-1].strip()):
                    yield 0, ("E731 do not assign a lambda expression, use a "
                              "def")
                break
            if STARTSWITH_DEF_REGEX.match(line):
                yield 0, "E704 multiple statements on one line (def)"
            elif STARTSWITH_INDENT_STATEMENT_REGEX.match(line):
                yield found, "E701 multiple statements on one line (colon)"
        prev_found = found
        found = line.find(':', found + 1)
    # Semicolons: E702 when a statement follows, E703 when trailing.
    found = line.find(';')
    while -1 < found:
        if found < last_char:
            yield found, "E702 multiple statements on one line (semicolon)"
        else:
            yield found, "E703 statement ends with a semicolon"
        found = line.find(';', found + 1)
1248
1249
1250@register_check
def explicit_line_join(logical_line, tokens):
    r"""Avoid explicit line join between brackets.

    The preferred way of wrapping long lines is by using Python's
    implied line continuation inside parentheses, brackets and braces.
    Long lines can be broken over multiple lines by wrapping expressions
    in parentheses.  These should be used in preference to using a
    backslash for line continuation.

    E502: aaa = [123, \\n       123]
    E502: aaa = ("bbb " \\n       "ccc")
    Okay: aaa = [123,\n       123]
    Okay: aaa = ("bbb "\n       "ccc")
    Okay: aaa = "bbb " \\n    "ccc"
    Okay: aaa = 123  # \\
    """
    depth = 0
    saw_comment = False
    backslash_pos = None
    prev_start_row = prev_end_row = 0
    for token_type, text, start, end, phys_line in tokens:
        if token_type == tokenize.COMMENT:
            # Once any comment is seen, stop reporting (a backslash may
            # be part of the comment text).
            saw_comment = True
        if start[0] != prev_start_row and depth and backslash_pos \
                and not saw_comment:
            yield backslash_pos, \
                "E502 the backslash is redundant between brackets"
        if end[0] == prev_end_row:
            prev_start_row = start[0]
        else:
            # Token moved to a new physical row: remember whether that
            # row ends with a continuation backslash, and where.
            if phys_line.rstrip('\r\n').endswith('\\'):
                backslash_pos = (end[0], len(phys_line.splitlines()[-1]) - 1)
            else:
                backslash_pos = None
            prev_start_row = prev_end_row = end[0]
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
            elif text in ')]}':
                depth -= 1
1289
1290
_SYMBOLIC_OPS = frozenset("()[]{},:.;@=%~") | frozenset(("...",))


def _is_binary_operator(token_type, text):
    """Return whether this token should be treated as a binary operator."""
    # NOTE(sigmavirus24): Previously the not_a_symbol check was executed
    # conditionally. Since it is now *always* executed, text may be
    # None. In that case we get a TypeError for `text not in str`.
    if token_type != tokenize.OP and text not in ('and', 'or'):
        return False
    # The % character is strictly speaking a binary operator, but the
    # common usage seems to be to put it next to the format parameters,
    # after a line break, so it is excluded via _SYMBOLIC_OPS.
    return bool(text) and text not in _SYMBOLIC_OPS
1305
1306
def _break_around_binary_operators(tokens):
    """Private function to reduce duplication.

    This factors out the shared details between
    :func:`break_before_binary_operator` and
    :func:`break_after_binary_operator`.
    """
    seen_line_break = False
    unary_context = True
    # Previous non-newline token type and text
    prev_type = None
    prev_text = None
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.COMMENT:
            continue
        is_newline = (('\n' in text or '\r' in text) and
                      token_type != tokenize.STRING)
        if is_newline:
            # Absorb the newline token; the next real token will carry
            # the line-break flag.
            seen_line_break = True
            continue
        yield (token_type, text, prev_type, prev_text,
               seen_line_break, unary_context, start)
        unary_context = text in '([{,;'
        seen_line_break = False
        prev_type = token_type
        prev_text = text
1331
1332
@register_check
def break_before_binary_operator(logical_line, tokens):
    r"""
    Avoid breaks before binary operators.

    The preferred place to break around a binary operator is after the
    operator, not before it.

    W503: (width == 0\n + height == 0)
    W503: (width == 0\n and height == 0)
    W503: var = (1\n       & ~2)
    W503: var = (1\n       / -2)
    W503: var = (1\n       + -1\n       + -2)

    Okay: foo(\n    -x)
    Okay: foo(x\n    [])
    Okay: x = '''\n''' + ''
    Okay: foo(x,\n    -y)
    Okay: foo(x,  # comment\n    -y)
    """
    for context in _break_around_binary_operators(tokens):
        (token_type, text, previous_token_type, previous_text,
         line_break, unary_context, start) = context
        if not line_break or unary_context:
            continue
        # Flag an operator that begins a continuation line, unless the
        # previous token was itself an operator (e.g. unary chains).
        if (_is_binary_operator(token_type, text) and
                not _is_binary_operator(previous_token_type,
                                        previous_text)):
            yield start, "W503 line break before binary operator"
1361
1362
@register_check
def break_after_binary_operator(logical_line, tokens):
    r"""
    Avoid breaks after binary operators.

    The preferred place to break around a binary operator is before the
    operator, not after it.

    W504: (width == 0 +\n height == 0)
    W504: (width == 0 and\n height == 0)
    W504: var = (1 &\n       ~2)

    Okay: foo(\n    -x)
    Okay: foo(x\n    [])
    Okay: x = '''\n''' + ''
    Okay: x = '' + '''\n'''
    Okay: foo(x,\n    -y)
    Okay: foo(x,  # comment\n    -y)

    The following should be W504 but unary_context is tricky with these
    Okay: var = (1 /\n       -2)
    Okay: var = (1 +\n       -1 +\n       -2)
    """
    prev_start = None
    for context in _break_around_binary_operators(tokens):
        (token_type, text, previous_token_type, previous_text,
         line_break, unary_context, start) = context
        if not line_break or unary_context:
            prev_start = start
            continue
        # Flag an operator that ends the previous line, unless this
        # token is itself an operator.
        if (_is_binary_operator(previous_token_type, previous_text) and
                not _is_binary_operator(token_type, text)):
            yield prev_start, "W504 line break after binary operator"
        prev_start = start
1396
1397
@register_check
def comparison_to_singleton(logical_line, noqa):
    r"""Comparison to singletons should use "is" or "is not".

    Comparisons to singletons like None should always be done
    with "is" or "is not", never the equality operators.

    Okay: if arg is not None:
    E711: if arg != None:
    E711: if None == arg:
    E712: if arg == True:
    E712: if False == arg:

    Also, beware of writing if x when you really mean if x is not None
    -- e.g. when testing whether a variable or argument that defaults to
    None was set to some other value.  The other value might have a type
    (such as a container) that could be false in a boolean context!
    """
    if noqa:
        return

    for m in COMPARE_SINGLETON_REGEX.finditer(logical_line):
        # The singleton may appear on either side of the comparison.
        singleton = m.group(1) or m.group(3)
        same = m.group(2) == '=='

        msg = "'if cond is %s:'" % (('' if same else 'not ') + singleton)
        if singleton == 'None':
            code = 'E711'
        else:
            code = 'E712'
            # 'x == True' / 'x != False' both assert that x is truthy.
            nonzero = same == (singleton == 'True')
            msg += " or 'if %scond:'" % ('' if nonzero else 'not ')
        yield m.start(2), ("%s comparison to %s should be %s" %
                           (code, singleton, msg))
1433
1434
@register_check
def comparison_negative(logical_line):
    r"""Negative comparison should be done using "not in" and "is not".

    Okay: if x not in y:\n    pass
    Okay: assert (X in Y or X is Z)
    Okay: if not (X in Y):\n    pass
    Okay: zz = x is not y
    E713: Z = not X in Y
    E713: if not X.B in Y:\n    pass
    E714: if not X is Y:\n    pass
    E714: Z = not X.B is Y
    """
    m = COMPARE_NEGATIVE_REGEX.search(logical_line)
    if m is None:
        return
    offset = m.start(1)
    if m.group(2) == 'in':
        # 'not X in Y' -> prefer 'X not in Y'
        yield offset, "E713 test for membership should be 'not in'"
    else:
        # 'not X is Y' -> prefer 'X is not Y'
        yield offset, "E714 test for object identity should be 'is not'"
1455
1456
@register_check
def comparison_type(logical_line, noqa):
    r"""Object type comparisons should always use isinstance().

    Do not compare types directly.

    Okay: if isinstance(obj, int):
    E721: if type(obj) is type(1):

    When checking if an object is a string, keep in mind that it might
    be a unicode string too! In Python 2.3, str and unicode have a
    common base class, basestring, so you can do:

    Okay: if isinstance(obj, basestring):
    Okay: if type(a1) is type(b1):
    """
    m = COMPARE_TYPE_REGEX.search(logical_line)
    if m is None or noqa:
        return
    inst = m.group(1)
    if inst and isidentifier(inst) and inst not in SINGLETONS:
        return  # Allow comparison for types which are not obvious
    yield m.start(), "E721 do not compare types, use 'isinstance()'"
1479
1480
@register_check
def bare_except(logical_line, noqa):
    r"""When catching exceptions, mention specific exceptions when
    possible.

    Okay: except Exception:
    Okay: except BaseException:
    E722: except:
    """
    if not noqa:
        found = BLANK_EXCEPT_REGEX.match(logical_line)
        if found is not None:
            yield found.start(), "E722 do not use bare 'except'"
1496
1497
@register_check
def ambiguous_identifier(logical_line, tokens):
    r"""Never use the characters 'l', 'O', or 'I' as variable names.

    In some fonts, these characters are indistinguishable from the
    numerals one and zero. When tempted to use 'l', use 'L' instead.

    Okay: L = 0
    Okay: o = 123
    Okay: i = 42
    E741: l = 0
    E741: O = 123
    E741: I = 42

    Variables can be bound in several other contexts, including class
    and function definitions, 'global' and 'nonlocal' statements,
    exception handlers, and 'with' and 'for' statements.
    In addition, we have a special handling for function parameters.

    Okay: except AttributeError as o:
    Okay: with lock as L:
    Okay: foo(l=12)
    Okay: for a in foo(l=12):
    E741: except AttributeError as O:
    E741: with lock as l:
    E741: global I
    E741: nonlocal l
    E741: def foo(l):
    E741: def foo(l=12):
    E741: l = foo(l=12)
    E741: for l in range(10):
    E742: class I(object):
    E743: def l(x):
    """
    is_func_def = False  # Set to true if 'def' is found
    # Depth of call/parameter parentheses following a NAME; at depth > 0
    # an '=' is a keyword argument and must not be flagged.
    parameter_parentheses_level = 0
    idents_to_avoid = ('l', 'O', 'I')
    prev_type, prev_text, prev_start, prev_end, __ = tokens[0]
    for token_type, text, start, end, line in tokens[1:]:
        # ident/pos hold the candidate E741 finding for this token; the
        # yield happens at the end so later rules can overwrite them.
        ident = pos = None
        # find function definitions
        if prev_text == 'def':
            is_func_def = True
        # update parameter parentheses level
        if parameter_parentheses_level == 0 and \
                prev_type == tokenize.NAME and \
                token_type == tokenize.OP and text == '(':
            parameter_parentheses_level = 1
        elif parameter_parentheses_level > 0 and \
                token_type == tokenize.OP:
            if text == '(':
                parameter_parentheses_level += 1
            elif text == ')':
                parameter_parentheses_level -= 1
        # identifiers on the lhs of an assignment operator
        # NOTE(review): "'=' in text" also matches comparison and
        # augmented operators ('==', '<=', '+='); confirm that flagging
        # names before those operators here is intended.
        if token_type == tokenize.OP and '=' in text and \
                parameter_parentheses_level == 0:
            if prev_text in idents_to_avoid:
                ident = prev_text
                pos = prev_start
        # identifiers bound to values with 'as', 'for',
        # 'global', or 'nonlocal'
        if prev_text in ('as', 'for', 'global', 'nonlocal'):
            if text in idents_to_avoid:
                ident = text
                pos = start
        # function parameter definitions
        if is_func_def:
            if text in idents_to_avoid:
                ident = text
                pos = start
        if prev_text == 'class':
            if text in idents_to_avoid:
                yield start, "E742 ambiguous class definition '%s'" % text
        if prev_text == 'def':
            if text in idents_to_avoid:
                yield start, "E743 ambiguous function definition '%s'" % text
        if ident:
            yield pos, "E741 ambiguous variable name '%s'" % ident
        prev_type = token_type
        prev_text = text
        prev_start = start
1580
1581
1582@register_check
def python_3000_has_key(logical_line, noqa):
    r"""The {}.has_key() method is removed in Python 3: use the 'in'
    operator.

    Okay: if "alph" in d:\n    print d["alph"]
    W601: assert d.has_key('alph')
    """
    if noqa:
        return
    offset = logical_line.find('.has_key(')
    if offset != -1:
        yield offset, "W601 .has_key() is deprecated, use 'in'"
1593
1594
@register_check
def python_3000_raise_comma(logical_line):
    r"""When raising an exception, use "raise ValueError('message')".

    The older form is removed in Python 3.

    Okay: raise DummyError("Message")
    W602: raise DummyError, "Message"
    """
    comma_match = RAISE_COMMA_REGEX.match(logical_line)
    if comma_match:
        # A bare re-raise inside an except clause is still fine.
        if RERAISE_COMMA_REGEX.match(logical_line):
            return
        offset = comma_match.end() - 1
        yield offset, "W602 deprecated form of raising exception"
1607
1608
@register_check
def python_3000_not_equal(logical_line):
    r"""New code should always use != instead of <>.

    The older syntax is removed in Python 3.

    Okay: if a != 'no':
    W603: if a <> 'no':
    """
    offset = logical_line.find('<>')
    if offset != -1:
        yield offset, "W603 '<>' is deprecated, use '!='"
1621
1622
@register_check
def python_3000_backticks(logical_line):
    r"""Use repr() instead of backticks in Python 3.

    Okay: val = repr(1 + 2)
    W604: val = `1 + 2`
    """
    offset = logical_line.find('`')
    if offset != -1:
        yield offset, "W604 backticks are deprecated, use 'repr()'"
1633
1634
@register_check
def python_3000_invalid_escape_sequence(logical_line, tokens, noqa):
    r"""Invalid escape sequences are deprecated in Python 3.6.

    Okay: regex = r'\.png$'
    W605: regex = '\.png$'
    """
    if noqa:
        return

    # https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
    # Characters that form a recognized escape sequence after a backslash.
    valid = [
        '\n',
        '\\',
        '\'',
        '"',
        'a',
        'b',
        'f',
        'n',
        'r',
        't',
        'v',
        '0', '1', '2', '3', '4', '5', '6', '7',
        'x',

        # Escape sequences only recognized in string literals
        'N',
        'u',
        'U',
    ]

    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.STRING:
            start_line, start_col = start
            # The closing characters identify the quote style; this
            # handles both single and triple quotes.
            quote = text[-3:] if text[-3:] in ('"""', "'''") else text[-1]
            # Extract string modifiers (e.g. u or r)
            quote_pos = text.index(quote)
            prefix = text[:quote_pos].lower()
            start = quote_pos + len(quote)
            # The string body, without prefix and quotes.
            string = text[start:-len(quote)]

            # Raw strings have no escape sequences at all.
            if 'r' not in prefix:
                pos = string.find('\\')
                while pos >= 0:
                    # Advance to the character following the backslash.
                    pos += 1
                    if string[pos] not in valid:
                        # Map the offset inside the string body back to a
                        # (row, col) position in the physical file; the
                        # string may span multiple lines.
                        line = start_line + string.count('\n', 0, pos)
                        if line == start_line:
                            col = start_col + len(prefix) + len(quote) + pos
                        else:
                            col = pos - string.rfind('\n', 0, pos) - 1
                        yield (
                            (line, col - 1),
                            "W605 invalid escape sequence '\\%s'" %
                            string[pos],
                        )
                    # Continue after the escaped character so '\\\\' is
                    # treated as one escape, not two.
                    pos = string.find('\\', pos + 1)
1693
1694
@register_check
def python_3000_async_await_keywords(logical_line, tokens):
    """'async' and 'await' are reserved keywords starting at Python 3.7.

    W606: async = 42
    W606: await = 42
    Okay: async def read(db):\n    data = await db.fetch('SELECT ...')
    """
    # The Python tokenize library before Python 3.5 recognizes
    # async/await as a NAME token. Therefore, use a state machine to
    # look for the possible async/await constructs as defined by the
    # Python grammar:
    # https://docs.python.org/3/reference/grammar.html

    # state is None (looking for async/await) or a tuple of
    # (state_name, start_position_of_the_suspect_token).
    state = None
    for token_type, text, start, end, line in tokens:
        error = False

        # Non-logical newlines never affect the grammar productions below.
        if token_type == tokenize.NL:
            continue

        if state is None:
            if token_type == tokenize.NAME:
                if text == 'async':
                    state = ('async_stmt', start)
                elif text == 'await':
                    state = ('await', start)
                # NOTE(review): the token_type test below is redundant (we
                # are already inside the NAME branch) — confirm upstream
                # intent before simplifying.
                elif (token_type == tokenize.NAME and
                      text in ('def', 'for')):
                    state = ('define', start)

        elif state[0] == 'async_stmt':
            if token_type == tokenize.NAME and text in ('def', 'with', 'for'):
                # One of funcdef, with_stmt, or for_stmt. Return to
                # looking for async/await names.
                state = None
            else:
                error = True
        elif state[0] == 'await':
            if token_type == tokenize.NAME:
                # An await expression. Return to looking for async/await
                # names.
                state = None
            elif token_type == tokenize.OP and text == '(':
                state = None
            else:
                error = True
        elif state[0] == 'define':
            # 'def async(...)' / 'for await in ...' use the names as
            # plain identifiers: flag them.
            if token_type == tokenize.NAME and text in ('async', 'await'):
                error = True
            else:
                state = None

        if error:
            yield (
                state[1],
                "W606 'async' and 'await' are reserved keywords starting with "
                "Python 3.7",
            )
            state = None

    # Last token
    if state is not None:
        yield (
            state[1],
            "W606 'async' and 'await' are reserved keywords starting with "
            "Python 3.7",
        )
1763
1764
1765########################################################################
@register_check
def maximum_doc_length(logical_line, max_doc_length, noqa, tokens):
    r"""Limit all doc lines to a maximum of 72 characters.

    For flowing long blocks of text (docstrings or comments), limiting
    the length to 72 characters is recommended.

    Reports warning W505
    """
    if max_doc_length is None or noqa:
        return

    prev_token = None
    skip_lines = set()
    # Skip lines that contain any token other than comments/NL/strings,
    # i.e. lines that are not pure documentation.
    for token_type, text, start, end, line in tokens:
        if token_type not in SKIP_COMMENTS.union([tokenize.STRING]):
            skip_lines.add(line)

    for token_type, text, start, end, line in tokens:
        # Skip lines that aren't pure strings
        # NOTE(review): this tests the truthiness of the whole set, not
        # `line in skip_lines` — confirm whether that is intentional.
        if token_type == tokenize.STRING and skip_lines:
            continue
        if token_type in (tokenize.STRING, tokenize.COMMENT):
            # Only check comment-only lines
            if prev_token is None or prev_token in SKIP_TOKENS:
                lines = line.splitlines()
                for line_num, physical_line in enumerate(lines):
                    if hasattr(physical_line, 'decode'):  # Python 2
                        # The line could contain multi-byte characters
                        try:
                            physical_line = physical_line.decode('utf-8')
                        except UnicodeError:
                            pass
                    # A shebang on the first physical line exempts the file.
                    if start[0] + line_num == 1 and line.startswith('#!'):
                        return
                    length = len(physical_line)
                    chunks = physical_line.split()
                    if token_type == tokenize.COMMENT:
                        # Allow a long single-word comment (e.g. a URL).
                        if (len(chunks) == 2 and
                                length - len(chunks[-1]) < MAX_DOC_LENGTH):
                            continue
                    # NOTE(review): the inner condition repeats the outer
                    # `len(chunks) == 1` test — presumably one of them was
                    # meant to differ; verify against upstream.
                    if len(chunks) == 1 and line_num + 1 < len(lines):
                        if (len(chunks) == 1 and
                                length - len(chunks[-1]) < MAX_DOC_LENGTH):
                            continue
                    if length > max_doc_length:
                        doc_error = (start[0] + line_num, max_doc_length)
                        yield (doc_error, "W505 doc line too long "
                                          "(%d > %d characters)"
                               % (length, max_doc_length))
        prev_token = token_type
1818
1819
1820########################################################################
1821# Helper functions
1822########################################################################
1823
1824
if sys.version_info < (3,):
    # Python 2: implicit encoding.
    def readlines(filename):
        """Read the source code."""
        with open(filename, 'rU') as f:
            return f.readlines()
    # Python 2 identifiers are ASCII-only, so a regex suffices.
    isidentifier = re.compile(r'[a-zA-Z_]\w*$').match
    stdin_get_value = sys.stdin.read
else:
    # Python 3
    def readlines(filename):
        """Read the source code."""
        try:
            # tokenize.open() honors the PEP 263 coding cookie and BOM.
            with tokenize.open(filename) as f:
                return f.readlines()
        except (LookupError, SyntaxError, UnicodeError):
            # Fall back if file encoding is improperly declared
            with open(filename, encoding='latin-1') as f:
                return f.readlines()
    isidentifier = str.isidentifier

    def stdin_get_value():
        """Read the value from stdin."""
        # Re-wrap the binary buffer so undecodable bytes are dropped
        # instead of raising.
        return TextIOWrapper(sys.stdin.buffer, errors='ignore').read()
1849
# Cached search for "# noqa" / "# nopep8" markers (case-insensitive).
noqa = lru_cache(512)(re.compile(r'# no(?:qa|pep8)\b', re.I).search)
1851
1852
def expand_indent(line):
    r"""Return the amount of indentation.

    Tabs are expanded to the next multiple of 8.

    >>> expand_indent('    ')
    4
    >>> expand_indent('\t')
    8
    >>> expand_indent('       \t')
    8
    >>> expand_indent('        \t')
    16
    """
    line = line.rstrip('\n\r')
    # Fast path: pure-space indentation.
    if '\t' not in line:
        return len(line) - len(line.lstrip())
    width = 0
    for ch in line:
        if ch == ' ':
            width += 1
        elif ch == '\t':
            # Jump to the next tab stop (multiple of 8).
            width = (width // 8 + 1) * 8
        else:
            break
    return width
1879
1880
def mute_string(text):
    """Replace contents with 'xxx' to prevent syntax matching.

    >>> mute_string('"abc"')
    '"xxx"'
    >>> mute_string("'''abc'''")
    "'''xxx'''"
    >>> mute_string("r'abc'")
    "r'xxx'"
    """
    # Skip any literal prefix (e.g. u or r) by locating the first
    # occurrence of the quote character, which equals the last char.
    begin = text.index(text[-1]) + 1
    finish = len(text) - 1
    # Widen the preserved region for triple-quoted strings.
    if text[-3:] in ('"""', "'''"):
        begin += 2
        finish -= 2
    return text[:begin] + 'x' * (finish - begin) + text[finish:]
1899
1900
def parse_udiff(diff, patterns=None, parent='.'):
    """Return a dictionary of matching lines."""
    # Map each target filename of the diff to the set of row numbers
    # its hunks touch in the new file.
    rv = {}
    path = nrows = None
    for line in diff.splitlines():
        if nrows:
            # Inside a hunk: every line that is not a deletion consumes
            # one row of the new file.
            if not line.startswith('-'):
                nrows -= 1
            continue
        if line.startswith('@@ '):
            hunk_match = HUNK_REGEX.match(line)
            row, nrows = (int(g or '1') for g in hunk_match.groups())
            rv[path].update(range(row, row + nrows))
        elif line.startswith('+++'):
            path = line[4:].split('\t', 1)[0]
            # Git diff will use (i)ndex, (w)ork tree, (c)ommit and
            # (o)bject instead of a/b/c/d as prefixes for patches
            if path[:2] in ('b/', 'w/', 'i/'):
                path = path[2:]
            rv[path] = set()
    return {
        os.path.join(parent, filepath): rows
        for filepath, rows in rv.items()
        if rows and filename_match(filepath, patterns)
    }
1928
1929
def normalize_paths(value, parent=os.curdir):
    """Parse a comma-separated list of paths.

    Return a list of absolute paths.
    """
    if not value:
        return []
    if isinstance(value, list):
        # Already parsed by a previous call; pass through unchanged.
        return value
    result = []
    for item in value.split(','):
        item = item.strip()
        # Only entries containing a separator are anchored to `parent`;
        # bare names (e.g. exclusion patterns) are kept relative.
        if '/' in item:
            item = os.path.abspath(os.path.join(parent, item))
        result.append(item.rstrip('/'))
    return result
1946
1947
def filename_match(filename, patterns, default=True):
    """Check if patterns contains a pattern that matches filename.

    If patterns is unspecified, this always returns True.
    """
    if not patterns:
        return default
    for pattern in patterns:
        if fnmatch(filename, pattern):
            return True
    return False
1956
1957
def update_counts(s, counts):
    r"""Adds one to the counts of each appearance of characters in s,
        for characters in counts"""
    # Only characters already present as keys in `counts` are tracked;
    # everything else is ignored.
    for ch in s:
        if ch in counts:
            counts[ch] += 1
1964
1965
def _is_eol_token(token):
    """Return True if the token ends a physical line."""
    # Either an explicit NEWLINE/NL token, or the remainder of the
    # physical line after the token is just a backslash continuation.
    if token[0] in NEWLINE:
        return True
    rest_of_line = token[4][token[3][1]:]
    return rest_of_line.lstrip() == '\\\n'
1968
1969
1970########################################################################
1971# Framework to run all checks
1972########################################################################
1973
1974
class Checker(object):
    """Load a Python source file, tokenize it, check coding style."""

    def __init__(self, filename=None, lines=None,
                 options=None, report=None, **kwargs):
        # Build a default StyleGuide from **kwargs when no options are
        # supplied; otherwise kwargs must be empty.
        if options is None:
            options = StyleGuide(kwargs).options
        else:
            assert not kwargs
        self._io_error = None
        self._physical_checks = options.physical_checks
        self._logical_checks = options.logical_checks
        self._ast_checks = options.ast_checks
        self.max_line_length = options.max_line_length
        self.max_doc_length = options.max_doc_length
        self.indent_size = options.indent_size
        self.multiline = False  # in a multiline string?
        self.hang_closing = options.hang_closing
        # NOTE(review): indent_size is assigned a second time here
        # (duplicate of the assignment above) — harmless, but confirm.
        self.indent_size = options.indent_size
        self.verbose = options.verbose
        self.filename = filename
        # Dictionary where a checker can store its custom state.
        self._checker_states = {}
        if filename is None:
            self.filename = 'stdin'
            self.lines = lines or []
        elif filename == '-':
            self.filename = 'stdin'
            self.lines = stdin_get_value().splitlines(True)
        elif lines is None:
            # Read from disk; remember any IOError so it can be
            # reported later as E902 by generate_tokens().
            try:
                self.lines = readlines(filename)
            except IOError:
                (exc_type, exc) = sys.exc_info()[:2]
                self._io_error = '%s: %s' % (exc_type.__name__, exc)
                self.lines = []
        else:
            self.lines = lines
        if self.lines:
            ord0 = ord(self.lines[0][0])
            if ord0 in (0xef, 0xfeff):  # Strip the UTF-8 BOM
                if ord0 == 0xfeff:
                    self.lines[0] = self.lines[0][1:]
                elif self.lines[0][:3] == '\xef\xbb\xbf':
                    self.lines[0] = self.lines[0][3:]
        self.report = report or options.report
        self.report_error = self.report.error
        self.noqa = False

    def report_invalid_syntax(self):
        """Check if the syntax is valid."""
        # Extract a (row, col) offset from the exception when present;
        # default to the start of the file otherwise.
        (exc_type, exc) = sys.exc_info()[:2]
        if len(exc.args) > 1:
            offset = exc.args[1]
            if len(offset) > 2:
                offset = offset[1:3]
        else:
            offset = (1, 0)
        self.report_error(offset[0], offset[1] or 0,
                          'E901 %s: %s' % (exc_type.__name__, exc.args[0]),
                          self.report_invalid_syntax)

    def readline(self):
        """Get the next line from the input buffer."""
        if self.line_number >= self.total_lines:
            return ''
        line = self.lines[self.line_number]
        self.line_number += 1
        # Remember the first indent character seen (space or tab) for
        # the indentation-consistency checks.
        if self.indent_char is None and line[:1] in WHITESPACE:
            self.indent_char = line[0]
        return line

    def run_check(self, check, argument_names):
        """Run a check plugin."""
        # Each check declares its arguments by name; fetch the matching
        # attributes from this Checker instance.
        arguments = []
        for name in argument_names:
            arguments.append(getattr(self, name))
        return check(*arguments)

    def init_checker_state(self, name, argument_names):
        """Prepare custom state for the specific checker plugin."""
        if 'checker_state' in argument_names:
            self.checker_state = self._checker_states.setdefault(name, {})

    def check_physical(self, line):
        """Run all physical checks on a raw input line."""
        self.physical_line = line
        for name, check, argument_names in self._physical_checks:
            self.init_checker_state(name, argument_names)
            result = self.run_check(check, argument_names)
            if result is not None:
                (offset, text) = result
                self.report_error(self.line_number, offset, text, check)
                if text[:4] == 'E101':
                    # E101 means the file mixes tabs and spaces; trust
                    # this line's first character from here on.
                    self.indent_char = line[0]

    def build_tokens_line(self):
        """Build a logical line from tokens."""
        logical = []
        comments = []
        length = 0
        prev_row = prev_col = mapping = None
        for token_type, text, start, end, line in self.tokens:
            if token_type in SKIP_TOKENS:
                continue
            if not mapping:
                mapping = [(0, start)]
            if token_type == tokenize.COMMENT:
                comments.append(text)
                continue
            if token_type == tokenize.STRING:
                # Hide string contents so checks don't match inside them.
                text = mute_string(text)
            if prev_row:
                (start_row, start_col) = start
                if prev_row != start_row:    # different row
                    prev_text = self.lines[prev_row - 1][prev_col - 1]
                    # Insert a space between tokens joined across a line
                    # break, except right inside brackets.
                    if prev_text == ',' or (prev_text not in '{[(' and
                                            text not in '}])'):
                        text = ' ' + text
                elif prev_col != start_col:  # different column
                    # Preserve the original inter-token whitespace.
                    text = line[prev_col:start_col] + text
            logical.append(text)
            length += len(text)
            # mapping relates logical-line offsets to physical positions.
            mapping.append((length, end))
            (prev_row, prev_col) = end
        self.logical_line = ''.join(logical)
        self.noqa = comments and noqa(''.join(comments))
        return mapping

    def check_logical(self):
        """Build a line from tokens and run all logical checks on it."""
        self.report.increment_logical_line()
        mapping = self.build_tokens_line()
        if not mapping:
            return

        mapping_offsets = [offset for offset, _ in mapping]
        (start_row, start_col) = mapping[0][1]
        start_line = self.lines[start_row - 1]
        self.indent_level = expand_indent(start_line[:start_col])
        if self.blank_before < self.blank_lines:
            self.blank_before = self.blank_lines
        if self.verbose >= 2:
            print(self.logical_line[:80].rstrip())
        for name, check, argument_names in self._logical_checks:
            if self.verbose >= 4:
                print('   ' + name)
            self.init_checker_state(name, argument_names)
            for offset, text in self.run_check(check, argument_names) or ():
                if not isinstance(offset, tuple):
                    # As mappings are ordered, bisecting is a fast way
                    # to find a given offset in them.
                    token_offset, pos = mapping[bisect.bisect_left(
                        mapping_offsets, offset)]
                    offset = (pos[0], pos[1] + offset - token_offset)
                self.report_error(offset[0], offset[1], text, check)
        if self.logical_line:
            # Save state needed by checks that compare successive lines.
            self.previous_indent_level = self.indent_level
            self.previous_logical = self.logical_line
            if not self.indent_level:
                self.previous_unindented_logical_line = self.logical_line
        self.blank_lines = 0
        self.tokens = []

    def check_ast(self):
        """Build the file's AST and run all AST checks."""
        try:
            tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST)
        except (ValueError, SyntaxError, TypeError):
            return self.report_invalid_syntax()
        for name, cls, __ in self._ast_checks:
            checker = cls(tree, self.filename)
            for lineno, offset, text, check in checker.run():
                # Honor "# noqa" on the offending physical line.
                if not self.lines or not noqa(self.lines[lineno - 1]):
                    self.report_error(lineno, offset, text, check)

    def generate_tokens(self):
        """Tokenize file, run physical line checks and yield tokens."""
        if self._io_error:
            self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)
        tokengen = tokenize.generate_tokens(self.readline)
        try:
            prev_physical = ''
            for token in tokengen:
                # Ignore synthetic tokens past the end of the buffer.
                if token[2][0] > self.total_lines:
                    return
                self.noqa = token[4] and noqa(token[4])
                self.maybe_check_physical(token, prev_physical)
                yield token
                prev_physical = token[4]
        except (SyntaxError, tokenize.TokenError):
            self.report_invalid_syntax()

    def maybe_check_physical(self, token, prev_physical):
        """If appropriate for token, check current physical line(s)."""
        # Called after every token, but act only on end of line.

        # a newline token ends a single physical line.
        if _is_eol_token(token):
            # if the file does not end with a newline, the NEWLINE
            # token is inserted by the parser, but it does not contain
            # the previous physical line in `token[4]`
            if token[4] == '':
                self.check_physical(prev_physical)
            else:
                self.check_physical(token[4])
        elif token[0] == tokenize.STRING and '\n' in token[1]:
            # Less obviously, a string that contains newlines is a
            # multiline string, either triple-quoted or with internal
            # newlines backslash-escaped. Check every physical line in
            # the string *except* for the last one: its newline is
            # outside of the multiline string, so we consider it a
            # regular physical line, and will check it like any other
            # physical line.
            #
            # Subtleties:
            # - we don't *completely* ignore the last line; if it
            #   contains the magical "# noqa" comment, we disable all
            #   physical checks for the entire multiline string
            # - have to wind self.line_number back because initially it
            #   points to the last line of the string, and we want
            #   check_physical() to give accurate feedback
            if noqa(token[4]):
                return
            self.multiline = True
            self.line_number = token[2][0]
            _, src, (_, offset), _, _ = token
            # Prepend the text preceding the string on its first line.
            src = self.lines[self.line_number - 1][:offset] + src
            for line in src.split('\n')[:-1]:
                self.check_physical(line + '\n')
                self.line_number += 1
            self.multiline = False

    def check_all(self, expected=None, line_offset=0):
        """Run all checks on the input file."""
        self.report.init_file(self.filename, self.lines, expected, line_offset)
        self.total_lines = len(self.lines)
        if self._ast_checks:
            self.check_ast()
        self.line_number = 0
        self.indent_char = None
        self.indent_level = self.previous_indent_level = 0
        self.previous_logical = ''
        self.previous_unindented_logical_line = ''
        self.tokens = []
        self.blank_lines = self.blank_before = 0
        # Track bracket nesting: logical lines only end outside brackets.
        parens = 0
        for token in self.generate_tokens():
            self.tokens.append(token)
            token_type, text = token[0:2]
            if self.verbose >= 3:
                if token[2][0] == token[3][0]:
                    pos = '[%s:%s]' % (token[2][1] or '', token[3][1])
                else:
                    pos = 'l.%s' % token[3][0]
                print('l.%s\t%s\t%s\t%r' %
                      (token[2][0], pos, tokenize.tok_name[token[0]], text))
            if token_type == tokenize.OP:
                if text in '([{':
                    parens += 1
                elif text in '}])':
                    parens -= 1
            elif not parens:
                if token_type in NEWLINE:
                    if token_type == tokenize.NEWLINE:
                        # End of a logical line: blank-line counters reset.
                        self.check_logical()
                        self.blank_before = 0
                    elif len(self.tokens) == 1:
                        # The physical line contains only this token.
                        self.blank_lines += 1
                        del self.tokens[0]
                    else:
                        self.check_logical()
        if self.tokens:
            # Flush whatever remains (e.g. a file without trailing newline).
            self.check_physical(self.lines[-1])
            self.check_logical()
        return self.report.get_file_results()
2252
2253
class BaseReport(object):
    """Collect the results of the checks."""

    # Subclasses set this to print each filename before its first error.
    print_filename = False

    def __init__(self, options):
        self._benchmark_keys = options.benchmark_keys
        self._ignore_code = options.ignore_code
        # Results
        self.elapsed = 0
        self.total_errors = 0
        # counters maps benchmark keys and error codes to occurrence counts.
        self.counters = dict.fromkeys(self._benchmark_keys, 0)
        # messages maps each seen error code to its message text.
        self.messages = {}

    def start(self):
        """Start the timer."""
        self._start_time = time.time()

    def stop(self):
        """Stop the timer."""
        self.elapsed = time.time() - self._start_time

    def init_file(self, filename, lines, expected, line_offset):
        """Signal a new file."""
        self.filename = filename
        self.lines = lines
        self.expected = expected or ()
        self.line_offset = line_offset
        self.file_errors = 0
        self.counters['files'] += 1
        self.counters['physical lines'] += len(lines)

    def increment_logical_line(self):
        """Signal a new logical line."""
        self.counters['logical lines'] += 1

    def error(self, line_number, offset, text, check):
        """Report an error, according to options."""
        # The error code is the first 4 characters of the message text.
        code = text[:4]
        if self._ignore_code(code):
            return
        if code in self.counters:
            self.counters[code] += 1
        else:
            self.counters[code] = 1
            self.messages[code] = text[5:]
        # Don't care about expected errors or warnings
        if code in self.expected:
            return
        if self.print_filename and not self.file_errors:
            print(self.filename)
        self.file_errors += 1
        self.total_errors += 1
        # Returning the code tells subclasses the error was not filtered.
        return code

    def get_file_results(self):
        """Return the count of errors and warnings for this file."""
        return self.file_errors

    def get_count(self, prefix=''):
        """Return the total count of errors and warnings."""
        return sum(self.counters[key]
                   for key in self.messages if key.startswith(prefix))

    def get_statistics(self, prefix=''):
        """Get statistics for message codes that start with the prefix.

        prefix='' matches all errors and warnings
        prefix='E' matches all errors
        prefix='W' matches all warnings
        prefix='E4' matches all errors that have to do with imports
        """
        return ['%-7s %s %s' % (self.counters[key], key, self.messages[key])
                for key in sorted(self.messages) if key.startswith(prefix)]

    def print_statistics(self, prefix=''):
        """Print overall statistics (number of errors and warnings)."""
        for line in self.get_statistics(prefix):
            print(line)

    def print_benchmark(self):
        """Print benchmark numbers."""
        print('%-7.2f %s' % (self.elapsed, 'seconds elapsed'))
        if self.elapsed:
            for key in self._benchmark_keys:
                print('%-7d %s per second (%d total)' %
                      (self.counters[key] / self.elapsed, key,
                       self.counters[key]))
2342
2343
class FileReport(BaseReport):
    """Collect the results of the checks and print the filenames."""

    # Enables the filename header in BaseReport.error().
    print_filename = True
2348
2349
class StandardReport(BaseReport):
    """Collect and print the results of the checks."""

    def __init__(self, options):
        super(StandardReport, self).__init__(options)
        # The format can be a named template (e.g. 'default') or a
        # user-supplied format string.
        self._fmt = REPORT_FORMAT.get(options.format.lower(),
                                      options.format)
        self._repeat = options.repeat
        self._show_source = options.show_source
        self._show_pep8 = options.show_pep8

    def init_file(self, filename, lines, expected, line_offset):
        """Signal a new file."""
        # Errors are accumulated and printed sorted in get_file_results().
        self._deferred_print = []
        return super(StandardReport, self).init_file(
            filename, lines, expected, line_offset)

    def error(self, line_number, offset, text, check):
        """Report an error, according to options."""
        code = super(StandardReport, self).error(line_number, offset,
                                                 text, check)
        # Only the first occurrence of a code is printed unless --repeat.
        if code and (self.counters[code] == 1 or self._repeat):
            self._deferred_print.append(
                (line_number, offset, code, text[5:], check.__doc__))
        return code

    def get_file_results(self):
        """Print results and return the overall count for this file."""
        self._deferred_print.sort()
        for line_number, offset, code, text, doc in self._deferred_print:
            print(self._fmt % {
                'path': self.filename,
                'row': self.line_offset + line_number, 'col': offset + 1,
                'code': code, 'text': text,
            })
            if self._show_source:
                if line_number > len(self.lines):
                    line = ''
                else:
                    line = self.lines[line_number - 1]
                print(line.rstrip())
                # Point a caret at the offending column.
                print(re.sub(r'\S', ' ', line[:offset]) + '^')
            if self._show_pep8 and doc:
                print('    ' + doc.strip())

            # stdout is block buffered when not stdout.isatty().
            # line can be broken where buffer boundary since other
            # processes write to same file.
            # flush() after print() to avoid buffer boundary.
            # Typical buffer size is 8192. line written safely when
            # len(line) < 8192.
            sys.stdout.flush()
        return self.file_errors
2403
2404
class DiffReport(StandardReport):
    """Collect and print the results for the changed lines only."""

    def __init__(self, options):
        super(DiffReport, self).__init__(options)
        # Mapping of filename -> line numbers present in the diff.
        self._selected = options.selected_lines

    def error(self, line_number, offset, text, check):
        # Only lines that appear in the unified diff are reported.
        if line_number in self._selected[self.filename]:
            return super(DiffReport, self).error(
                line_number, offset, text, check)
        return None
2416
2417
class StyleGuide(object):
    """Initialize a PEP-8 instance with few options."""

    def __init__(self, *args, **kwargs):
        """Build the guide's options.

        Options may come from the command line (``parse_argv=True``),
        from configuration files, and/or from a positional dict and
        keyword arguments; explicit keyword arguments win over parsed
        values.
        """
        # build options from the command line
        self.checker_class = kwargs.pop('checker_class', Checker)
        parse_argv = kwargs.pop('parse_argv', False)
        config_file = kwargs.pop('config_file', False)
        parser = kwargs.pop('parser', None)
        # build options from dict
        options_dict = dict(*args, **kwargs)
        arglist = None if parse_argv else options_dict.get('paths', None)
        verbose = options_dict.get('verbose', None)
        options, self.paths = process_options(
            arglist, parse_argv, config_file, parser, verbose)
        if options_dict:
            # Explicit keyword options override anything parsed above.
            options.__dict__.update(options_dict)
            if 'paths' in options_dict:
                self.paths = options_dict['paths']

        self.runner = self.input_file
        self.options = options

        if not options.reporter:
            options.reporter = BaseReport if options.quiet else StandardReport

        options.select = tuple(options.select or ())
        if not (options.select or options.ignore or
                options.testsuite or options.doctest) and DEFAULT_IGNORE:
            # The default choice: ignore controversial checks
            options.ignore = tuple(DEFAULT_IGNORE.split(','))
        else:
            # Ignore all checks which are not explicitly selected
            options.ignore = ('',) if options.select else tuple(options.ignore)
        options.benchmark_keys = BENCHMARK_KEYS[:]
        options.ignore_code = self.ignore_code
        # Resolve the registered checks once, filtered by select/ignore.
        options.physical_checks = self.get_checks('physical_line')
        options.logical_checks = self.get_checks('logical_line')
        options.ast_checks = self.get_checks('tree')
        self.init_report()

    def init_report(self, reporter=None):
        """Initialize the report instance."""
        # Instantiate either the explicit reporter class or the one
        # chosen in __init__ / process_options.
        self.options.report = (reporter or self.options.reporter)(self.options)
        return self.options.report

    def check_files(self, paths=None):
        """Run all checks on the paths."""
        if paths is None:
            paths = self.paths
        report = self.options.report
        runner = self.runner
        report.start()
        try:
            for path in paths:
                if os.path.isdir(path):
                    self.input_dir(path)
                elif not self.excluded(path):
                    runner(path)
        except KeyboardInterrupt:
            # Ctrl-C stops early but still reports what was checked.
            print('... stopped')
        report.stop()
        return report

    def input_file(self, filename, lines=None, expected=None, line_offset=0):
        """Run all checks on a Python source file."""
        if self.options.verbose:
            print('checking %s' % filename)
        fchecker = self.checker_class(
            filename, lines=lines, options=self.options)
        return fchecker.check_all(expected=expected, line_offset=line_offset)

    def input_dir(self, dirname):
        """Check all files in this directory and all subdirectories."""
        dirname = dirname.rstrip('/')
        if self.excluded(dirname):
            return 0
        counters = self.options.report.counters
        verbose = self.options.verbose
        filepatterns = self.options.filename
        runner = self.runner
        for root, dirs, files in os.walk(dirname):
            if verbose:
                print('directory ' + root)
            counters['directories'] += 1
            # Prune excluded subdirectories in place so os.walk does not
            # descend into them; iterating a sorted copy makes the
            # in-place removal safe.
            for subdir in sorted(dirs):
                if self.excluded(subdir, root):
                    dirs.remove(subdir)
            for filename in sorted(files):
                # contain a pattern that matches?
                if ((filename_match(filename, filepatterns) and
                     not self.excluded(filename, root))):
                    runner(os.path.join(root, filename))

    def excluded(self, filename, parent=None):
        """Check if the file should be excluded.

        Check if 'options.exclude' contains a pattern matching filename.
        """
        if not self.options.exclude:
            return False
        # Try the bare basename first, then the absolute path, so both
        # name patterns and full-path patterns can match.
        basename = os.path.basename(filename)
        if filename_match(basename, self.options.exclude):
            return True
        if parent:
            filename = os.path.join(parent, filename)
        filename = os.path.abspath(filename)
        return filename_match(filename, self.options.exclude)

    def ignore_code(self, code):
        """Check if the error code should be ignored.

        If 'options.select' contains a prefix of the error code,
        return False.  Else, if 'options.ignore' contains a prefix of
        the error code, return True.
        """
        # A short prefix (e.g. 'E1', fewer than 4 characters) that is a
        # leading substring of any selected code enables that category.
        if len(code) < 4 and any(s.startswith(code)
                                 for s in self.options.select):
            return False
        return (code.startswith(self.options.ignore) and
                not code.startswith(self.options.select))

    def get_checks(self, argument_name):
        """Get all the checks for this category.

        Find all globally visible functions where the first argument
        name starts with argument_name and which contain selected tests.
        """
        checks = []
        for check, attrs in _checks[argument_name].items():
            (codes, args) = attrs
            # Keep the check if at least one of its codes is not
            # ignored (entries with a falsy code are always kept).
            if any(not (code and self.ignore_code(code)) for code in codes):
                checks.append((check.__name__, check, args))
        return sorted(checks)
2552
2553
def get_parser(prog='pycodestyle', version=__version__):
    """Create the parser for the program.

    ``prog`` and ``version`` allow wrapper tools to rebrand the
    command-line interface.  The returned parser carries a
    ``config_options`` attribute listing the option names that
    read_config() will accept from a configuration file.
    """
    parser = OptionParser(prog=prog, version=version,
                          usage="%prog [options] input ...")
    # Options settable from a config file; anything else found there is
    # reported and ignored by read_config().
    parser.config_options = [
        'exclude', 'filename', 'select', 'ignore', 'max-line-length',
        'max-doc-length', 'indent-size', 'hang-closing', 'count', 'format',
        'quiet', 'show-pep8', 'show-source', 'statistics', 'verbose']
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help="print status messages, or debug with -vv")
    parser.add_option('-q', '--quiet', default=0, action='count',
                      help="report only file names, or nothing with -qq")
    # --repeat is on by default; --first stores False into the same
    # destination to show only the first occurrence of each error.
    parser.add_option('-r', '--repeat', default=True, action='store_true',
                      help="(obsolete) show all occurrences of the same error")
    parser.add_option('--first', action='store_false', dest='repeat',
                      help="show first occurrence of each error")
    parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
                      help="exclude files or directories which match these "
                           "comma separated patterns (default: %default)")
    parser.add_option('--filename', metavar='patterns', default='*.py',
                      help="when parsing directories, only check filenames "
                           "matching these comma separated patterns "
                           "(default: %default)")
    parser.add_option('--select', metavar='errors', default='',
                      help="select errors and warnings (e.g. E,W6)")
    parser.add_option('--ignore', metavar='errors', default='',
                      help="skip errors and warnings (e.g. E4,W) "
                           "(default: %s)" % DEFAULT_IGNORE)
    parser.add_option('--show-source', action='store_true',
                      help="show source code for each error")
    parser.add_option('--show-pep8', action='store_true',
                      help="show text of PEP 8 for each error "
                           "(implies --first)")
    parser.add_option('--statistics', action='store_true',
                      help="count errors and warnings")
    parser.add_option('--count', action='store_true',
                      help="print total number of errors and warnings "
                           "to standard error and set exit code to 1 if "
                           "total is not null")
    parser.add_option('--max-line-length', type='int', metavar='n',
                      default=MAX_LINE_LENGTH,
                      help="set maximum allowed line length "
                           "(default: %default)")
    parser.add_option('--max-doc-length', type='int', metavar='n',
                      default=None,
                      help="set maximum allowed doc line length and perform "
                           "these checks (unchecked if not set)")
    parser.add_option('--indent-size', type='int', metavar='n',
                      default=INDENT_SIZE,
                      help="set how many spaces make up an indent "
                           "(default: %default)")
    parser.add_option('--hang-closing', action='store_true',
                      help="hang closing bracket instead of matching "
                           "indentation of opening bracket's line")
    parser.add_option('--format', metavar='format', default='default',
                      help="set the error format [default|pylint|<custom>]")
    parser.add_option('--diff', action='store_true',
                      help="report changes only within line number ranges in "
                           "the unified diff received on STDIN")
    group = parser.add_option_group("Testing Options")
    # Regression-test options are only registered when running from a
    # checkout that contains the testsuite directory.
    if os.path.exists(TESTSUITE_PATH):
        group.add_option('--testsuite', metavar='dir',
                         help="run regression tests from dir")
        group.add_option('--doctest', action='store_true',
                         help="run doctest on myself")
    group.add_option('--benchmark', action='store_true',
                     help="measure processing speed")
    return parser
2622
2623
def read_config(options, args, arglist, parser):
    """Read and parse configurations.

    If a config file is specified on the command line with the
    "--config" option, then only it is used for configuration.

    Otherwise, the user configuration (~/.config/pycodestyle) and any
    local configurations in the current directory or above will be
    merged together (in that order) using the read method of
    ConfigParser.
    """
    config = RawConfigParser()

    cli_conf = options.config

    # Base directory used to normalize relative 'exclude' patterns
    # found in a config file (see the coercion loop below).
    local_dir = os.curdir

    if USER_CONFIG and os.path.isfile(USER_CONFIG):
        if options.verbose:
            print('user configuration: %s' % USER_CONFIG)
        config.read(USER_CONFIG)

    # Walk up from the common prefix of the input paths until a
    # directory containing one of the PROJECT_CONFIG files is found.
    parent = tail = args and os.path.abspath(os.path.commonprefix(args))
    while tail:
        if config.read(os.path.join(parent, fn) for fn in PROJECT_CONFIG):
            local_dir = parent
            if options.verbose:
                print('local configuration: in %s' % parent)
            break
        (parent, tail) = os.path.split(parent)

    # NOTE(review): the docstring says only the --config file is used
    # when given, but the user/local configurations above are still
    # read first; reading the --config file last merely lets it
    # override matching keys — confirm intended behavior.
    if cli_conf and os.path.isfile(cli_conf):
        if options.verbose:
            print('cli configuration: %s' % cli_conf)
        config.read(cli_conf)

    # Accept the [pycodestyle] section, falling back to legacy [pep8].
    pycodestyle_section = None
    if config.has_section(parser.prog):
        pycodestyle_section = parser.prog
    elif config.has_section('pep8'):
        pycodestyle_section = 'pep8'  # Deprecated
        warnings.warn('[pep8] section is deprecated. Use [pycodestyle].')

    if pycodestyle_section:
        # Map each option's destination name to its type (or, for
        # typeless flags, its action) so config values can be coerced.
        option_list = {o.dest: o.type or o.action for o in parser.option_list}

        # First, read the default values
        (new_options, __) = parser.parse_args([])

        # Second, parse the configuration
        for opt in config.options(pycodestyle_section):
            if opt.replace('_', '-') not in parser.config_options:
                print("  unknown option '%s' ignored" % opt)
                continue
            if options.verbose > 1:
                print("  %s = %s" % (opt,
                                     config.get(pycodestyle_section, opt)))
            normalized_opt = opt.replace('-', '_')
            opt_type = option_list[normalized_opt]
            if opt_type in ('int', 'count'):
                value = config.getint(pycodestyle_section, opt)
            elif opt_type in ('store_true', 'store_false'):
                value = config.getboolean(pycodestyle_section, opt)
            else:
                value = config.get(pycodestyle_section, opt)
                if normalized_opt == 'exclude':
                    # Relative exclude patterns are anchored at the
                    # directory the config file was found in.
                    value = normalize_paths(value, local_dir)
            setattr(new_options, normalized_opt, value)

        # Third, overwrite with the command-line options
        (options, __) = parser.parse_args(arglist, values=new_options)
    # The testing options can never be enabled from a config file.
    options.doctest = options.testsuite = False
    return options
2697
2698
def process_options(arglist=None, parse_argv=False, config_file=None,
                    parser=None, verbose=None):
    """Process options passed either via arglist or command line args.

    Passing in the ``config_file`` parameter allows other tools, such as
    flake8 to specify their own options to be processed in pycodestyle.

    Returns an ``(options, args)`` pair where ``args`` is the list of
    paths to check.
    """
    if not parser:
        parser = get_parser()
    if not parser.has_option('--config'):
        group = parser.add_option_group("Configuration", description=(
            "The project options are read from the [%s] section of the "
            "tox.ini file or the setup.cfg file located in any parent folder "
            "of the path(s) being processed.  Allowed options are: %s." %
            (parser.prog, ', '.join(parser.config_options))))
        group.add_option('--config', metavar='path', default=config_file,
                         help="user config file location")
    # Don't read the command line if the module is used as a library.
    if not arglist and not parse_argv:
        arglist = []
    # If parse_argv is True and arglist is None, arguments are
    # parsed from the command line (sys.argv)
    (options, args) = parser.parse_args(arglist)
    options.reporter = None

    # If explicitly specified verbosity, override any `-v` CLI flag
    if verbose is not None:
        options.verbose = verbose

    # ensure_value() creates the attribute with the given default when
    # the parser (e.g. one supplied by a wrapper tool) lacks the option.
    if options.ensure_value('testsuite', False):
        args.append(options.testsuite)
    elif not options.ensure_value('doctest', False):
        if parse_argv and not args:
            # With no explicit input, default to the current directory
            # only when checking a diff or when a project config exists.
            if options.diff or any(os.path.exists(name)
                                   for name in PROJECT_CONFIG):
                args = ['.']
            else:
                parser.error('input not specified')
        options = read_config(options, args, arglist, parser)
        # A single -q on the command line selects the filename-only
        # reporter (FileReport); otherwise this stays falsy.
        options.reporter = parse_argv and options.quiet == 1 and FileReport

    # Comma-separated option strings become real lists.
    options.filename = _parse_multi_options(options.filename)
    options.exclude = normalize_paths(options.exclude)
    options.select = _parse_multi_options(options.select)
    options.ignore = _parse_multi_options(options.ignore)

    if options.diff:
        # Diff mode: restrict reporting to the changed lines read from
        # the unified diff on stdin.
        options.reporter = DiffReport
        stdin = stdin_get_value()
        options.selected_lines = parse_udiff(stdin, options.filename, args[0])
        args = sorted(options.selected_lines)

    return options, args
2752
2753
2754def _parse_multi_options(options, split_token=','):
2755    r"""Split and strip and discard empties.
2756
2757    Turns the following:
2758
2759    A,
2760    B,
2761
2762    into ["A", "B"]
2763    """
2764    if options:
2765        return [o.strip() for o in options.split(split_token) if o.strip()]
2766    else:
2767        return options
2768
2769
def _main():
    """Parse options and run checks on Python source."""
    import signal

    # Exit quietly instead of tracebacking when a downstream pipe
    # closes early (e.g. ``pycodestyle . | head``).
    try:
        signal.signal(signal.SIGPIPE, lambda signum, frame: sys.exit(1))
    except AttributeError:
        pass    # not supported on Windows

    style_guide = StyleGuide(parse_argv=True)
    options = style_guide.options

    # Either run the regression/doctest suite or check real files.
    if options.doctest or options.testsuite:
        from testsuite.support import run_tests
        report = run_tests(style_guide)
    else:
        report = style_guide.check_files()

    if options.statistics:
        report.print_statistics()
    if options.benchmark:
        report.print_benchmark()
    if options.testsuite and not options.quiet:
        report.print_results()

    # Non-zero exit status when any error was found; --count also
    # writes the total to stderr.
    if report.total_errors:
        if options.count:
            sys.stderr.write(str(report.total_errors) + '\n')
        sys.exit(1)
2802
2803
# Allow running this module directly as a command-line tool.
if __name__ == '__main__':
    _main()
2806