1#!/usr/bin/env python
2# pycodestyle.py - Check Python source code formatting, according to
3# PEP 8
4#
5# Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
6# Copyright (C) 2009-2014 Florent Xicluna <florent.xicluna@gmail.com>
7# Copyright (C) 2014-2016 Ian Lee <ianlee1521@gmail.com>
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation files
11# (the "Software"), to deal in the Software without restriction,
12# including without limitation the rights to use, copy, modify, merge,
13# publish, distribute, sublicense, and/or sell copies of the Software,
14# and to permit persons to whom the Software is furnished to do so,
15# subject to the following conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
24# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
25# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27# SOFTWARE.
28
29r"""
30Check Python source code formatting, according to PEP 8.
31
32For usage and a list of options, try this:
33$ python pycodestyle.py -h
34
35This program and its regression test suite live here:
36https://github.com/pycqa/pycodestyle
37
38Groups of errors and warnings:
39E errors
40W warnings
41100 indentation
42200 whitespace
43300 blank lines
44400 imports
45500 line length
46600 deprecation
47700 statements
48900 syntax error
49"""
50from __future__ import with_statement
51
52import inspect
53import keyword
54import os
55import re
56import sys
57import time
58import tokenize
59import warnings
60import bisect
61
62try:
63    from functools import lru_cache
64except ImportError:
65    def lru_cache(maxsize=128):  # noqa as it's a fake implementation.
66        """Does not really need a real a lru_cache, it's just
67        optimization, so let's just do nothing here. Python 3.2+ will
68        just get better performances, time to upgrade?
69        """
70        return lambda function: function
71
72from fnmatch import fnmatch
73from optparse import OptionParser
74
75try:
76    from configparser import RawConfigParser
77    from io import TextIOWrapper
78except ImportError:
79    from ConfigParser import RawConfigParser
80
__version__ = '2.6.0'  # patched PY-37054

# Comma-separated defaults for the --exclude and --ignore options.
DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__,.tox'
DEFAULT_IGNORE = 'E121,E123,E126,E226,E24,E704,W503,W504'
# Per-user configuration file location.
# NOTE(review): the expanduser/getenv calls here should not raise
# ImportError; this guard looks historic -- confirm before removing.
try:
    if sys.platform == 'win32':
        USER_CONFIG = os.path.expanduser(r'~\.pycodestyle')
    else:
        USER_CONFIG = os.path.join(
            os.getenv('XDG_CONFIG_HOME') or os.path.expanduser('~/.config'),
            'pycodestyle'
        )
except ImportError:
    USER_CONFIG = None

# File names searched for per-project configuration.
PROJECT_CONFIG = ('setup.cfg', 'tox.ini')
TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite')
MAX_LINE_LENGTH = 79
# Number of blank lines between various code parts.
BLANK_LINES_CONFIG = {
    # Top level class and function.
    'top_level': 2,
    # Methods and nested class and function.
    'method': 1,
}
MAX_DOC_LENGTH = 72
# Output templates selectable via the --format option.
REPORT_FORMAT = {
    'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s',
    'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s',
}

# compile() flag to produce an AST instead of executing the code.
PyCF_ONLY_AST = 1024
SINGLETONS = frozenset(['False', 'None', 'True'])
KEYWORDS = frozenset(keyword.kwlist + ['print', 'async']) - SINGLETONS
UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-', '@'])
WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%'])
# Warn for -> function annotation operator in py3.5+ (issue 803)
FUNCTION_RETURN_ANNOTATION_OP = ['->'] if sys.version_info >= (3, 5) else []
ASSIGNMENT_EXPRESSION_OP = [':='] if sys.version_info >= (3, 8) else []
# Operator groups consumed by the whitespace checks.
WS_NEEDED_OPERATORS = frozenset([
    '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>',
    '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '=',
    'and', 'in', 'is', 'or'] +
    FUNCTION_RETURN_ANNOTATION_OP +
    ASSIGNMENT_EXPRESSION_OP)
WHITESPACE = frozenset(' \t')
NEWLINE = frozenset([tokenize.NL, tokenize.NEWLINE])
SKIP_TOKENS = NEWLINE.union([tokenize.INDENT, tokenize.DEDENT])
# ERRORTOKEN is triggered by backticks in Python 3
SKIP_COMMENTS = SKIP_TOKENS.union([tokenize.COMMENT, tokenize.ERRORTOKEN])
BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines']

# Pre-compiled regular expressions shared by the checks below.
INDENT_REGEX = re.compile(r'([ \t]*)')
RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,')
RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,.*,\s*\w+\s*$')
ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b')
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[\[({] | [\]}),;]| :(?!=)')
WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?:  |\t)')
COMPARE_SINGLETON_REGEX = re.compile(r'(\bNone|\bFalse|\bTrue)?\s*([=!]=)'
                                     r'\s*(?(1)|(None|False|True))\b')
COMPARE_NEGATIVE_REGEX = re.compile(r'\b(?<!is\s)(not)\s+[^][)(}{ ]+\s+'
                                    r'(in|is)\s')
COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s+type(?:s.\w+Type'
                                r'|\s*\(\s*([^)]*[^ )])\s*\))')
KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
LAMBDA_REGEX = re.compile(r'\blambda\b')
HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')
STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\b')
STARTSWITH_TOP_LEVEL_REGEX = re.compile(r'^(async\s+def\s+|def\s+|class\s+|@)')
STARTSWITH_INDENT_STATEMENT_REGEX = re.compile(
    r'^\s*({0})\b'.format('|'.join(s.replace(' ', r'\s+') for s in (
        'def', 'async def',
        'for', 'async for',
        'if', 'elif', 'else',
        'try', 'except', 'finally',
        'with', 'async with',
        'class',
        'while',
    )))
)
DUNDER_REGEX = re.compile(r'^__([^\s]+)__ = ')

# Registry of check callables, keyed by the kind of argument they accept.
_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}
167
168
169def _get_parameters(function):
170    if sys.version_info >= (3, 3):
171        return [parameter.name
172                for parameter
173                in inspect.signature(function).parameters.values()
174                if parameter.kind == parameter.POSITIONAL_OR_KEYWORD]
175    else:
176        return inspect.getargspec(function)[0]
177
178
def register_check(check, codes=None):
    """Register a new check object."""
    def _record(kind, codes, args):
        # Merge codes when the same callable is registered twice.
        registry = _checks[kind]
        if check in registry:
            registry[check][0].extend(codes or [])
        else:
            registry[check] = (codes or [''], args)

    if inspect.isfunction(check):
        args = _get_parameters(check)
        if args and args[0] in ('physical_line', 'logical_line'):
            if codes is None:
                # Error codes are harvested from the docstring.
                codes = ERRORCODE_REGEX.findall(check.__doc__ or '')
            _record(args[0], codes, args)
    elif inspect.isclass(check):
        if _get_parameters(check.__init__)[:2] == ['self', 'tree']:
            _record('tree', codes, None)
    return check
196
197
198########################################################################
199# Plugins (check functions) for physical lines
200########################################################################
201
202@register_check
def tabs_or_spaces(physical_line, indent_char):
    r"""Never mix tabs and spaces.

    The most popular way of indenting Python is with spaces only.  The
    second-most popular way is with tabs only.  Code indented with a
    mixture of tabs and spaces should be converted to using spaces
    exclusively.  When invoking the Python command line interpreter with
    the -t option, it issues warnings about code that illegally mixes
    tabs and spaces.  When using -tt these warnings become errors.
    These options are highly recommended!

    Okay: if a == 0:\n    a = 1\n    b = 1
    E101: if a == 0:\n        a = 1\n\tb = 1
    """
    indent = INDENT_REGEX.match(physical_line).group(1)
    # Report the first indent character that disagrees with the file's
    # dominant indent character, if any.
    mismatch = next(
        (offset for offset, char in enumerate(indent)
         if char != indent_char),
        None)
    if mismatch is not None:
        return mismatch, "E101 indentation contains mixed spaces and tabs"
221
222
223@register_check
def tabs_obsolete(physical_line):
    r"""On new projects, spaces-only are strongly recommended over tabs.

    Okay: if True:\n    return
    W191: if True:\n\treturn
    """
    indent = INDENT_REGEX.match(physical_line).group(1)
    # Point at the first tab in the indentation, if there is one.
    tab_offset = indent.find('\t')
    if tab_offset != -1:
        return tab_offset, "W191 indentation contains tabs"
233
234
235@register_check
def trailing_whitespace(physical_line):
    r"""Trailing whitespace is superfluous.

    The warning returned varies on whether the line itself is blank,
    for easier filtering for those who want to indent their blank lines.

    Okay: spam(1)\n#
    W291: spam(1) \n#
    W293: class Foo(object):\n    \n    bang = 12
    """
    # Peel off the line terminators one kind at a time, in this exact
    # order: newline, carriage return, then form feed (^L).
    for terminator in ('\n', '\r', '\x0c'):
        physical_line = physical_line.rstrip(terminator)
    stripped = physical_line.rstrip(' \t\v')
    if stripped == physical_line:
        return None
    if stripped:
        return len(stripped), "W291 trailing whitespace"
    return 0, "W293 blank line contains whitespace"
255
256
257@register_check
def trailing_blank_lines(physical_line, lines, line_number, total_lines):
    r"""Trailing blank lines are superfluous.

    Okay: spam(1)
    W391: spam(1)\n

    However the last line should end with a new line (warning W292).
    """
    # Only the very last physical line of the file is of interest.
    if line_number != total_lines:
        return
    stripped_last_line = physical_line.rstrip()
    if physical_line and not stripped_last_line:
        return 0, "W391 blank line at end of file"
    if stripped_last_line == physical_line:
        # No terminator was stripped, so the file lacks a final newline.
        return len(lines[-1]), "W292 no newline at end of file"
272
273
274@register_check
def maximum_line_length(physical_line, max_line_length, multiline,
                        line_number, noqa):
    r"""Limit all lines to a maximum of 79 characters.

    There are still many devices around that are limited to 80 character
    lines; plus, limiting windows to 80 characters makes it possible to
    have several windows side-by-side.  The default wrapping on such
    devices looks ugly.  Therefore, please limit all lines to a maximum
    of 79 characters. For flowing long blocks of text (docstrings or
    comments), limiting the length to 72 characters is recommended.

    Reports error E501.
    """
    line = physical_line.rstrip()
    length = len(line)
    if noqa or length <= max_line_length:
        return
    # Special case: ignore long shebang lines.
    if line_number == 1 and line.startswith('#!'):
        return
    # Special case for long URLs in multi-line docstrings or comments,
    # but still report the error when the 72 first chars are whitespaces.
    chunks = line.split()
    is_long_token = ((len(chunks) == 1 and multiline) or
                     (len(chunks) == 2 and chunks[0] == '#'))
    if is_long_token and length - len(chunks[-1]) < max_line_length - 7:
        return
    if hasattr(line, 'decode'):   # Python 2
        # The line could contain multi-byte characters
        try:
            length = len(line.decode('utf-8'))
        except UnicodeError:
            pass
    if length > max_line_length:
        return (max_line_length, "E501 line too long "
                "(%d > %d characters)" % (length, max_line_length))
311
312
313########################################################################
314# Plugins (check functions) for logical lines
315########################################################################
316
317
def _is_one_liner(logical_line, indent_level, lines, line_number):
    """Return True when *logical_line* starts a one-line def/class.

    Helper for blank_lines(): groups of one-liners are allowed without
    separating blank lines.  *lines* is the list of physical lines;
    *line_number* is 1-based.
    """
    if not STARTSWITH_TOP_LEVEL_REGEX.match(logical_line):
        return False

    line_idx = line_number - 1

    if line_idx < 1:
        prev_indent = 0
    else:
        prev_indent = expand_indent(lines[line_idx - 1])

    # A more deeply indented previous line means this definition sits
    # inside another body, so it is not a stand-alone one-liner.
    if prev_indent > indent_level:
        return False

    # Skip over decorator lines to find the actual def/class line.
    while line_idx < len(lines):
        line = lines[line_idx].strip()
        if not line.startswith('@') and STARTSWITH_TOP_LEVEL_REGEX.match(line):
            break
        else:
            line_idx += 1
    else:
        return False  # invalid syntax: EOF while searching for def/class

    # Find the next non-blank physical line after the definition.
    next_idx = line_idx + 1
    while next_idx < len(lines):
        if lines[next_idx].strip():
            break
        else:
            next_idx += 1
    else:
        return True  # line is last in the file

    # One-liner iff the following code does not indent into this body.
    return expand_indent(lines[next_idx]) <= indent_level
351
352
@register_check
def blank_lines(logical_line, blank_lines, indent_level, line_number,
                blank_before, previous_logical,
                previous_unindented_logical_line, previous_indent_level,
                lines):
    r"""Separate top-level function and class definitions with two blank
    lines.

    Method definitions inside a class are separated by a single blank
    line.

    Extra blank lines may be used (sparingly) to separate groups of
    related functions.  Blank lines may be omitted between a bunch of
    related one-liners (e.g. a set of dummy implementations).

    Use blank lines in functions, sparingly, to indicate logical
    sections.

    Okay: def a():\n    pass\n\n\ndef b():\n    pass
    Okay: def a():\n    pass\n\n\nasync def b():\n    pass
    Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass
    Okay: default = 1\nfoo = 1
    Okay: classify = 1\nfoo = 1

    E301: class Foo:\n    b = 0\n    def bar():\n        pass
    E302: def a():\n    pass\n\ndef b(n):\n    pass
    E302: def a():\n    pass\n\nasync def b(n):\n    pass
    E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
    E303: def a():\n\n\n\n    pass
    E304: @decorator\n\ndef a():\n    pass
    E305: def a():\n    pass\na()
    E306: def a():\n    def b():\n        pass\n    def c():\n        pass
    """  # noqa
    # Expected blank-line counts from the module configuration
    # (2 at top level, 1 between methods).
    top_level_lines = BLANK_LINES_CONFIG['top_level']
    method_lines = BLANK_LINES_CONFIG['method']

    if not previous_logical and blank_before < top_level_lines:
        return  # Don't expect blank lines before the first line
    if previous_logical.startswith('@'):
        # A decorator must be immediately followed by the decorated def.
        if blank_lines:
            yield 0, "E304 blank lines found after function decorator"
    elif (blank_lines > top_level_lines or
            (indent_level and blank_lines == method_lines + 1)
          ):
        # More consecutive blank lines than the context allows.
        yield 0, "E303 too many blank lines (%d)" % blank_lines
    elif STARTSWITH_TOP_LEVEL_REGEX.match(logical_line):
        # allow a group of one-liners
        if (
            _is_one_liner(logical_line, indent_level, lines, line_number) and
            blank_before == 0
        ):
            return
        if indent_level:
            # Nested definition: decide between E301 (method missing its
            # separating blank line) and E306 (nested def directly after
            # other code inside a function).
            if not (blank_before == method_lines or
                    previous_indent_level < indent_level or
                    DOCSTRING_REGEX.match(previous_logical)
                    ):
                ancestor_level = indent_level
                nested = False
                # Search backwards for a def ancestor or tree root
                # (top level).
                for line in lines[line_number - top_level_lines::-1]:
                    if line.strip() and expand_indent(line) < ancestor_level:
                        ancestor_level = expand_indent(line)
                        nested = STARTSWITH_DEF_REGEX.match(line.lstrip())
                        if nested or ancestor_level == 0:
                            break
                if nested:
                    yield 0, "E306 expected %s blank line before a " \
                        "nested definition, found 0" % (method_lines,)
                else:
                    yield 0, "E301 expected %s blank line, found 0" % (
                        method_lines,)
        elif blank_before != top_level_lines:
            yield 0, "E302 expected %s blank lines, found %d" % (
                top_level_lines, blank_before)
    elif (logical_line and
            not indent_level and
            blank_before != top_level_lines and
            previous_unindented_logical_line.startswith(('def ', 'class '))
          ):
        # Top-level code immediately after a def/class body.
        yield 0, "E305 expected %s blank lines after " \
            "class or function definition, found %d" % (
                top_level_lines, blank_before)
437
438
439@register_check
def extraneous_whitespace(logical_line):
    r"""Avoid extraneous whitespace.

    Avoid extraneous whitespace in these situations:
    - Immediately inside parentheses, brackets or braces.
    - Immediately before a comma, semicolon, or colon.

    Okay: spam(ham[1], {eggs: 2})
    E201: spam( ham[1], {eggs: 2})
    E201: spam(ham[ 1], {eggs: 2})
    E201: spam(ham[1], { eggs: 2})
    E202: spam(ham[1], {eggs: 2} )
    E202: spam(ham[1 ], {eggs: 2})
    E202: spam(ham[1], {eggs: 2 })

    E203: if x == 4: print x, y; x, y = y , x
    E203: if x == 4: print x, y ; x, y = y, x
    E203: if x == 4 : print x, y; x, y = y, x
    """
    for m in EXTRANEOUS_WHITESPACE_REGEX.finditer(logical_line):
        text = m.group()
        char = text.strip()
        pos = m.start()
        if text == char + ' ':
            # Space directly after an opening '(', '[' or '{'.
            yield pos + 1, "E201 whitespace after '%s'" % char
        elif logical_line[pos - 1] != ',':
            # Space before a closing bracket (E202) or before ',;:' (E203).
            code = 'E202' if char in '}])' else 'E203'
            yield pos, "%s whitespace before '%s'" % (code, char)
470
471
472@register_check
def whitespace_around_keywords(logical_line):
    r"""Avoid extraneous whitespace around keywords.

    Okay: True and False
    E271: True and  False
    E272: True  and False
    E273: True and\tFalse
    E274: True\tand False
    """
    for found in KEYWORD_REGEX.finditer(logical_line):
        gap_before, gap_after = found.groups()

        # Tabs beat the multiple-space checks on either side.
        if '\t' in gap_before:
            yield found.start(1), "E274 tab before keyword"
        elif len(gap_before) > 1:
            yield found.start(1), "E272 multiple spaces before keyword"

        if '\t' in gap_after:
            yield found.start(2), "E273 tab after keyword"
        elif len(gap_after) > 1:
            yield found.start(2), "E271 multiple spaces after keyword"
494
495
496@register_check
def missing_whitespace_after_import_keyword(logical_line):
    r"""Multiple imports in form from x import (a, b, c) should have
    space between import statement and parenthesised name list.

    Okay: from foo import (bar, baz)
    E275: from foo import(bar, baz)
    E275: from importable.module import(bar, baz)
    """
    indicator = ' import('
    if logical_line.startswith('from '):
        found = logical_line.find(indicator)
        if found > -1:
            # Point at the '(' that follows the import keyword.
            pos = found + len(indicator) - 1
            yield pos, "E275 missing whitespace after keyword"
512
513
514@register_check
def missing_whitespace(logical_line):
    r"""Each comma, semicolon or colon should be followed by whitespace.

    Okay: [a, b]
    Okay: (3,)
    Okay: a[1:4]
    Okay: a[:4]
    Okay: a[1:]
    Okay: a[1:4:2]
    E231: ['a','b']
    E231: foo(bar,baz)
    E231: [{'a':'b'}]
    """
    line = logical_line
    # Walk adjacent character pairs idiomatically instead of indexing
    # with range(len(line) - 1); behavior is unchanged.
    for index, (char, next_char) in enumerate(zip(line, line[1:])):
        if char in ',;:' and next_char not in WHITESPACE:
            before = line[:index]
            # A ':' inside an open '[' that is not a dict literal is
            # slice syntax and needs no trailing space.
            if char == ':' and before.count('[') > before.count(']') and \
                    before.rfind('{') < before.rfind('['):
                continue  # Slice syntax, no space required
            if char == ',' and next_char == ')':
                continue  # Allow tuple with only one element: (3,)
            if char == ':' and next_char == '=' and sys.version_info >= (3, 8):
                continue  # Allow assignment expression
            yield index, "E231 missing whitespace after '%s'" % char
542
543
544@register_check
def indentation(logical_line, previous_logical, indent_char,
                indent_level, previous_indent_level):
    r"""Use 4 spaces per indentation level.

    For really old code that you don't want to mess up, you can continue
    to use 8-space tabs.

    Okay: a = 1
    Okay: if a == 0:\n    a = 1
    E111:   a = 1
    E114:   # a = 1

    Okay: for item in items:\n    pass
    E112: for item in items:\npass
    E115: for item in items:\n# Hi\n    pass

    Okay: a = 1\nb = 2
    E113: a = 1\n    b = 2
    E116: a = 1\n    # b = 2
    """
    # Comment-only lines report the E114/E115/E116 variants (offset 3).
    comment = not logical_line
    c = 3 if comment else 0
    tmpl = "E11%d %s (comment)" if comment else "E11%d %s"
    if indent_level % 4:
        yield 0, tmpl % (1 + c, "indentation is not a multiple of four")
    indent_expect = previous_logical.endswith(':')
    if indent_expect and indent_level <= previous_indent_level:
        yield 0, tmpl % (2 + c, "expected an indented block")
    elif not indent_expect and indent_level > previous_indent_level:
        yield 0, tmpl % (3 + c, "unexpected indentation")

    if indent_expect:
        # One level deeper than the introducing statement, at most.
        expected = previous_indent_level + (8 if indent_char == '\t' else 4)
        if indent_level > expected:
            yield 0, tmpl % (7, 'over-indented')
580
581
@register_check
def continued_indentation(logical_line, tokens, indent_level, hang_closing,
                          indent_char, noqa, verbose):
    r"""Continuation lines indentation.

    Continuation lines should align wrapped elements either vertically
    using Python's implicit line joining inside parentheses, brackets
    and braces, or using a hanging indent.

    When using a hanging indent these considerations should be applied:
    - there should be no arguments on the first line, and
    - further indentation should be used to clearly distinguish itself
      as a continuation line.

    Okay: a = (\n)
    E123: a = (\n    )

    Okay: a = (\n    42)
    E121: a = (\n   42)
    E122: a = (\n42)
    E123: a = (\n    42\n    )
    E124: a = (24,\n     42\n)
    E125: if (\n    b):\n    pass
    E126: a = (\n        42)
    E127: a = (24,\n      42)
    E128: a = (24,\n    42)
    E129: if (a or\n    b):\n    pass
    E131: a = (\n    42\n 24)
    """
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    # A logical line on a single physical row has no continuation lines.
    if noqa or nrows == 1:
        return

    # indent_next tells us whether the next block is indented; assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line; in turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    row = depth = 0
    valid_hangs = (4,) if indent_char != '\t' else (4, 8)
    # remember how many brackets were opened on each line
    parens = [0] * nrows
    # relative indents of physical lines
    rel_indent = [0] * nrows
    # for each depth, collect a list of opening rows
    open_rows = [[0]]
    # for each depth, memorize the hanging indentation
    hangs = [None]
    # visual indents
    indent_chances = {}
    last_indent = tokens[0][2]
    visual_indent = None
    last_token_multiline = False
    # for each depth, memorize the visual indent column
    indent = [last_indent[1]]
    if verbose >= 3:
        print(">>> " + tokens[0][4].rstrip())

    # Walk the tokens, tracking bracket depth and per-depth indents.
    for token_type, text, start, end, line in tokens:

        newline = row < start[0] - first_row
        if newline:
            row = start[0] - first_row
            newline = not last_token_multiline and token_type not in NEWLINE

        if newline:
            # this is the beginning of a continuation line.
            last_indent = start
            if verbose >= 3:
                print("... " + line.rstrip())

            # record the initial indent.
            rel_indent[row] = expand_indent(line) - indent_level

            # identify closing bracket
            close_bracket = (token_type == tokenize.OP and text in ']})')

            # is the indent relative to an opening bracket line?
            for open_row in reversed(open_rows[depth]):
                hang = rel_indent[row] - rel_indent[open_row]
                hanging_indent = hang in valid_hangs
                if hanging_indent:
                    break
            if hangs[depth]:
                hanging_indent = (hang == hangs[depth])
            # is there any chance of visual indent?
            visual_indent = (not close_bracket and hang > 0 and
                             indent_chances.get(start[1]))

            if close_bracket and indent[depth]:
                # closing bracket for visual indent
                if start[1] != indent[depth]:
                    yield (start, "E124 closing bracket does not match "
                           "visual indentation")
            elif close_bracket and not hang:
                # closing bracket matches indentation of opening
                # bracket's line
                if hang_closing:
                    yield start, "E133 closing bracket is missing indentation"
            elif indent[depth] and start[1] < indent[depth]:
                if visual_indent is not True:
                    # visual indent is broken
                    yield (start, "E128 continuation line "
                           "under-indented for visual indent")
            elif hanging_indent or (indent_next and rel_indent[row] == 8):
                # hanging indent is verified
                if close_bracket and not hang_closing:
                    yield (start, "E123 closing bracket does not match "
                           "indentation of opening bracket's line")
                hangs[depth] = hang
            elif visual_indent is True:
                # visual indent is verified
                indent[depth] = start[1]
            elif visual_indent in (text, str):
                # ignore token lined up with matching one from a
                # previous line
                pass
            else:
                # indent is broken
                if hang <= 0:
                    error = "E122", "missing indentation or outdented"
                elif indent[depth]:
                    error = "E127", "over-indented for visual indent"
                elif not close_bracket and hangs[depth]:
                    error = "E131", "unaligned for hanging indent"
                else:
                    hangs[depth] = hang
                    if hang > 4:
                        error = "E126", "over-indented for hanging indent"
                    else:
                        error = "E121", "under-indented for hanging indent"
                yield start, "%s continuation line %s" % error

        # look for visual indenting
        if (parens[row] and
                token_type not in (tokenize.NL, tokenize.COMMENT) and
                not indent[depth]):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
            if verbose >= 4:
                print("bracket depth %s indent to %s" % (depth, start[1]))
        # deal with implicit string concatenation
        elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
              text in ('u', 'ur', 'b', 'br')):
            indent_chances[start[1]] = str
        # visual indent after assert/raise/with
        elif not row and not depth and text in ["assert", "raise", "with"]:
            indent_chances[end[1] + 1] = True
        # special case for the "if" statement because len("if (") == 4
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True
        elif text == ':' and line[end[1]:].isspace():
            open_rows[depth].append(row)

        # keep track of bracket depth
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
                indent.append(0)
                hangs.append(None)
                if len(open_rows) == depth:
                    open_rows.append([])
                open_rows[depth].append(row)
                parens[row] += 1
                if verbose >= 4:
                    print("bracket depth %s seen, col %s, visual min = %s" %
                          (depth, start[1], indent[depth]))
            elif text in ')]}' and depth > 0:
                # parent indents should not be more than this one
                prev_indent = indent.pop() or last_indent[1]
                hangs.pop()
                for d in range(depth):
                    if indent[d] > prev_indent:
                        indent[d] = 0
                for ind in list(indent_chances):
                    if ind >= prev_indent:
                        del indent_chances[ind]
                del open_rows[depth + 1:]
                depth -= 1
                if depth:
                    indent_chances[indent[depth]] = True
                for idx in range(row, -1, -1):
                    if parens[idx]:
                        parens[idx] -= 1
                        break
            assert len(indent) == depth + 1
            if start[1] not in indent_chances:
                # allow lining up tokens
                indent_chances[start[1]] = text

        last_token_multiline = (start[0] != end[0])
        if last_token_multiline:
            rel_indent[end[0] - first_row] = rel_indent[row]

    # E125/E129: the final continuation line is indented like the block
    # that follows the ':' of this logical line.
    if indent_next and expand_indent(line) == indent_level + 4:
        pos = (start[0], indent[0] + 4)
        if visual_indent:
            code = "E129 visually indented line"
        else:
            code = "E125 continuation line"
        yield pos, "%s with same indent as next logical line" % code
785
786
@register_check
def whitespace_before_parameters(logical_line, tokens):
    r"""Avoid extraneous whitespace.

    Avoid extraneous whitespace in the following situations:
    - before the open parenthesis that starts the argument list of a
      function call.
    - before the open parenthesis that starts an indexing or slicing.

    Okay: spam(1)
    E211: spam (1)

    Okay: dict['key'] = list[index]
    E211: dict ['key'] = list[index]
    E211: dict['key'] = list [index]
    """
    prev_type, prev_text, __, prev_end, __ = tokens[0]
    for index, token in enumerate(tokens[1:], start=1):
        token_type, text, start, end, __ = token
        # An opening bracket that starts a call/subscript, not glued to
        # the previous token, and preceded by something that yields a
        # value (a name or a closing bracket).
        is_opener = token_type == tokenize.OP and text in '(['
        follows_value = prev_type == tokenize.NAME or prev_text in '}])'
        if (is_opener and
                start != prev_end and
                follows_value and
                # Syntax "class A (B):" is allowed, but avoid it
                (index < 2 or tokens[index - 2][1] != 'class') and
                # Allow "return (a.foo for a in range(5))"
                not keyword.iskeyword(prev_text)):
            yield prev_end, "E211 whitespace before '%s'" % text
        prev_type, prev_text, prev_end = token_type, text, end
818
819
@register_check
def whitespace_around_operator(logical_line):
    r"""Avoid extraneous whitespace around an operator.

    Okay: a = 12 + 3
    E221: a = 4  + 5
    E222: a = 4 +  5
    E223: a = 4\t+ 5
    E224: a = 4 +\t5
    """
    for match in OPERATOR_REGEX.finditer(logical_line):
        # Group 1 is the whitespace before the operator, group 2 after;
        # the two sides get symmetric tab/multi-space checks.
        sides = (
            (1, "E223 tab before operator",
             "E221 multiple spaces before operator"),
            (2, "E224 tab after operator",
             "E222 multiple spaces after operator"),
        )
        for group, tab_msg, spaces_msg in sides:
            whitespace = match.group(group)
            if '\t' in whitespace:
                yield match.start(group), tab_msg
            elif len(whitespace) > 1:
                yield match.start(group), spaces_msg
842
843
@register_check
def missing_whitespace_around_operator(logical_line, tokens):
    r"""Surround operators with a single space on either side.

    - Always surround these binary operators with a single space on
      either side: assignment (=), augmented assignment (+=, -= etc.),
      comparisons (==, <, >, !=, <=, >=, in, not in, is, is not),
      Booleans (and, or, not).

    - If operators with different priorities are used, consider adding
      whitespace around the operators with the lowest priorities.

    Okay: i = i + 1
    Okay: submitted += 1
    Okay: x = x * 2 - 1
    Okay: hypot2 = x * x + y * y
    Okay: c = (a + b) * (a - b)
    Okay: foo(bar, key='word', *args, **kwargs)
    Okay: alpha[:-i]

    E225: i=i+1
    E225: submitted +=1
    E225: x = x /2 - 1
    E225: z = x **y
    E225: z = 1and 1
    E226: c = (a+b) * (a-b)
    E226: hypot2 = x*x + y*y
    E227: c = a|b
    E228: msg = fmt%(errno, errmsg)
    """
    # State machine over the token stream.  `need_space` is a tri-state
    # tracker for the most recently seen operator:
    #   False       -- no operator pending a whitespace check
    #   True        -- a mandatory-space operator was seen with a space
    #                  before it; a trailing space is still required
    #   (pos, bool) -- an optional-space operator was seen at `pos`;
    #                  the bool records whether a space preceded it
    # (`None` is a transient value converted to a tuple further below.)
    parens = 0
    need_space = False
    prev_type = tokenize.OP
    prev_text = prev_end = None
    operator_types = (tokenize.OP, tokenize.NAME)
    for token_type, text, start, end, line in tokens:
        if token_type in SKIP_COMMENTS:
            continue
        # Track call/lambda nesting so '=' can be recognized as a
        # keyword argument or default value (exempt from E225).
        if text in ('(', 'lambda'):
            parens += 1
        elif text == ')':
            parens -= 1
        if need_space:
            if start != prev_end:
                # Found a (probably) needed space
                if need_space is not True and not need_space[1]:
                    # Optional-space operator had no space before it but
                    # has one after: asymmetric -> E225.
                    yield (need_space[0],
                           "E225 missing whitespace around operator")
                need_space = False
            elif text == '>' and prev_text in ('<', '-'):
                # Tolerate the "<>" operator, even if running Python 3
                # Deal with Python 3's annotated return value "->"
                pass
            elif (
                    # def f(a, /, b):
                    #           ^
                    # def f(a, b, /):
                    #              ^
                    prev_text == '/' and text in {',', ')'} or
                    # def f(a, b, /):
                    #               ^
                    prev_text == ')' and text == ':'
            ):
                # Tolerate the "/" operator in function definition
                # For more info see PEP570
                pass
            else:
                if need_space is True or need_space[1]:
                    # A needed trailing space was not found
                    yield prev_end, "E225 missing whitespace around operator"
                elif prev_text != '**':
                    # Optional-space operator with no space on either
                    # side: report by operator class.
                    code, optype = 'E226', 'arithmetic'
                    if prev_text == '%':
                        code, optype = 'E228', 'modulo'
                    elif prev_text not in ARITHMETIC_OP:
                        code, optype = 'E227', 'bitwise or shift'
                    yield (need_space[0], "%s missing whitespace "
                           "around %s operator" % (code, optype))
                need_space = False
        elif token_type in operator_types and prev_end is not None:
            if text == '=' and parens:
                # Allow keyword args or defaults: foo(bar=None).
                pass
            elif text in WS_NEEDED_OPERATORS:
                need_space = True
            elif text in UNARY_OPERATORS:
                # Check if the operator is used as a binary operator
                # Allow unary operators: -123, -x, +1.
                # Allow argument unpacking: foo(*args, **kwargs).
                if (prev_text in '}])' if prev_type == tokenize.OP
                        else prev_text not in KEYWORDS):
                    need_space = None
            elif text in WS_OPTIONAL_OPERATORS:
                need_space = None

            if need_space is None:
                # Surrounding space is optional, but ensure that
                # trailing space matches opening space
                need_space = (prev_end, start != prev_end)
            elif need_space and start == prev_end:
                # A needed opening space was not found
                yield prev_end, "E225 missing whitespace around operator"
                need_space = False
        prev_type = token_type
        prev_text = text
        prev_end = end
950
951
@register_check
def whitespace_around_comma(logical_line):
    r"""Avoid extraneous whitespace after a comma or a colon.

    Note: these checks are disabled by default

    Okay: a = (1, 2)
    E241: a = (1,  2)
    E242: a = (1,\t2)
    """
    for match in WHITESPACE_AFTER_COMMA_REGEX.finditer(logical_line):
        # Report just past the separator character itself.
        offset = match.start() + 1
        matched = match.group()
        separator = matched[0]
        if '\t' in matched:
            yield offset, "E242 tab after '%s'" % separator
        else:
            yield offset, "E241 multiple spaces after '%s'" % separator
969
970
@register_check
def whitespace_around_named_parameter_equals(logical_line, tokens):
    r"""Don't use spaces around the '=' sign in function arguments.

    Don't use spaces around the '=' sign when used to indicate a
    keyword argument or a default parameter value, except when
    using a type annotation.

    Okay: def complex(real, imag=0.0):
    Okay: return magic(r=real, i=imag)
    Okay: boolean(a == b)
    Okay: boolean(a != b)
    Okay: boolean(a <= b)
    Okay: boolean(a >= b)
    Okay: def foo(arg: int = 42):
    Okay: async def foo(arg: int = 42):

    E251: def complex(real, imag = 0.0):
    E251: return magic(r = real, i = imag)
    E252: def complex(real, image: float=0.0):
    """
    # Bracket nesting depth; depth 1 is the def's own parameter list
    # when `in_def` is set.
    parens = 0
    # Set right after a plain keyword/default '=': the next token must
    # start immediately (no space after the '=').
    no_space = False
    # Set right after an annotated parameter's '=': the next token must
    # NOT start immediately (a space is required after the '=').
    require_space = False
    prev_end = None
    # True while scanning an annotated parameter ("arg: int = ...");
    # reset at each comma at depth 1 or when all brackets close.
    annotated_func_arg = False
    in_def = bool(STARTSWITH_DEF_REGEX.match(logical_line))

    message = "E251 unexpected spaces around keyword / parameter equals"
    missing_message = "E252 missing whitespace around parameter equals"

    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.NL:
            continue
        if no_space:
            no_space = False
            # Space found *after* a keyword/default '='.
            if start != prev_end:
                yield (prev_end, message)
        if require_space:
            require_space = False
            # Space missing *after* an annotated parameter's '='.
            if start == prev_end:
                yield (prev_end, missing_message)
        if token_type == tokenize.OP:
            if text in '([':
                parens += 1
            elif text in ')]':
                parens -= 1
            elif in_def and text == ':' and parens == 1:
                annotated_func_arg = True
            elif parens == 1 and text == ',':
                annotated_func_arg = False
            elif parens and text == '=':
                if annotated_func_arg and parens == 1:
                    # Annotated parameter: a space is required on both
                    # sides of '='; the "before" side is checked here,
                    # the "after" side on the next token.
                    require_space = True
                    if start == prev_end:
                        yield (prev_end, missing_message)
                else:
                    # Keyword argument or plain default: no space is
                    # allowed on either side of '='.
                    no_space = True
                    if start != prev_end:
                        yield (prev_end, message)
            if not parens:
                annotated_func_arg = False

        prev_end = end
1035
1036
@register_check
def whitespace_before_comment(logical_line, tokens):
    r"""Separate inline comments by at least two spaces.

    An inline comment is a comment on the same line as a statement.
    Inline comments should be separated by at least two spaces from the
    statement. They should start with a # and a single space.

    Each line of a block comment starts with a # and a single space
    (unless it is indented text inside the comment).

    Okay: x = x + 1  # Increment x
    Okay: x = x + 1    # Increment x
    Okay: # Block comment
    E261: x = x + 1 # Increment x
    E262: x = x + 1  #Increment x
    E262: x = x + 1  #  Increment x
    E265: #Block comment
    E266: ### Block comment
    """
    prev_end = (0, 0)  # end position of the last non-comment, non-NL token
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.COMMENT:
            # Any non-blank text before the '#' makes this an inline
            # comment rather than a block comment.
            inline_comment = line[:start[1]].strip()
            if inline_comment:
                if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
                    yield (prev_end,
                           "E261 at least two spaces before inline comment")
            # Split the comment prefix (run up to the first space) from
            # the comment body.
            symbol, sp, comment = text.partition(' ')
            # bad_prefix is falsy for a well-formed prefix ('#' or '#:',
            # via substring membership in '#:'); otherwise it is the
            # first character after the leading '#'s, or '#' itself when
            # the prefix is all '#'s (e.g. '###').
            bad_prefix = symbol not in '#:' and (symbol.lstrip('#')[:1] or '#')
            if inline_comment:
                if bad_prefix or comment[:1] in WHITESPACE:
                    yield start, "E262 inline comment should start with '# '"
            elif bad_prefix and (bad_prefix != '!' or start[0] > 1):
                # A '#!' shebang is tolerated only on the first line.
                if bad_prefix != '#':
                    yield start, "E265 block comment should start with '# '"
                elif comment:
                    yield start, "E266 too many leading '#' for block comment"
        elif token_type != tokenize.NL:
            prev_end = end
1077
1078
@register_check
def imports_on_separate_lines(logical_line):
    r"""Place imports on separate lines.

    Okay: import os\nimport sys
    E401: import sys, os

    Okay: from subprocess import Popen, PIPE
    Okay: from myclass import MyClass
    Okay: from foo.bar.yourclass import YourClass
    Okay: import myclass
    Okay: import foo.bar.yourclass
    """
    line = logical_line
    if line.startswith('import '):
        found = line.find(',')
        # Only flag a comma that belongs to this import statement; a
        # semicolon before the comma means the comma is part of a later
        # statement on the same logical line.
        if -1 < found and ';' not in line[:found]:
            yield found, "E401 multiple imports on one line"
1097
1098
@register_check
def module_imports_on_top_of_file(
        logical_line, indent_level, checker_state, noqa):
    r"""Place imports at the top of the file.

    Always put imports at the top of the file, just after any module
    comments and docstrings, and before module globals and constants.

    Okay: import os
    Okay: # this is a comment\nimport os
    Okay: '''this is a module docstring'''\nimport os
    Okay: r'''this is a module docstring'''\nimport os
    Okay:
    try:\n\timport x\nexcept ImportError:\n\tpass\nelse:\n\tpass\nimport y
    Okay:
    try:\n\timport x\nexcept ImportError:\n\tpass\nfinally:\n\tpass\nimport y
    E402: a=1\nimport os
    E402: 'One string'\n"Two string"\nimport os
    E402: a=1\nfrom sys import x

    Okay: if x:\n    import os
    """  # noqa
    def is_string_literal(line):
        # Strip an optional u/b prefix, then an optional r prefix, and
        # test whether a quote character follows.
        if line[0] in 'uUbB':
            line = line[1:]
        if line and line[0] in 'rR':
            line = line[1:]
        return line and (line[0] == '"' or line[0] == "'")

    # Statements that may legitimately surround imports (conditional
    # imports, try/except fallbacks, ...) without triggering E402.
    allowed_keywords = (
        'try', 'except', 'else', 'finally', 'with', 'if', 'elif')

    if indent_level:  # Allow imports in conditional statement/function
        return
    if not logical_line:  # Allow empty lines or comments
        return
    if noqa:
        return
    line = logical_line
    if line.startswith('import ') or line.startswith('from '):
        # Flag the import only if non-import code was seen earlier in
        # the file; `checker_state` persists across logical lines.
        if checker_state.get('seen_non_imports', False):
            yield 0, "E402 module level import not at top of file"
    elif re.match(DUNDER_REGEX, line):
        # Module dunder assignments (per DUNDER_REGEX) may precede
        # imports without counting as "non-import" code.
        return
    elif any(line.startswith(kw) for kw in allowed_keywords):
        # Allow certain keywords intermixed with imports in order to
        # support conditional or filtered importing
        return
    elif is_string_literal(line):
        # The first literal is a docstring, allow it. Otherwise, report
        # error.
        if checker_state.get('seen_docstring', False):
            checker_state['seen_non_imports'] = True
        else:
            checker_state['seen_docstring'] = True
    else:
        checker_state['seen_non_imports'] = True
1156
1157
@register_check
def compound_statements(logical_line):
    r"""Compound statements (on the same line) are generally
    discouraged.

    While sometimes it's okay to put an if/for/while with a small body
    on the same line, never do this for multi-clause statements.
    Also avoid folding such long lines!

    Always use a def statement instead of an assignment statement that
    binds a lambda expression directly to a name.

    Okay: if foo == 'blah':\n    do_blah_thing()
    Okay: do_one()
    Okay: do_two()
    Okay: do_three()

    E701: if foo == 'blah': do_blah_thing()
    E701: for x in lst: total += x
    E701: while t < 10: t = delay()
    E701: if foo == 'blah': do_blah_thing()
    E701: else: do_non_blah_thing()
    E701: try: something()
    E701: finally: cleanup()
    E701: if foo == 'blah': one(); two(); three()
    E702: do_one(); do_two(); do_three()
    E703: do_four();  # useless semicolon
    E704: def f(x): return 2*x
    E731: f = lambda x: 2*x
    """
    line = logical_line
    last_char = len(line) - 1  # index of the last character
    found = line.find(':')
    prev_found = 0
    # Running totals of bracket characters seen before the current ':';
    # an excess of openers means the colon is still inside brackets.
    counts = {char: 0 for char in '{}[]()'}
    while -1 < found < last_char:
        update_counts(line[prev_found:found], counts)
        if ((counts['{'] <= counts['}'] and   # {'a': 1} (dict)
             counts['['] <= counts[']'] and   # [1:2] (slice)
             counts['('] <= counts[')']) and  # (annotation)
            not (sys.version_info >= (3, 8) and
                 line[found + 1] == '=')):  # assignment expression
            lambda_kw = LAMBDA_REGEX.search(line, 0, found)
            if lambda_kw:
                # E731 only when the lambda is the RHS of a simple
                # assignment to a bare identifier.
                before = line[:lambda_kw.start()].rstrip()
                if before[-1:] == '=' and isidentifier(before[:-1].strip()):
                    yield 0, ("E731 do not assign a lambda expression, use a "
                              "def")
                break
            if STARTSWITH_DEF_REGEX.match(line):
                yield 0, "E704 multiple statements on one line (def)"
            elif STARTSWITH_INDENT_STATEMENT_REGEX.match(line):
                yield found, "E701 multiple statements on one line (colon)"
        prev_found = found
        found = line.find(':', found + 1)
    # Semicolons: E702 mid-line, E703 when trailing.
    found = line.find(';')
    while -1 < found:
        if found < last_char:
            yield found, "E702 multiple statements on one line (semicolon)"
        else:
            yield found, "E703 statement ends with a semicolon"
        found = line.find(';', found + 1)
1220
1221
@register_check
def explicit_line_join(logical_line, tokens):
    r"""Avoid explicit line join between brackets.

    The preferred way of wrapping long lines is by using Python's
    implied line continuation inside parentheses, brackets and braces.
    Long lines can be broken over multiple lines by wrapping expressions
    in parentheses.  These should be used in preference to using a
    backslash for line continuation.

    E502: aaa = [123, \\n       123]
    E502: aaa = ("bbb " \\n       "ccc")

    Okay: aaa = [123,\n       123]
    Okay: aaa = ("bbb "\n       "ccc")
    Okay: aaa = "bbb " \\n    "ccc"
    Okay: aaa = 123  # \\
    """
    prev_start = prev_end = parens = 0
    comment = False  # latched once any comment token is seen
    backslash = None  # (row, col) of the previous row's trailing '\'
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.COMMENT:
            comment = True
        # A recorded backslash continuation while brackets are open is
        # redundant: the brackets already imply line continuation.
        if start[0] != prev_start and parens and backslash and not comment:
            yield backslash, "E502 the backslash is redundant between brackets"
        if end[0] != prev_end:
            # Token ends on a new row: remember whether that physical
            # line ends with a backslash continuation.
            if line.rstrip('\r\n').endswith('\\'):
                backslash = (end[0], len(line.splitlines()[-1]) - 1)
            else:
                backslash = None
            prev_start = prev_end = end[0]
        else:
            prev_start = start[0]
        if token_type == tokenize.OP:
            # Track bracket nesting depth.
            if text in '([{':
                parens += 1
            elif text in ')]}':
                parens -= 1
1261
1262
# OP-class tokens that _is_binary_operator() never treats as binary
# operators: pure punctuation/delimiters plus '=', '%', '~' and '...'.
_SYMBOLIC_OPS = frozenset("()[]{},:.;@=%~") | frozenset(("...",))
1264
1265
def _is_binary_operator(token_type, text):
    """Return a truthy value if the token can act as a binary operator."""
    operator_like = token_type == tokenize.OP or text in ('and', 'or')
    # NOTE(sigmavirus24): Previously the not_a_symbol check was executed
    # conditionally. Since it is now *always* executed, text may be
    # None. In that case we get a TypeError for `text not in str`.
    #
    # The % character is strictly speaking a binary operator, but the
    # common usage seems to be to put it next to the format parameters,
    # after a line break.
    return operator_like and (text and text not in _SYMBOLIC_OPS)
1277
1278
1279def _break_around_binary_operators(tokens):
1280    """Private function to reduce duplication.
1281
1282    This factors out the shared details between
1283    :func:`break_before_binary_operator` and
1284    :func:`break_after_binary_operator`.
1285    """
1286    line_break = False
1287    unary_context = True
1288    # Previous non-newline token types and text
1289    previous_token_type = None
1290    previous_text = None
1291    for token_type, text, start, end, line in tokens:
1292        if token_type == tokenize.COMMENT:
1293            continue
1294        if ('\n' in text or '\r' in text) and token_type != tokenize.STRING:
1295            line_break = True
1296        else:
1297            yield (token_type, text, previous_token_type, previous_text,
1298                   line_break, unary_context, start)
1299            unary_context = text in '([{,;'
1300            line_break = False
1301            previous_token_type = token_type
1302            previous_text = text
1303
1304
@register_check
def break_before_binary_operator(logical_line, tokens):
    r"""
    Avoid breaks before binary operators.

    The preferred place to break around a binary operator is after the
    operator, not before it.

    W503: (width == 0\n + height == 0)
    W503: (width == 0\n and height == 0)
    W503: var = (1\n       & ~2)
    W503: var = (1\n       / -2)
    W503: var = (1\n       + -1\n       + -2)

    Okay: foo(\n    -x)
    Okay: foo(x\n    [])
    Okay: x = '''\n''' + ''
    Okay: foo(x,\n    -y)
    Okay: foo(x,  # comment\n    -y)
    """
    for context in _break_around_binary_operators(tokens):
        (token_type, text, previous_token_type, previous_text,
         line_break, unary_context, start) = context
        # Only a binary operator that starts a continuation line, and
        # does not itself follow a binary operator, is flagged.
        if not line_break or unary_context:
            continue
        if (_is_binary_operator(token_type, text) and
                not _is_binary_operator(previous_token_type,
                                        previous_text)):
            yield start, "W503 line break before binary operator"
1333
1334
@register_check
def break_after_binary_operator(logical_line, tokens):
    r"""
    Avoid breaks after binary operators.

    The preferred place to break around a binary operator is before the
    operator, not after it.

    W504: (width == 0 +\n height == 0)
    W504: (width == 0 and\n height == 0)
    W504: var = (1 &\n       ~2)

    Okay: foo(\n    -x)
    Okay: foo(x\n    [])
    Okay: x = '''\n''' + ''
    Okay: x = '' + '''\n'''
    Okay: foo(x,\n    -y)
    Okay: foo(x,  # comment\n    -y)

    The following should be W504 but unary_context is tricky with these
    Okay: var = (1 /\n       -2)
    Okay: var = (1 +\n       -1 +\n       -2)
    """
    prev_start = None
    for context in _break_around_binary_operators(tokens):
        (token_type, text, previous_token_type, previous_text,
         line_break, unary_context, start) = context
        # Flag a binary operator that ends a line: the token after the
        # break follows it and is itself not a binary operator.
        if (line_break and
                not unary_context and
                _is_binary_operator(previous_token_type, previous_text) and
                not _is_binary_operator(token_type, text)):
            yield prev_start, "W504 line break after binary operator"
        prev_start = start
1368
1369
@register_check
def comparison_to_singleton(logical_line, noqa):
    r"""Comparison to singletons should use "is" or "is not".

    Comparisons to singletons like None should always be done
    with "is" or "is not", never the equality operators.

    Okay: if arg is not None:
    E711: if arg != None:
    E711: if None == arg:
    E712: if arg == True:
    E712: if False == arg:

    Also, beware of writing if x when you really mean if x is not None
    -- e.g. when testing whether a variable or argument that defaults to
    None was set to some other value.  The other value might have a type
    (such as a container) that could be false in a boolean context!
    """
    if noqa:
        return
    match = COMPARE_SINGLETON_REGEX.search(logical_line)
    if not match:
        return
    # The singleton may appear on either side of the comparison.
    singleton = match.group(1) or match.group(3)
    same = match.group(2) == '=='

    msg = "'if cond is %s:'" % (('' if same else 'not ') + singleton)
    if singleton == 'None':
        code = 'E711'
    else:
        code = 'E712'
        nonzero = ((singleton == 'True' and same) or
                   (singleton == 'False' and not same))
        msg += " or 'if %scond:'" % ('' if nonzero else 'not ')
    yield match.start(2), ("%s comparison to %s should be %s" %
                           (code, singleton, msg))
1403
1404
@register_check
def comparison_negative(logical_line):
    r"""Negative comparison should be done using "not in" and "is not".

    Okay: if x not in y:\n    pass
    Okay: assert (X in Y or X is Z)
    Okay: if not (X in Y):\n    pass
    Okay: zz = x is not y
    E713: Z = not X in Y
    E713: if not X.B in Y:\n    pass
    E714: if not X is Y:\n    pass
    E714: Z = not X.B is Y
    """
    match = COMPARE_NEGATIVE_REGEX.search(logical_line)
    if not match:
        return
    pos = match.start(1)
    if match.group(2) == 'in':
        # "not ... in" -> membership test
        yield pos, "E713 test for membership should be 'not in'"
    else:
        # "not ... is" -> identity test
        yield pos, "E714 test for object identity should be 'is not'"
1425
1426
@register_check
def comparison_type(logical_line, noqa):
    r"""Object type comparisons should always use isinstance().

    Do not compare types directly.

    Okay: if isinstance(obj, int):
    E721: if type(obj) is type(1):

    When checking if an object is a string, keep in mind that it might
    be a unicode string too! In Python 2.3, str and unicode have a
    common base class, basestring, so you can do:

    Okay: if isinstance(obj, basestring):
    Okay: if type(a1) is type(b1):
    """
    match = COMPARE_TYPE_REGEX.search(logical_line)
    if noqa or not match:
        return
    inst = match.group(1)
    if inst and isidentifier(inst) and inst not in SINGLETONS:
        return  # Allow comparison for types which are not obvious
    yield match.start(), "E721 do not compare types, use 'isinstance()'"
1449
1450
# Compiled once at import time rather than on every call to
# bare_except().
BLANK_EXCEPT_REGEX = re.compile(r"except\s*:")


@register_check
def bare_except(logical_line, noqa):
    r"""When catching exceptions, mention specific exceptions when
    possible.

    Okay: except Exception:
    Okay: except BaseException:
    E722: except:
    """
    if noqa:
        return

    match = BLANK_EXCEPT_REGEX.match(logical_line)
    if match:
        yield match.start(), "E722 do not use bare 'except'"
1467
1468
@register_check
def ambiguous_identifier(logical_line, tokens):
    r"""Never use the characters 'l', 'O', or 'I' as variable names.

    In some fonts, these characters are indistinguishable from the
    numerals one and zero. When tempted to use 'l', use 'L' instead.

    Okay: L = 0
    Okay: o = 123
    Okay: i = 42
    E741: l = 0
    E741: O = 123
    E741: I = 42

    Variables can be bound in several other contexts, including class
    and function definitions, 'global' and 'nonlocal' statements,
    exception handlers, and 'with' and 'for' statements.
    In addition, we have a special handling for function parameters.

    Okay: except AttributeError as o:
    Okay: with lock as L:
    Okay: foo(l=12)
    Okay: for a in foo(l=12):
    E741: except AttributeError as O:
    E741: with lock as l:
    E741: global I
    E741: nonlocal l
    E741: def foo(l):
    E741: def foo(l=12):
    E741: l = foo(l=12)
    E741: for l in range(10):
    E742: class I(object):
    E743: def l(x):
    """
    is_func_def = False  # Set to true if 'def' is found
    # Bracket depth inside a parenthesized argument list (a call or a
    # def); '=' there is a keyword argument, not a binding, so E741 for
    # assignments is suppressed while this is non-zero.
    parameter_parentheses_level = 0
    idents_to_avoid = ('l', 'O', 'I')
    prev_type, prev_text, prev_start, prev_end, __ = tokens[0]
    for token_type, text, start, end, line in tokens[1:]:
        ident = pos = None  # E741 candidate found in this iteration
        # find function definitions
        if prev_text == 'def':
            is_func_def = True
        # update parameter parentheses level
        if parameter_parentheses_level == 0 and \
                prev_type == tokenize.NAME and \
                token_type == tokenize.OP and text == '(':
            parameter_parentheses_level = 1
        elif parameter_parentheses_level > 0 and \
                token_type == tokenize.OP:
            if text == '(':
                parameter_parentheses_level += 1
            elif text == ')':
                parameter_parentheses_level -= 1
        # identifiers on the lhs of an assignment operator
        # ('=' in text also matches augmented assignments like '+=')
        if token_type == tokenize.OP and '=' in text and \
                parameter_parentheses_level == 0:
            if prev_text in idents_to_avoid:
                ident = prev_text
                pos = prev_start
        # identifiers bound to values with 'as', 'for',
        # 'global', or 'nonlocal'
        if prev_text in ('as', 'for', 'global', 'nonlocal'):
            if text in idents_to_avoid:
                ident = text
                pos = start
        # function parameter definitions
        if is_func_def:
            if text in idents_to_avoid:
                ident = text
                pos = start
        if prev_text == 'class':
            if text in idents_to_avoid:
                yield start, "E742 ambiguous class definition '%s'" % text
        if prev_text == 'def':
            if text in idents_to_avoid:
                yield start, "E743 ambiguous function definition '%s'" % text
        if ident:
            yield pos, "E741 ambiguous variable name '%s'" % ident
        prev_type = token_type
        prev_text = text
        prev_start = start
1551
1552
@register_check
def python_3000_has_key(logical_line, noqa):
    r"""The {}.has_key() method is removed in Python 3: use the 'in'
    operator.

    Okay: if "alph" in d:\n    print d["alph"]
    W601: assert d.has_key('alph')
    """
    if noqa:
        return
    pos = logical_line.find('.has_key(')
    if pos != -1:
        yield pos, "W601 .has_key() is deprecated, use 'in'"
1564
1565
@register_check
def python_3000_raise_comma(logical_line):
    r"""When raising an exception, use "raise ValueError('message')".

    The older form is removed in Python 3.

    Okay: raise DummyError("Message")
    W602: raise DummyError, "Message"
    """
    match = RAISE_COMMA_REGEX.match(logical_line)
    if not match:
        return
    # The re-raise form matched by RERAISE_COMMA_REGEX is tolerated.
    if RERAISE_COMMA_REGEX.match(logical_line):
        return
    yield match.end() - 1, "W602 deprecated form of raising exception"
1578
1579
@register_check
def python_3000_not_equal(logical_line):
    r"""New code should always use != instead of <>.

    The older syntax is removed in Python 3.

    Okay: if a != 'no':
    W603: if a <> 'no':
    """
    offset = logical_line.find('<>')
    if offset != -1:
        yield offset, "W603 '<>' is deprecated, use '!='"
1592
1593
@register_check
def python_3000_backticks(logical_line):
    r"""Use repr() instead of backticks in Python 3.

    Okay: val = repr(1 + 2)
    W604: val = `1 + 2`
    """
    offset = logical_line.find('`')
    if offset != -1:
        yield offset, "W604 backticks are deprecated, use 'repr()'"
1604
1605
@register_check
def python_3000_invalid_escape_sequence(logical_line, tokens, noqa):
    r"""Invalid escape sequences are deprecated in Python 3.6.

    Okay: regex = r'\.png$'
    W605: regex = '\.png$'
    """
    if noqa:
        return

    # https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
    # Characters that form a recognized escape sequence after '\'.
    valid = [
        '\n',
        '\\',
        '\'',
        '"',
        'a',
        'b',
        'f',
        'n',
        'r',
        't',
        'v',
        '0', '1', '2', '3', '4', '5', '6', '7',
        'x',

        # Escape sequences only recognized in string literals
        'N',
        'u',
        'U',
    ]

    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.STRING:
            start_line, start_col = start
            # Closing quote: triple-quoted if the token ends with """
            # or '''; otherwise a single quote character.
            quote = text[-3:] if text[-3:] in ('"""', "'''") else text[-1]
            # Extract string modifiers (e.g. u or r)
            quote_pos = text.index(quote)
            prefix = text[:quote_pos].lower()
            # NOTE: `start` and `line` are rebound below, shadowing the
            # loop variables; neither is needed again this iteration.
            start = quote_pos + len(quote)
            string = text[start:-len(quote)]

            if 'r' not in prefix:
                pos = string.find('\\')
                while pos >= 0:
                    pos += 1  # index of the escaped character
                    if string[pos] not in valid:
                        # Map the offset inside `string` back to a
                        # (row, column) position in the file.
                        line = start_line + string.count('\n', 0, pos)
                        if line == start_line:
                            col = start_col + len(prefix) + len(quote) + pos
                        else:
                            col = pos - string.rfind('\n', 0, pos) - 1
                        yield (
                            (line, col - 1),
                            "W605 invalid escape sequence '\\%s'" %
                            string[pos],
                        )
                    pos = string.find('\\', pos + 1)
1664
1665
@register_check
def python_3000_async_await_keywords(logical_line, tokens):
    """'async' and 'await' are reserved keywords starting at Python 3.7.

    W606: async = 42
    W606: await = 42
    Okay: async def read(db):\n    data = await db.fetch('SELECT ...')
    """
    # The Python tokenize library before Python 3.5 recognizes
    # async/await as a NAME token. Therefore, use a state machine to
    # look for the possible async/await constructs as defined by the
    # Python grammar:
    # https://docs.python.org/3/reference/grammar.html

    # state is either None (nothing pending) or a tuple of
    # (state_name, start_position_of_the_token_that_opened_the_state).
    state = None
    for token_type, text, start, end, line in tokens:
        error = False

        if token_type == tokenize.NL:
            continue

        if state is None:
            if token_type == tokenize.NAME:
                if text == 'async':
                    state = ('async_stmt', start)
                elif text == 'await':
                    state = ('await', start)
                elif (token_type == tokenize.NAME and
                      text in ('def', 'for')):
                    # NOTE(review): the token_type test here is redundant
                    # (already established by the enclosing condition).
                    state = ('define', start)

        elif state[0] == 'async_stmt':
            if token_type == tokenize.NAME and text in ('def', 'with', 'for'):
                # One of funcdef, with_stmt, or for_stmt. Return to
                # looking for async/await names.
                state = None
            else:
                error = True
        elif state[0] == 'await':
            if token_type == tokenize.NAME:
                # An await expression. Return to looking for async/await
                # names.
                state = None
            elif token_type == tokenize.OP and text == '(':
                state = None
            else:
                error = True
        elif state[0] == 'define':
            # 'def async(...)' or 'for await in ...' would use the words
            # as plain identifiers, which is the error case.
            if token_type == tokenize.NAME and text in ('async', 'await'):
                error = True
            else:
                state = None

        if error:
            yield (
                state[1],
                "W606 'async' and 'await' are reserved keywords starting with "
                "Python 3.7",
            )
            state = None

    # Last token
    if state is not None:
        yield (
            state[1],
            "W606 'async' and 'await' are reserved keywords starting with "
            "Python 3.7",
        )
1734
1735
1736########################################################################
@register_check
def maximum_doc_length(logical_line, max_doc_length, noqa, tokens):
    r"""Limit all doc lines to a maximum of 72 characters.

    For flowing long blocks of text (docstrings or comments), limiting
    the length to 72 characters is recommended.

    Reports warning W505
    """
    if max_doc_length is None or noqa:
        return

    prev_token = None
    skip_lines = set()
    # Collect the physical lines that contain anything besides
    # comments/strings; those are not pure documentation lines.
    for token_type, text, start, end, line in tokens:
        if token_type not in SKIP_COMMENTS.union([tokenize.STRING]):
            skip_lines.add(line)

    for token_type, text, start, end, line in tokens:
        # Skip lines that aren't pure strings
        if token_type == tokenize.STRING and skip_lines:
            continue
        if token_type in (tokenize.STRING, tokenize.COMMENT):
            # Only check comment-only lines
            if prev_token is None or prev_token in SKIP_TOKENS:
                lines = line.splitlines()
                for line_num, physical_line in enumerate(lines):
                    if hasattr(physical_line, 'decode'):  # Python 2
                        # The line could contain multi-byte characters
                        try:
                            physical_line = physical_line.decode('utf-8')
                        except UnicodeError:
                            pass
                    # A shebang on the first line is exempt entirely.
                    if start[0] + line_num == 1 and line.startswith('#!'):
                        return
                    length = len(physical_line)
                    chunks = physical_line.split()
                    if token_type == tokenize.COMMENT:
                        # A comment made of the marker plus one long
                        # unbreakable word cannot be wrapped; allow it.
                        if (len(chunks) == 2 and
                                length - len(chunks[-1]) < MAX_DOC_LENGTH):
                            continue
                    if len(chunks) == 1 and line_num + 1 < len(lines):
                        # Same allowance for a single long word inside a
                        # multi-line string (except its last line).
                        if (len(chunks) == 1 and
                                length - len(chunks[-1]) < MAX_DOC_LENGTH):
                            continue
                    if length > max_doc_length:
                        doc_error = (start[0] + line_num, max_doc_length)
                        yield (doc_error, "W505 doc line too long "
                                          "(%d > %d characters)"
                               % (length, max_doc_length))
        prev_token = token_type
1789
1790
1791########################################################################
1792# Helper functions
1793########################################################################
1794
1795
if sys.version_info < (3,):
    # Python 2: implicit encoding.
    def readlines(filename):
        """Read the source code."""
        with open(filename, 'rU') as f:
            return f.readlines()
    isidentifier = re.compile(r'[a-zA-Z_]\w*$').match
    stdin_get_value = sys.stdin.read
else:
    # Python 3
    def readlines(filename):
        """Read the source code."""
        try:
            with open(filename, 'rb') as f:
                # Honour a coding declaration/BOM when decoding; lines
                # already consumed by detect_encoding are decoded here.
                (coding, lines) = tokenize.detect_encoding(f.readline)
                f = TextIOWrapper(f, coding, line_buffering=True)
                return [line.decode(coding) for line in lines] + f.readlines()
        except (LookupError, SyntaxError, UnicodeError):
            # Fall back if file encoding is improperly declared
            with open(filename, encoding='latin-1') as f:
                return f.readlines()
    isidentifier = str.isidentifier

    def stdin_get_value():
        """Read the value from stdin."""
        return TextIOWrapper(sys.stdin.buffer, encoding='utf-8', errors='ignore').read()
1822
1823noqa = lru_cache(512)(re.compile(r'# no(?:qa|pep8)\b', re.I).search)
1824
1825
def expand_indent(line):
    r"""Return the amount of indentation.

    Tabs are expanded to the next multiple of 8.

    >>> expand_indent('    ')
    4
    >>> expand_indent('\t')
    8
    >>> expand_indent('       \t')
    8
    >>> expand_indent('        \t')
    16
    """
    line = line.rstrip('\n\r')
    # Fast path: with no tabs, the indent is just the leading whitespace.
    if '\t' not in line:
        return len(line) - len(line.lstrip())
    width = 0
    for char in line:
        if char == ' ':
            width += 1
        elif char == '\t':
            # A tab jumps to the next multiple of 8.
            width = (width // 8 + 1) * 8
        else:
            break
    return width
1852
1853
def mute_string(text):
    """Replace contents with 'xxx' to prevent syntax matching.

    >>> mute_string('"abc"')
    '"xxx"'
    >>> mute_string("'''abc'''")
    "'''xxx'''"
    >>> mute_string("r'abc'")
    "r'xxx'"
    """
    # Skip any string prefix (e.g. u or r) by finding the opening quote,
    # which is the same character as the last one of the literal.
    start = text.index(text[-1]) + 1
    end = len(text) - 1
    if text.endswith(('"""', "'''")):
        # Triple-quoted: widen the quote span by two on each side.
        start, end = start + 2, end - 2
    return ''.join([text[:start], 'x' * (end - start), text[end:]])
1872
1873
def parse_udiff(diff, patterns=None, parent='.'):
    """Return a dictionary of matching lines."""
    # Map each target filename of the diff to the set of row numbers
    # (in the new file) touched by its hunks.
    changes = {}
    current_path = None
    remaining = None
    for diff_line in diff.splitlines():
        if remaining:
            # Inside a hunk body: removed ('-') lines do not exist in
            # the new file, so they don't consume a row.
            if not diff_line.startswith('-'):
                remaining -= 1
            continue
        if diff_line.startswith('@@ '):
            first_row, remaining = [
                int(group or '1')
                for group in HUNK_REGEX.match(diff_line).groups()
            ]
            changes[current_path].update(
                range(first_row, first_row + remaining))
        elif diff_line.startswith('+++'):
            current_path = diff_line[4:].split('\t', 1)[0]
            # Git diff will use (i)ndex, (w)ork tree, (c)ommit and
            # (o)bject instead of a/b/c/d as prefixes for patches
            if current_path[:2] in ('b/', 'w/', 'i/'):
                current_path = current_path[2:]
            changes[current_path] = set()
    return {
        os.path.join(parent, filepath): rows
        for (filepath, rows) in changes.items()
        if rows and filename_match(filepath, patterns)
    }
1901
1902
def normalize_paths(value, parent=os.curdir):
    """Parse a comma-separated list of paths.

    Return a list of absolute paths.
    """
    if not value:
        return []
    # Already a list: trust the caller and pass it through untouched.
    if isinstance(value, list):
        return value

    def _normalize(piece):
        piece = piece.strip()
        # Only entries that look like paths are anchored to the parent.
        if '/' in piece:
            piece = os.path.abspath(os.path.join(parent, piece))
        return piece.rstrip('/')

    return [_normalize(piece) for piece in value.split(',')]
1919
1920
def filename_match(filename, patterns, default=True):
    """Check if patterns contains a pattern that matches filename.

    If patterns is unspecified, this always returns True.
    """
    if not patterns:
        return default
    for pattern in patterns:
        if fnmatch(filename, pattern):
            return True
    return False
1929
1930
def update_counts(s, counts):
    r"""Adds one to the counts of each appearance of characters in s,
        for characters in counts"""
    # Equivalent to scanning s once: only characters already tracked in
    # counts are tallied; other characters are ignored.
    for tracked in counts:
        counts[tracked] += s.count(tracked)
1937
1938
def _is_eol_token(token):
    """Check whether *token* is the last meaningful token on its line."""
    if token[0] in NEWLINE:
        return True
    # Otherwise, only a backslash continuation may follow the token on
    # the physical line (token[4] is the line, token[3][1] the end col).
    return token[4][token[3][1]:].lstrip() == '\\\n'
1941
1942
1943########################################################################
1944# Framework to run all checks
1945########################################################################
1946
1947
class Checker(object):
    """Load a Python source file, tokenize it, check coding style."""

    def __init__(self, filename=None, lines=None,
                 options=None, report=None, **kwargs):
        if options is None:
            options = StyleGuide(kwargs).options
        else:
            assert not kwargs
        self._io_error = None
        self._physical_checks = options.physical_checks
        self._logical_checks = options.logical_checks
        self._ast_checks = options.ast_checks
        self.max_line_length = options.max_line_length
        self.max_doc_length = options.max_doc_length
        self.multiline = False  # in a multiline string?
        self.hang_closing = options.hang_closing
        self.verbose = options.verbose
        self.filename = filename
        # Dictionary where a checker can store its custom state.
        self._checker_states = {}
        if filename is None:
            self.filename = 'stdin'
            self.lines = lines or []
        elif filename == '-':
            self.filename = 'stdin'
            self.lines = stdin_get_value().splitlines(True)
        elif lines is None:
            try:
                self.lines = readlines(filename)
            except IOError:
                # Remember the error; generate_tokens() reports it as
                # E902 so it flows through the normal report machinery.
                (exc_type, exc) = sys.exc_info()[:2]
                self._io_error = '%s: %s' % (exc_type.__name__, exc)
                self.lines = []
        else:
            self.lines = lines
        if self.lines:
            ord0 = ord(self.lines[0][0])
            if ord0 in (0xef, 0xfeff):  # Strip the UTF-8 BOM
                if ord0 == 0xfeff:
                    self.lines[0] = self.lines[0][1:]
                elif self.lines[0][:3] == '\xef\xbb\xbf':
                    self.lines[0] = self.lines[0][3:]
        self.report = report or options.report
        self.report_error = self.report.error
        self.noqa = False

    def report_invalid_syntax(self):
        """Check if the syntax is valid."""
        (exc_type, exc) = sys.exc_info()[:2]
        if len(exc.args) > 1:
            offset = exc.args[1]
            if len(offset) > 2:
                # The (row, col) pair is embedded in a longer tuple.
                offset = offset[1:3]
        else:
            offset = (1, 0)
        self.report_error(offset[0], offset[1] or 0,
                          'E901 %s: %s' % (exc_type.__name__, exc.args[0]),
                          self.report_invalid_syntax)

    def readline(self):
        """Get the next line from the input buffer."""
        if self.line_number >= self.total_lines:
            return ''
        line = self.lines[self.line_number]
        self.line_number += 1
        # Remember the first indentation character seen in the file.
        if self.indent_char is None and line[:1] in WHITESPACE:
            self.indent_char = line[0]
        return line

    def run_check(self, check, argument_names):
        """Run a check plugin."""
        # Each requested argument name is resolved as an attribute of
        # this Checker instance (e.g. 'logical_line', 'tokens').
        arguments = []
        for name in argument_names:
            arguments.append(getattr(self, name))
        return check(*arguments)

    def init_checker_state(self, name, argument_names):
        """Prepare custom state for the specific checker plugin."""
        if 'checker_state' in argument_names:
            # Each named plugin gets its own persistent dict.
            self.checker_state = self._checker_states.setdefault(name, {})

    def check_physical(self, line):
        """Run all physical checks on a raw input line."""
        self.physical_line = line
        for name, check, argument_names in self._physical_checks:
            self.init_checker_state(name, argument_names)
            result = self.run_check(check, argument_names)
            if result is not None:
                (offset, text) = result
                self.report_error(self.line_number, offset, text, check)
                if text[:4] == 'E101':
                    # After an E101, resynchronize the expected indent
                    # character on this line's first character.
                    self.indent_char = line[0]

    def build_tokens_line(self):
        """Build a logical line from tokens."""
        logical = []
        comments = []
        length = 0
        prev_row = prev_col = mapping = None
        for token_type, text, start, end, line in self.tokens:
            if token_type in SKIP_TOKENS:
                continue
            if not mapping:
                # mapping pairs offsets in the logical line with (row,
                # col) positions in the physical source.
                mapping = [(0, start)]
            if token_type == tokenize.COMMENT:
                comments.append(text)
                continue
            if token_type == tokenize.STRING:
                # Replace string contents so they cannot confuse the
                # pattern-based logical-line checks.
                text = mute_string(text)
            if prev_row:
                (start_row, start_col) = start
                if prev_row != start_row:    # different row
                    prev_text = self.lines[prev_row - 1][prev_col - 1]
                    if prev_text == ',' or (prev_text not in '{[(' and
                                            text not in '}])'):
                        text = ' ' + text
                elif prev_col != start_col:  # different column
                    # Preserve the original spacing between tokens.
                    text = line[prev_col:start_col] + text
            logical.append(text)
            length += len(text)
            mapping.append((length, end))
            (prev_row, prev_col) = end
        self.logical_line = ''.join(logical)
        self.noqa = comments and noqa(''.join(comments))
        return mapping

    def check_logical(self):
        """Build a line from tokens and run all logical checks on it."""
        self.report.increment_logical_line()
        mapping = self.build_tokens_line()
        if not mapping:
            return

        mapping_offsets = [offset for offset, _ in mapping]
        (start_row, start_col) = mapping[0][1]
        start_line = self.lines[start_row - 1]
        self.indent_level = expand_indent(start_line[:start_col])
        if self.blank_before < self.blank_lines:
            self.blank_before = self.blank_lines
        if self.verbose >= 2:
            print(self.logical_line[:80].rstrip())
        for name, check, argument_names in self._logical_checks:
            if self.verbose >= 4:
                print('   ' + name)
            self.init_checker_state(name, argument_names)
            for offset, text in self.run_check(check, argument_names) or ():
                if not isinstance(offset, tuple):
                    # As mappings are ordered, bisecting is a fast way
                    # to find a given offset in them.
                    token_offset, pos = mapping[bisect.bisect_left(
                        mapping_offsets, offset)]
                    offset = (pos[0], pos[1] + offset - token_offset)
                self.report_error(offset[0], offset[1], text, check)
        if self.logical_line:
            # Save state for checks that compare consecutive lines.
            self.previous_indent_level = self.indent_level
            self.previous_logical = self.logical_line
            if not self.indent_level:
                self.previous_unindented_logical_line = self.logical_line
        self.blank_lines = 0
        self.tokens = []

    def check_ast(self):
        """Build the file's AST and run all AST checks."""
        try:
            tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST)
        except (ValueError, SyntaxError, TypeError):
            return self.report_invalid_syntax()
        for name, cls, __ in self._ast_checks:
            checker = cls(tree, self.filename)
            for lineno, offset, text, check in checker.run():
                # A "# noqa" comment on the flagged line suppresses it.
                if not self.lines or not noqa(self.lines[lineno - 1]):
                    self.report_error(lineno, offset, text, check)

    def generate_tokens(self):
        """Tokenize file, run physical line checks and yield tokens."""
        if self._io_error:
            self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)
        tokengen = tokenize.generate_tokens(self.readline)
        try:
            for token in tokengen:
                if token[2][0] > self.total_lines:
                    return
                self.noqa = token[4] and noqa(token[4])
                self.maybe_check_physical(token)
                yield token
        except (SyntaxError, tokenize.TokenError):
            self.report_invalid_syntax()

    def maybe_check_physical(self, token):
        """If appropriate for token, check current physical line(s)."""
        # Called after every token, but act only on end of line.
        if _is_eol_token(token):
            # Obviously, a newline token ends a single physical line.
            self.check_physical(token[4])
        elif token[0] == tokenize.STRING and '\n' in token[1]:
            # Less obviously, a string that contains newlines is a
            # multiline string, either triple-quoted or with internal
            # newlines backslash-escaped. Check every physical line in
            # the string *except* for the last one: its newline is
            # outside of the multiline string, so we consider it a
            # regular physical line, and will check it like any other
            # physical line.
            #
            # Subtleties:
            # - we don't *completely* ignore the last line; if it
            #   contains the magical "# noqa" comment, we disable all
            #   physical checks for the entire multiline string
            # - have to wind self.line_number back because initially it
            #   points to the last line of the string, and we want
            #   check_physical() to give accurate feedback
            if noqa(token[4]):
                return
            self.multiline = True
            self.line_number = token[2][0]
            _, src, (_, offset), _, _ = token
            src = self.lines[self.line_number - 1][:offset] + src
            for line in src.split('\n')[:-1]:
                self.check_physical(line + '\n')
                self.line_number += 1
            self.multiline = False

    def check_all(self, expected=None, line_offset=0):
        """Run all checks on the input file."""
        self.report.init_file(self.filename, self.lines, expected, line_offset)
        self.total_lines = len(self.lines)
        if self._ast_checks:
            self.check_ast()
        self.line_number = 0
        self.indent_char = None
        self.indent_level = self.previous_indent_level = 0
        self.previous_logical = ''
        self.previous_unindented_logical_line = ''
        self.tokens = []
        self.blank_lines = self.blank_before = 0
        # Track bracket depth: a logical line only ends on a NEWLINE
        # that occurs outside any (), [] or {}.
        parens = 0
        for token in self.generate_tokens():
            self.tokens.append(token)
            token_type, text = token[0:2]
            if self.verbose >= 3:
                if token[2][0] == token[3][0]:
                    pos = '[%s:%s]' % (token[2][1] or '', token[3][1])
                else:
                    pos = 'l.%s' % token[3][0]
                print('l.%s\t%s\t%s\t%r' %
                      (token[2][0], pos, tokenize.tok_name[token[0]], text))
            if token_type == tokenize.OP:
                if text in '([{':
                    parens += 1
                elif text in '}])':
                    parens -= 1
            elif not parens:
                if token_type in NEWLINE:
                    if token_type == tokenize.NEWLINE:
                        self.check_logical()
                        self.blank_before = 0
                    elif len(self.tokens) == 1:
                        # The physical line contains only this token.
                        self.blank_lines += 1
                        del self.tokens[0]
                    else:
                        self.check_logical()
        if self.tokens:
            # Flush whatever is left after the last NEWLINE token.
            self.check_physical(self.lines[-1])
            self.check_logical()
        return self.report.get_file_results()
2214
2215
class BaseReport(object):
    """Collect the results of the checks."""

    # Subclasses set this to print the filename before its first error.
    print_filename = False

    def __init__(self, options):
        self._benchmark_keys = options.benchmark_keys
        self._ignore_code = options.ignore_code
        # Accumulated results.
        self.elapsed = 0
        self.total_errors = 0
        self.counters = {key: 0 for key in self._benchmark_keys}
        self.messages = {}

    def start(self):
        """Start the timer."""
        self._start_time = time.time()

    def stop(self):
        """Stop the timer."""
        self.elapsed = time.time() - self._start_time

    def init_file(self, filename, lines, expected, line_offset):
        """Signal a new file."""
        self.filename = filename
        self.lines = lines
        self.expected = expected or ()
        self.line_offset = line_offset
        self.file_errors = 0
        self.counters['files'] += 1
        self.counters['physical lines'] += len(lines)

    def increment_logical_line(self):
        """Signal a new logical line."""
        self.counters['logical lines'] += 1

    def error(self, line_number, offset, text, check):
        """Report an error, according to options."""
        code = text[:4]
        if self._ignore_code(code):
            return
        try:
            self.counters[code] += 1
        except KeyError:
            # First occurrence of this code: remember its message text.
            self.counters[code] = 1
            self.messages[code] = text[5:]
        # Don't care about expected errors or warnings
        if code in self.expected:
            return
        if self.print_filename and not self.file_errors:
            print(self.filename)
        self.file_errors += 1
        self.total_errors += 1
        return code

    def get_file_results(self):
        """Return the count of errors and warnings for this file."""
        return self.file_errors

    def get_count(self, prefix=''):
        """Return the total count of errors and warnings."""
        total = 0
        for key in self.messages:
            if key.startswith(prefix):
                total += self.counters[key]
        return total

    def get_statistics(self, prefix=''):
        """Get statistics for message codes that start with the prefix.

        prefix='' matches all errors and warnings
        prefix='E' matches all errors
        prefix='W' matches all warnings
        prefix='E4' matches all errors that have to do with imports
        """
        stats = []
        for key in sorted(self.messages):
            if key.startswith(prefix):
                stats.append('%-7s %s %s' %
                             (self.counters[key], key, self.messages[key]))
        return stats

    def print_statistics(self, prefix=''):
        """Print overall statistics (number of errors and warnings)."""
        for line in self.get_statistics(prefix):
            print(line)

    def print_benchmark(self):
        """Print benchmark numbers."""
        print('%-7.2f %s' % (self.elapsed, 'seconds elapsed'))
        if not self.elapsed:
            return
        for key in self._benchmark_keys:
            rate = self.counters[key] / self.elapsed
            print('%-7d %s per second (%d total)' %
                  (rate, key, self.counters[key]))
2304
2305
class FileReport(BaseReport):
    """Collect the results of the checks and print the filenames."""

    # BaseReport.error() prints the filename before the first error it
    # records for each file when this flag is set.
    print_filename = True
2310
2311
class StandardReport(BaseReport):
    """Collect and print the results of the checks."""

    def __init__(self, options):
        super(StandardReport, self).__init__(options)
        # User-supplied format string, or one of the named presets.
        self._fmt = REPORT_FORMAT.get(options.format.lower(),
                                      options.format)
        self._repeat = options.repeat
        self._show_source = options.show_source
        self._show_pep8 = options.show_pep8

    def init_file(self, filename, lines, expected, line_offset):
        """Signal a new file."""
        # Errors are buffered here and printed sorted by position in
        # get_file_results().
        self._deferred_print = []
        return super(StandardReport, self).init_file(
            filename, lines, expected, line_offset)

    def error(self, line_number, offset, text, check):
        """Report an error, according to options."""
        code = super(StandardReport, self).error(line_number, offset,
                                                 text, check)
        if code and (self.counters[code] == 1 or self._repeat):
            self._deferred_print.append(
                (line_number, offset, code, text[5:], check.__doc__))
        return code

    def get_file_results(self):
        """Print results and return the overall count for this file."""
        self._deferred_print.sort()
        for line_number, offset, code, text, doc in self._deferred_print:
            print(self._fmt % {
                'path': self.filename,
                'row': self.line_offset + line_number, 'col': offset + 1,
                'code': code, 'text': text,
            })
            if self._show_source:
                if line_number > len(self.lines):
                    line = ''
                else:
                    line = self.lines[line_number - 1]
                print(line.rstrip())
                # Caret marker under the offending column.
                print(re.sub(r'\S', ' ', line[:offset]) + '^')
            if self._show_pep8 and doc:
                print('    ' + doc.strip())

            # stdout is block buffered when not stdout.isatty().
            # line can be broken where buffer boundary since other
            # processes write to same file.
            # flush() after print() to avoid buffer boundary.
            # Typical buffer size is 8192. line written safely when
            # len(line) < 8192.
            sys.stdout.flush()
        return self.file_errors
2365
2366
class DiffReport(StandardReport):
    """Collect and print the results for the changed lines only."""

    def __init__(self, options):
        super(DiffReport, self).__init__(options)
        # Mapping of filename -> set of row numbers present in the diff.
        self._selected = options.selected_lines

    def error(self, line_number, offset, text, check):
        # Only report errors on lines that the diff actually touches.
        if line_number in self._selected[self.filename]:
            return super(DiffReport, self).error(
                line_number, offset, text, check)
        return None
2378
2379
class StyleGuide(object):
    """Initialize a PEP-8 instance with few options."""

    def __init__(self, *args, **kwargs):
        # build options from the command line
        self.checker_class = kwargs.pop('checker_class', Checker)
        parse_argv = kwargs.pop('parse_argv', False)
        config_file = kwargs.pop('config_file', False)
        parser = kwargs.pop('parser', None)
        # build options from dict
        options_dict = dict(*args, **kwargs)
        arglist = None if parse_argv else options_dict.get('paths', None)
        verbose = options_dict.get('verbose', None)
        options, self.paths = process_options(
            arglist, parse_argv, config_file, parser, verbose)
        if options_dict:
            # Keyword arguments override whatever was parsed from
            # argv / configuration files.
            options.__dict__.update(options_dict)
            if 'paths' in options_dict:
                self.paths = options_dict['paths']

        self.runner = self.input_file
        self.options = options

        if not options.reporter:
            # --quiet suppresses per-error output, so fall back to the
            # silent BaseReport in that case.
            options.reporter = BaseReport if options.quiet else StandardReport

        options.select = tuple(options.select or ())
        if not (options.select or options.ignore or
                options.testsuite or options.doctest) and DEFAULT_IGNORE:
            # The default choice: ignore controversial checks
            options.ignore = tuple(DEFAULT_IGNORE.split(','))
        else:
            # Ignore all checks which are not explicitly selected
            options.ignore = ('',) if options.select else tuple(options.ignore)
        options.benchmark_keys = BENCHMARK_KEYS[:]
        options.ignore_code = self.ignore_code
        # Pre-compute the check lists once; Checker instances read them
        # from the options object.
        options.physical_checks = self.get_checks('physical_line')
        options.logical_checks = self.get_checks('logical_line')
        options.ast_checks = self.get_checks('tree')
        self.init_report()

    def init_report(self, reporter=None):
        """Initialize the report instance."""
        self.options.report = (reporter or self.options.reporter)(self.options)
        return self.options.report

    def check_files(self, paths=None):
        """Run all checks on the paths."""
        if paths is None:
            paths = self.paths
        report = self.options.report
        runner = self.runner
        report.start()
        try:
            for path in paths:
                if os.path.isdir(path):
                    self.input_dir(path)
                elif not self.excluded(path):
                    runner(path)
        except KeyboardInterrupt:
            # Allow Ctrl-C to stop a long run while still printing the
            # summary collected so far.
            print('... stopped')
        report.stop()
        return report

    def input_file(self, filename, lines=None, expected=None, line_offset=0):
        """Run all checks on a Python source file."""
        if self.options.verbose:
            print('checking %s' % filename)
        fchecker = self.checker_class(
            filename, lines=lines, options=self.options)
        return fchecker.check_all(expected=expected, line_offset=line_offset)

    def input_dir(self, dirname):
        """Check all files in this directory and all subdirectories."""
        dirname = dirname.rstrip('/')
        if self.excluded(dirname):
            return 0
        counters = self.options.report.counters
        verbose = self.options.verbose
        filepatterns = self.options.filename
        runner = self.runner
        for root, dirs, files in os.walk(dirname):
            if verbose:
                print('directory ' + root)
            counters['directories'] += 1
            for subdir in sorted(dirs):
                # Pruning 'dirs' in place stops os.walk from descending
                # into excluded subdirectories.
                if self.excluded(subdir, root):
                    dirs.remove(subdir)
            for filename in sorted(files):
                # contain a pattern that matches?
                if ((filename_match(filename, filepatterns) and
                     not self.excluded(filename, root))):
                    runner(os.path.join(root, filename))

    def excluded(self, filename, parent=None):
        """Check if the file should be excluded.

        Check if 'options.exclude' contains a pattern matching filename.
        """
        if not self.options.exclude:
            return False
        basename = os.path.basename(filename)
        if filename_match(basename, self.options.exclude):
            return True
        if parent:
            filename = os.path.join(parent, filename)
        # Also match the absolute path so patterns like 'build/*' work
        # regardless of the current working directory.
        filename = os.path.abspath(filename)
        return filename_match(filename, self.options.exclude)

    def ignore_code(self, code):
        """Check if the error code should be ignored.

        If 'options.select' contains a prefix of the error code,
        return False.  Else, if 'options.ignore' contains a prefix of
        the error code, return True.
        """
        if len(code) < 4 and any(s.startswith(code)
                                 for s in self.options.select):
            return False
        return (code.startswith(self.options.ignore) and
                not code.startswith(self.options.select))

    def get_checks(self, argument_name):
        """Get all the checks for this category.

        Find all globally visible functions where the first argument
        name starts with argument_name and which contain selected tests.
        """
        checks = []
        for check, attrs in _checks[argument_name].items():
            (codes, args) = attrs
            # Keep the check if at least one of its codes is selected
            # (codeless checks are always kept).
            if any(not (code and self.ignore_code(code)) for code in codes):
                checks.append((check.__name__, check, args))
        return sorted(checks)
2514
2515
def get_parser(prog='pycodestyle', version=__version__):
    """Create the parser for the program."""
    parser = OptionParser(prog=prog, version=version,
                          usage="%prog [options] input ...")
    # Options that may also be set from a configuration file.
    parser.config_options = [
        'exclude', 'filename', 'select', 'ignore', 'max-line-length',
        'max-doc-length', 'hang-closing', 'count', 'format', 'quiet',
        'show-pep8', 'show-source', 'statistics', 'verbose']
    add = parser.add_option
    add('-v', '--verbose', default=0, action='count',
        help="print status messages, or debug with -vv")
    add('-q', '--quiet', default=0, action='count',
        help="report only file names, or nothing with -qq")
    add('-r', '--repeat', default=True, action='store_true',
        help="(obsolete) show all occurrences of the same error")
    add('--first', action='store_false', dest='repeat',
        help="show first occurrence of each error")
    add('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
        help="exclude files or directories which match these "
             "comma separated patterns (default: %default)")
    add('--filename', metavar='patterns', default='*.py',
        help="when parsing directories, only check filenames "
             "matching these comma separated patterns "
             "(default: %default)")
    add('--select', metavar='errors', default='',
        help="select errors and warnings (e.g. E,W6)")
    add('--ignore', metavar='errors', default='',
        help="skip errors and warnings (e.g. E4,W) "
             "(default: %s)" % DEFAULT_IGNORE)
    add('--show-source', action='store_true',
        help="show source code for each error")
    add('--show-pep8', action='store_true',
        help="show text of PEP 8 for each error "
             "(implies --first)")
    add('--statistics', action='store_true',
        help="count errors and warnings")
    add('--count', action='store_true',
        help="print total number of errors and warnings "
             "to standard error and set exit code to 1 if "
             "total is not null")
    add('--max-line-length', type='int', metavar='n',
        default=MAX_LINE_LENGTH,
        help="set maximum allowed line length "
             "(default: %default)")
    add('--max-doc-length', type='int', metavar='n',
        default=None,
        help="set maximum allowed doc line length and perform "
             "these checks (unchecked if not set)")
    add('--hang-closing', action='store_true',
        help="hang closing bracket instead of matching "
             "indentation of opening bracket's line")
    add('--format', metavar='format', default='default',
        help="set the error format [default|pylint|<custom>]")
    add('--diff', action='store_true',
        help="report changes only within line number ranges in "
             "the unified diff received on STDIN")
    # The testing options are only exposed from a development checkout,
    # where the regression test suite directory exists.
    group = parser.add_option_group("Testing Options")
    if os.path.exists(TESTSUITE_PATH):
        group.add_option('--testsuite', metavar='dir',
                         help="run regression tests from dir")
        group.add_option('--doctest', action='store_true',
                         help="run doctest on myself")
    group.add_option('--benchmark', action='store_true',
                     help="measure processing speed")
    return parser
2580
2581
def read_config(options, args, arglist, parser):
    """Read and parse configurations.

    If a config file is specified on the command line with the
    "--config" option, then only it is used for configuration.

    Otherwise, the user configuration (~/.config/pycodestyle) and any
    local configurations in the current directory or above will be
    merged together (in that order) using the read method of
    ConfigParser.
    """
    config = RawConfigParser()

    cli_conf = options.config

    local_dir = os.curdir

    # 1) User-level configuration, read first so later files override it.
    if USER_CONFIG and os.path.isfile(USER_CONFIG):
        if options.verbose:
            print('user configuration: %s' % USER_CONFIG)
        config.read(USER_CONFIG)

    # 2) Project-level configuration: walk upward from the common prefix
    # of the checked paths until a project config file is found or the
    # filesystem root is reached (tail becomes empty).
    parent = tail = args and os.path.abspath(os.path.commonprefix(args))
    while tail:
        if config.read(os.path.join(parent, fn) for fn in PROJECT_CONFIG):
            local_dir = parent
            if options.verbose:
                print('local configuration: in %s' % parent)
            break
        (parent, tail) = os.path.split(parent)

    # 3) Configuration file given with --config, read last (highest
    # priority among files).
    if cli_conf and os.path.isfile(cli_conf):
        if options.verbose:
            print('cli configuration: %s' % cli_conf)
        config.read(cli_conf)

    pycodestyle_section = None
    if config.has_section(parser.prog):
        pycodestyle_section = parser.prog
    elif config.has_section('pep8'):
        pycodestyle_section = 'pep8'  # Deprecated
        warnings.warn('[pep8] section is deprecated. Use [pycodestyle].')

    if pycodestyle_section:
        # Map each option's dest name to its type/action so the raw
        # string from the config file can be coerced appropriately.
        option_list = {o.dest: o.type or o.action for o in parser.option_list}

        # First, read the default values
        (new_options, __) = parser.parse_args([])

        # Second, parse the configuration
        for opt in config.options(pycodestyle_section):
            if opt.replace('_', '-') not in parser.config_options:
                print("  unknown option '%s' ignored" % opt)
                continue
            if options.verbose > 1:
                print("  %s = %s" % (opt,
                                     config.get(pycodestyle_section, opt)))
            normalized_opt = opt.replace('-', '_')
            opt_type = option_list[normalized_opt]
            if opt_type in ('int', 'count'):
                value = config.getint(pycodestyle_section, opt)
            elif opt_type in ('store_true', 'store_false'):
                value = config.getboolean(pycodestyle_section, opt)
            else:
                value = config.get(pycodestyle_section, opt)
                if normalized_opt == 'exclude':
                    # Exclude patterns are resolved relative to the
                    # directory the config file was found in.
                    value = normalize_paths(value, local_dir)
            setattr(new_options, normalized_opt, value)

        # Third, overwrite with the command-line options
        (options, __) = parser.parse_args(arglist, values=new_options)
    # The testing options must never come from a config file.
    options.doctest = options.testsuite = False
    return options
2655
2656
def process_options(arglist=None, parse_argv=False, config_file=None,
                    parser=None, verbose=None):
    """Process options passed either via arglist or command line args.

    Passing in the ``config_file`` parameter allows other tools, such as
    flake8 to specify their own options to be processed in pycodestyle.

    Return a ``(options, args)`` pair as produced by the option parser,
    after merging in any configuration-file settings.
    """
    if not parser:
        parser = get_parser()
    if not parser.has_option('--config'):
        group = parser.add_option_group("Configuration", description=(
            "The project options are read from the [%s] section of the "
            "tox.ini file or the setup.cfg file located in any parent folder "
            "of the path(s) being processed.  Allowed options are: %s." %
            (parser.prog, ', '.join(parser.config_options))))
        group.add_option('--config', metavar='path', default=config_file,
                         help="user config file location")
    # Don't read the command line if the module is used as a library.
    if not arglist and not parse_argv:
        arglist = []
    # If parse_argv is True and arglist is None, arguments are
    # parsed from the command line (sys.argv)
    (options, args) = parser.parse_args(arglist)
    options.reporter = None

    # If explicitly specified verbosity, override any `-v` CLI flag
    if verbose is not None:
        options.verbose = verbose

    if options.ensure_value('testsuite', False):
        args.append(options.testsuite)
    elif not options.ensure_value('doctest', False):
        if parse_argv and not args:
            # With no explicit paths, default to '.' only when a diff is
            # being read or a project config file is present nearby.
            if options.diff or any(os.path.exists(name)
                                   for name in PROJECT_CONFIG):
                args = ['.']
            else:
                parser.error('input not specified')
        options = read_config(options, args, arglist, parser)
        # FileReport (names only) is used for a single -q on the CLI.
        options.reporter = parse_argv and options.quiet == 1 and FileReport

    options.filename = _parse_multi_options(options.filename)
    options.exclude = normalize_paths(options.exclude)
    options.select = _parse_multi_options(options.select)
    options.ignore = _parse_multi_options(options.ignore)

    if options.diff:
        options.reporter = DiffReport
        stdin = stdin_get_value()
        options.selected_lines = parse_udiff(stdin, options.filename, args[0])
        args = sorted(options.selected_lines)

    return options, args
2710
2711
2712def _parse_multi_options(options, split_token=','):
2713    r"""Split and strip and discard empties.
2714
2715    Turns the following:
2716
2717    A,
2718    B,
2719
2720    into ["A", "B"]
2721    """
2722    if options:
2723        return [o.strip() for o in options.split(split_token) if o.strip()]
2724    else:
2725        return options
2726
2727
def _main():
    """Parse options and run checks on Python source."""
    import signal

    # Exit quietly when the reader of our output disappears
    # ("Broken pipe") instead of raising.
    try:
        signal.signal(signal.SIGPIPE, lambda _signum, _frame: sys.exit(1))
    except AttributeError:
        pass    # not supported on Windows

    guide = StyleGuide(parse_argv=True)
    opts = guide.options

    if opts.doctest or opts.testsuite:
        from testsuite.support import run_tests
        report = run_tests(guide)
    else:
        report = guide.check_files()

    if opts.statistics:
        report.print_statistics()
    if opts.benchmark:
        report.print_benchmark()
    if opts.testsuite and not opts.quiet:
        report.print_results()

    if report.total_errors:
        # --count echoes the total to stderr; either way a non-zero
        # error count yields exit status 1.
        if opts.count:
            sys.stderr.write(str(report.total_errors) + '\n')
        sys.exit(1)
2760
2761
if __name__ == '__main__':
    # Run the command-line interface when executed as a script.
    _main()
2764