#!/usr/bin/env python
#
# Copyright (c) 2009 Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#    * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#    * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#    * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

31"""Does google-lint on c++ files.
32
33The goal of this script is to identify places in the code that *may*
34be in non-compliance with google style.  It does not attempt to fix
35up these problems -- the point is to educate.  It does also not
36attempt to find all problems, or to ensure that everything it does
37find is legitimately a problem.
38
39In particular, we can get very confused by /* and // inside strings!
40We do a small hack, which is to ignore //'s with "'s after them on the
41same line, but it is far from perfect (in either direction).
42"""

import codecs
import copy
import getopt
import glob
import itertools
import math  # for log
import os
import re
import sre_compile
import string
import sys
import sysconfig
import unicodedata
import xml.etree.ElementTree

# if empty, use defaults
_valid_extensions = set([])

__VERSION__ = '1.4.4'

try:
  xrange          # Python 2
except NameError:
  #  -- pylint: disable=redefined-builtin
  xrange = range  # Python 3


_USAGE = """
Syntax: cpplint.py [--verbose=#] [--output=emacs|eclipse|vs7|junit]
                   [--filter=-x,+y,...]
                   [--counting=total|toplevel|detailed] [--root=subdir]
                   [--repository=path]
                   [--linelength=digits] [--headers=x,y,...]
                   [--recursive]
                   [--exclude=path]
                   [--extensions=hpp,cpp,...]
                   [--quiet]
                   [--version]
        <file> [file] ...

  Style checker for C/C++ source files.
  This is a fork of the Google style checker with minor extensions.

  The style guidelines this tries to follow are those in
    https://google.github.io/styleguide/cppguide.html

  Every problem is given a confidence score from 1-5, with 5 meaning we are
  certain of the problem, and 1 meaning it could be a legitimate construct.
  This will miss some errors, and is not a substitute for a code review.

  To suppress false-positive errors of a certain category, add a
  'NOLINT(category)' comment to the line.  NOLINT or NOLINT(*)
  suppresses errors of all categories on that line.
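
  For example:
    long port;  // NOLINT(runtime/int)
  suppresses only runtime/int errors on that line.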

  The files passed in will be linted; at least one file must be provided.
  Default linted extensions are %s.
  Other file types will be ignored.
  Change the extensions with the --extensions flag.

  Flags:

    output=emacs|eclipse|vs7|junit
      By default, the output is formatted to ease emacs parsing.  Visual Studio
      compatible output (vs7) may also be used.  Further support exists for
      eclipse (eclipse), and JUnit (junit). XML parsers such as those used
      in Jenkins and Bamboo may also be used.  Other formats are unsupported.

    verbose=#
      Specify a number 0-5 to restrict errors to certain verbosity levels.
      Errors with lower verbosity levels have lower confidence and are more
      likely to be false positives.

    quiet
      Don't print anything if no errors are found.

    filter=-x,+y,...
      Specify a comma-separated list of category-filters to apply: only
      error messages whose category names pass the filters will be printed.
      (Category names are printed with the message and look like
      "[whitespace/indent]".)  Filters are evaluated left to right.
124      "-FOO" and "FOO" means "do not print categories that start with FOO".
125      "+FOO" means "do print categories that start with FOO".
126
127      Examples: --filter=-whitespace,+whitespace/braces
128                --filter=whitespace,runtime/printf,+runtime/printf_format
129                --filter=-,+build/include_what_you_use
130
131      To see a list of all the categories used in cpplint, pass no arg:
132         --filter=
133
134    counting=total|toplevel|detailed
135      The total number of errors found is always printed. If
136      'toplevel' is provided, then the count of errors in each of
137      the top-level categories like 'build' and 'whitespace' will
138      also be printed. If 'detailed' is provided, then a count
139      is provided for each category like 'build/class'.
140
141    repository=path
142      The top level directory of the repository, used to derive the header
143      guard CPP variable. By default, this is determined by searching for a
144      path that contains .git, .hg, or .svn. When this flag is specified, the
145      given path is used instead. This option allows the header guard CPP
146      variable to remain consistent even if members of a team have different
147      repository root directories (such as when checking out a subdirectory
148      with SVN). In addition, users of non-mainstream version control systems
149      can use this flag to ensure readable header guard CPP variables.
150
151      Examples:
152        Assuming that Alice checks out ProjectName and Bob checks out
153        ProjectName/trunk and trunk contains src/chrome/ui/browser.h, then
154        with no --repository flag, the header guard CPP variable will be:
155
156        Alice => TRUNK_SRC_CHROME_BROWSER_UI_BROWSER_H_
157        Bob   => SRC_CHROME_BROWSER_UI_BROWSER_H_
158
159        If Alice uses the --repository=trunk flag and Bob omits the flag or
160        uses --repository=. then the header guard CPP variable will be:
161
162        Alice => SRC_CHROME_BROWSER_UI_BROWSER_H_
163        Bob   => SRC_CHROME_BROWSER_UI_BROWSER_H_
164
165    root=subdir
166      The root directory used for deriving header guard CPP variable.
167      This directory is relative to the top level directory of the repository
168      which by default is determined by searching for a directory that contains
169      .git, .hg, or .svn but can also be controlled with the --repository flag.
170      If the specified directory does not exist, this flag is ignored.
171
172      Examples:
173        Assuming that src is the top level directory of the repository (and
174        cwd=top/src), the header guard CPP variables for
175        src/chrome/browser/ui/browser.h are:
176
177        No flag => CHROME_BROWSER_UI_BROWSER_H_
178        --root=chrome => BROWSER_UI_BROWSER_H_
179        --root=chrome/browser => UI_BROWSER_H_
180        --root=.. => SRC_CHROME_BROWSER_UI_BROWSER_H_
181
182    linelength=digits
183      This is the allowed line length for the project. The default value is
184      80 characters.
185
186      Examples:
187        --linelength=120
188
189    recursive
190      Search for files to lint recursively. Each directory given in the list
191      of files to be linted is replaced by all files that descend from that
192      directory. Files with extensions not in the valid extensions list are
193      excluded.
194
195    exclude=path
196      Exclude the given path from the list of files to be linted. Relative
197      paths are evaluated relative to the current directory and shell globbing
198      is performed. This flag can be provided multiple times to exclude
199      multiple files.
200
201      Examples:
202        --exclude=one.cc
203        --exclude=src/*.cc
204        --exclude=src/*.cc --exclude=test/*.cc
205
206    extensions=extension,extension,...
207      The allowed file extensions that cpplint will check
208
209      Examples:
210        --extensions=%s
211
212    headers=x,y,...
213      The header extensions that cpplint will treat as .h in checks. Values are
214      automatically added to --extensions list.
215     (by default, only files with extensions %s will be assumed to be headers)
216
217      Examples:
218        --headers=%s
219        --headers=hpp,hxx
220        --headers=hpp
221
    cpplint.py supports per-directory configurations specified in CPPLINT.cfg
    files. A CPPLINT.cfg file can contain a number of key=value pairs.
    Currently the following options are supported:

      set noparent
      filter=+filter1,-filter2,...
      exclude_files=regex
      linelength=80
      root=subdir
      headers=x,y,...

    The "set noparent" option prevents cpplint from traversing the directory
    tree upwards looking for more .cfg files in parent directories. This
    option is usually placed in the top-level project directory.

    The "filter" option is similar in function to the --filter flag. It
    specifies message filters in addition to the |_DEFAULT_FILTERS| and those
    specified through the --filter command-line flag.

    "exclude_files" allows you to specify a regular expression to be matched
    against a file name. If the expression matches, the file is skipped and
    not run through the linter.

    "linelength" allows you to specify the allowed line length for the
    project.

    The "root" option is similar in function to the --root flag (see example
    above). Paths are relative to the directory of the CPPLINT.cfg.

    The "headers" option is similar in function to the --headers flag
    (see example above).

    CPPLINT.cfg has an effect on files in the same directory and all
    sub-directories, unless overridden by a nested configuration file.

      Example file:
        filter=-build/include_order,+build/include_alpha
        exclude_files=.*\\.cc

    The above example disables the build/include_order warning, enables
    build/include_alpha, and excludes all .cc files from being processed by
    the linter, in the current directory (where the .cfg file is located) and
    all sub-directories.
"""

# We categorize each error message we print.  Here are the categories.
# We want an explicit list so we can list them all in cpplint --filter=.
# If you add a new error message with a new category, add it to the list
# here!  cpplint_unittest.py should tell you if you forget to do this.
_ERROR_CATEGORIES = [
    'build/class',
    'build/c++11',
    'build/c++14',
    'build/c++tr1',
    'build/deprecated',
    'build/endif_comment',
    'build/explicit_make_pair',
    'build/forward_decl',
    'build/header_guard',
    'build/include',
    'build/include_subdir',
    'build/include_alpha',
    'build/include_order',
    'build/include_what_you_use',
    'build/namespaces_literals',
    'build/namespaces',
    'build/printf_format',
    'build/storage_class',
    'legal/copyright',
    'readability/alt_tokens',
    'readability/braces',
    'readability/casting',
    'readability/check',
    'readability/constructors',
    'readability/fn_size',
    'readability/inheritance',
    'readability/multiline_comment',
    'readability/multiline_string',
    'readability/namespace',
    'readability/nolint',
    'readability/nul',
    'readability/strings',
    'readability/todo',
    'readability/utf8',
    'runtime/arrays',
    'runtime/casting',
    'runtime/explicit',
    'runtime/int',
    'runtime/init',
    'runtime/invalid_increment',
    'runtime/member_string_references',
    'runtime/memset',
    'runtime/indentation_namespace',
    'runtime/operator',
    'runtime/printf',
    'runtime/printf_format',
    'runtime/references',
    'runtime/string',
    'runtime/threadsafe_fn',
    'runtime/vlog',
    'whitespace/blank_line',
    'whitespace/braces',
    'whitespace/comma',
    'whitespace/comments',
    'whitespace/empty_conditional_body',
    'whitespace/empty_if_body',
    'whitespace/empty_loop_body',
    'whitespace/end_of_line',
    'whitespace/ending_newline',
    'whitespace/forcolon',
    'whitespace/indent',
    'whitespace/line_length',
    'whitespace/newline',
    'whitespace/operators',
    'whitespace/parens',
    'whitespace/semicolon',
    'whitespace/tab',
    'whitespace/todo',
    ]

# These error categories are no longer enforced by cpplint, but for backwards-
# compatibility they may still appear in NOLINT comments.
_LEGACY_ERROR_CATEGORIES = [
    'readability/streams',
    'readability/function',
    ]

# The default state of the category filter. This is overridden by the --filter=
# flag. By default all errors are on, so only add here categories that should be
# off by default (i.e., categories that must be enabled by the --filter= flags).
# All entries here should start with a '-' or '+', as in the --filter= flag.
_DEFAULT_FILTERS = ['-build/include_alpha']

# The default list of categories suppressed for C (not C++) files.
_DEFAULT_C_SUPPRESSED_CATEGORIES = [
    'readability/casting',
    ]

# The default list of categories suppressed for Linux Kernel files.
_DEFAULT_KERNEL_SUPPRESSED_CATEGORIES = [
    'whitespace/tab',
    ]

# We used to check for high-bit characters, but after much discussion we
# decided those were OK, as long as they were in UTF-8 and didn't represent
# hard-coded international strings, which belong in a separate i18n file.

# C++ headers
_CPP_HEADERS = frozenset([
    # Legacy
    'algobase.h',
    'algo.h',
    'alloc.h',
    'builtinbuf.h',
    'bvector.h',
    'complex.h',
    'defalloc.h',
    'deque.h',
    'editbuf.h',
    'fstream.h',
    'function.h',
    'hash_map',
    'hash_map.h',
    'hash_set',
    'hash_set.h',
    'hashtable.h',
    'heap.h',
    'indstream.h',
    'iomanip.h',
    'iostream.h',
    'istream.h',
    'iterator.h',
    'list.h',
    'map.h',
    'multimap.h',
    'multiset.h',
    'ostream.h',
    'pair.h',
    'parsestream.h',
    'pfstream.h',
    'procbuf.h',
    'pthread_alloc',
    'pthread_alloc.h',
    'rope',
    'rope.h',
    'ropeimpl.h',
    'set.h',
    'slist',
    'slist.h',
    'stack.h',
    'stdiostream.h',
    'stl_alloc.h',
    'stl_relops.h',
    'streambuf.h',
    'stream.h',
    'strfile.h',
    'strstream.h',
    'tempbuf.h',
    'tree.h',
    'type_traits.h',
    'vector.h',
    # 17.6.1.2 C++ library headers
    'algorithm',
    'array',
    'atomic',
    'bitset',
    'chrono',
    'codecvt',
    'complex',
    'condition_variable',
    'deque',
    'exception',
    'forward_list',
    'fstream',
    'functional',
    'future',
    'initializer_list',
    'iomanip',
    'ios',
    'iosfwd',
    'iostream',
    'istream',
    'iterator',
    'limits',
    'list',
    'locale',
    'map',
    'memory',
    'mutex',
    'new',
    'numeric',
    'ostream',
    'queue',
    'random',
    'ratio',
    'regex',
    'scoped_allocator',
    'set',
    'sstream',
    'stack',
    'stdexcept',
    'streambuf',
    'string',
    'strstream',
    'system_error',
    'thread',
    'tuple',
    'typeindex',
    'typeinfo',
    'type_traits',
    'unordered_map',
    'unordered_set',
    'utility',
    'valarray',
    'vector',
    # 17.6.1.2 C++14 headers
    'shared_mutex',
    # 17.6.1.2 C++17 headers
    'any',
    'charconv',
    'codecvt',
    'execution',
    'filesystem',
    'memory_resource',
    'optional',
    'string_view',
    'variant',
    # 17.6.1.2 C++ headers for C library facilities
    'cassert',
    'ccomplex',
    'cctype',
    'cerrno',
    'cfenv',
    'cfloat',
    'cinttypes',
    'ciso646',
    'climits',
    'clocale',
    'cmath',
    'csetjmp',
    'csignal',
    'cstdalign',
    'cstdarg',
    'cstdbool',
    'cstddef',
    'cstdint',
    'cstdio',
    'cstdlib',
    'cstring',
    'ctgmath',
    'ctime',
    'cuchar',
    'cwchar',
    'cwctype',
    ])

# Type names
_TYPES = re.compile(
    r'^(?:'
    # [dcl.type.simple]
    r'(char(16_t|32_t)?)|wchar_t|'
    r'bool|short|int|long|signed|unsigned|float|double|'
    # [support.types]
    r'(ptrdiff_t|size_t|max_align_t|nullptr_t)|'
    # [cstdint.syn]
    r'(u?int(_fast|_least)?(8|16|32|64)_t)|'
    r'(u?int(max|ptr)_t)|'
    r')$')
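# For example, _TYPES matches 'uint64_t', 'size_t', 'unsigned' and 'wchar_t',
# but not 'string' or 'vector'.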


# These headers are excluded from [build/include] and [build/include_order]
# checks:
# - Anything not following google file name conventions (containing an
#   uppercase character, such as Python.h or nsStringAPI.h, for example).
# - Lua headers.
_THIRD_PARTY_HEADERS_PATTERN = re.compile(
    r'^(?:[^/]*[A-Z][^/]*\.h|lua\.h|lauxlib\.h|lualib\.h)$')

# Pattern for matching FileInfo.BaseName() against test file name
_test_suffixes = ['_test', '_regtest', '_unittest']
_TEST_FILE_SUFFIX = '(' + '|'.join(_test_suffixes) + r')$'
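# For example, the base names 'foo_test', 'foo_regtest' and 'foo_unittest'
# match this suffix pattern, while 'footest' does not.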

# Pattern that matches only complete whitespace, possibly across multiple lines.
_EMPTY_CONDITIONAL_BODY_PATTERN = re.compile(r'^\s*$', re.DOTALL)

# Assertion macros.  These are defined in base/logging.h and
# testing/base/public/gunit.h.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE', 'ASSERT_TRUE',
    'EXPECT_FALSE', 'ASSERT_FALSE',
    ]

# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
_CHECK_REPLACEMENT = dict([(macro_var, {}) for macro_var in _CHECK_MACROS])

for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
                        ('>=', 'GE'), ('>', 'GT'),
                        ('<=', 'LE'), ('<', 'LT')]:
  _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
  _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
  _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
  _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement

for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
                            ('>=', 'LT'), ('>', 'LE'),
                            ('<=', 'GT'), ('<', 'GE')]:
  _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
  _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
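
# After the loops above, for example, _CHECK_REPLACEMENT['CHECK']['=='] is
# 'CHECK_EQ' and _CHECK_REPLACEMENT['EXPECT_FALSE']['<'] is 'EXPECT_GE'.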

# Alternative tokens and their replacements.  For full list, see section 2.5
# Alternative tokens [lex.digraph] in the C++ standard.
#
# Digraphs (such as '%:') are not included here since it's a mess to
# match those on a word boundary.
_ALT_TOKEN_REPLACEMENT = {
    'and': '&&',
    'bitor': '|',
    'or': '||',
    'xor': '^',
    'compl': '~',
    'bitand': '&',
    'and_eq': '&=',
    'or_eq': '|=',
    'xor_eq': '^=',
    'not': '!',
    'not_eq': '!='
    }

# Compile regular expression that matches all the above keywords.  The "[ =()]"
# bit is meant to avoid matching these keywords outside of boolean expressions.
#
# False positives include C-style multi-line comments and multi-line strings
# but those have always been troublesome for cpplint.
_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
    r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)')
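
# For example, in 'if (a and not b)' this pattern finds 'and' and 'not', and
# _ALT_TOKEN_REPLACEMENT maps them to '&&' and '!'.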


# These constants define types of headers for use with
# _IncludeState.CheckNextIncludeOrder().
_C_SYS_HEADER = 1
_CPP_SYS_HEADER = 2
_LIKELY_MY_HEADER = 3
_POSSIBLE_MY_HEADER = 4
_OTHER_HEADER = 5

# These constants define the current inline assembly state
_NO_ASM = 0       # Outside of inline assembly block
_INSIDE_ASM = 1   # Inside inline assembly block
_END_ASM = 2      # Last line of inline assembly block
_BLOCK_ASM = 3    # The whole block is an inline assembly block

# Match start of assembly blocks
_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)'
                        r'(?:\s+(volatile|__volatile__))?'
                        r'\s*[{(]')
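
# For example, _MATCH_ASM matches lines such as 'asm {' and
# '__asm__ __volatile__ ('.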

# Match strings that indicate we're working on a C (not C++) file.
_SEARCH_C_FILE = re.compile(r'\b(?:LINT_C_FILE|'
                            r'vim?:\s*.*(\s*|:)filetype=c(\s*|:|$))')

# Match string that indicates we're working on a Linux Kernel file.
_SEARCH_KERNEL_FILE = re.compile(r'\b(?:LINT_KERNEL_FILE)')

_regexp_compile_cache = {}

# {str, set(int)}: a map from error categories to sets of linenumbers
# on which those errors are expected and should be suppressed.
_error_suppressions = {}

# The root directory used for deriving header guard CPP variable.
# This is set by --root flag.
_root = None
_root_debug = False

# The top level repository directory. If set, _root is calculated relative to
# this directory instead of the directory containing version control artifacts.
# This is set by the --repository flag.
_repository = None

# Files to exclude from linting. This is set by the --exclude flag.
_excludes = None

# Whether to suppress PrintInfo messages
_quiet = False

# The allowed line length of files.
# This is set by --linelength flag.
_line_length = 80

try:
  unicode
except NameError:
  #  -- pylint: disable=redefined-builtin
  basestring = unicode = str

try:
  long
except NameError:
  #  -- pylint: disable=redefined-builtin
  long = int

if sys.version_info < (3,):
  #  -- pylint: disable=no-member
  # BINARY_TYPE = str
  itervalues = dict.itervalues
  iteritems = dict.iteritems
else:
  # BINARY_TYPE = bytes
  itervalues = dict.values
  iteritems = dict.items

def unicode_escape_decode(x):
  if sys.version_info < (3,):
    return codecs.unicode_escape_decode(x)[0]
  else:
    return x

# Treat all headers starting with 'h' equally: .h, .hpp, .hxx etc.
# This is set by --headers flag.
_hpp_headers = set(['h', 'hh', 'hpp', 'hxx', 'h++', 'cuh'])

# {str, bool}: a map from error categories to booleans which indicate if the
# category should be suppressed for every line.
_global_error_suppressions = {}

def ProcessHppHeadersOption(val):
  global _hpp_headers
  try:
    _hpp_headers = set(val.split(','))
    # Automatically append to the extensions list so it does not have to be
    # set twice.
    _valid_extensions.update(_hpp_headers)
  except ValueError:
    PrintUsage('Header extensions must be a comma-separated list.')

def IsHeaderExtension(file_extension):
  return file_extension in _hpp_headers

def GetHeaderExtensions():
  return _hpp_headers or ['h']

# The allowed extensions for file names
# This is set by --extensions flag
def GetAllExtensions():
  if not _valid_extensions:
    return GetHeaderExtensions().union(set(['c', 'cc', 'cpp', 'cxx', 'c++', 'cu']))
  return _valid_extensions

def GetNonHeaderExtensions():
  return GetAllExtensions().difference(GetHeaderExtensions())
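
# For example, with no --extensions or --headers flags, GetAllExtensions()
# returns the default header extensions plus {'c', 'cc', 'cpp', 'cxx', 'c++',
# 'cu'}, and GetNonHeaderExtensions() returns only the non-header part.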



def ParseNolintSuppressions(filename, raw_line, linenum, error):
  """Updates the global list of line error-suppressions.

  Parses any NOLINT comments on the current line, updating the global
  error_suppressions store.  Reports an error if the NOLINT comment
  was malformed.

  Args:
    filename: str, the name of the input file.
    raw_line: str, the line of input text, with comments.
    linenum: int, the number of the current line.
    error: function, an error handler.
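
  Example comments recognized here:
    y = f(x);  // NOLINT
    z = g(x);  // NOLINT(runtime/int)
    // NOLINTNEXTLINE(whitespace/braces)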
726  """
727  matched = Search(r'\bNOLINT(NEXTLINE)?\b(\([^)]+\))?', raw_line)
728  if matched:
729    if matched.group(1):
730      suppressed_line = linenum + 1
731    else:
732      suppressed_line = linenum
733    category = matched.group(2)
734    if category in (None, '(*)'):  # => "suppress all"
735      _error_suppressions.setdefault(None, set()).add(suppressed_line)
736    else:
737      if category.startswith('(') and category.endswith(')'):
738        category = category[1:-1]
739        if category in _ERROR_CATEGORIES:
740          _error_suppressions.setdefault(category, set()).add(suppressed_line)
741        elif category not in _LEGACY_ERROR_CATEGORIES:
742          error(filename, linenum, 'readability/nolint', 5,
743                'Unknown NOLINT error category: %s' % category)
744
745
def ProcessGlobalSuppresions(lines):
  """Updates the list of global error suppressions.

  Parses any lint directives in the file that have global effect.

  Args:
    lines: An array of strings, each representing a line of the file, with the
           last element being empty if the file is terminated with a newline.
  """
  for line in lines:
    if _SEARCH_C_FILE.search(line):
      for category in _DEFAULT_C_SUPPRESSED_CATEGORIES:
        _global_error_suppressions[category] = True
    if _SEARCH_KERNEL_FILE.search(line):
      for category in _DEFAULT_KERNEL_SUPPRESSED_CATEGORIES:
        _global_error_suppressions[category] = True


def ResetNolintSuppressions():
  """Resets the set of NOLINT suppressions to empty."""
  _error_suppressions.clear()
  _global_error_suppressions.clear()


def IsErrorSuppressedByNolint(category, linenum):
  """Returns true if the specified error category is suppressed on this line.

  Consults the global error_suppressions map populated by
  ParseNolintSuppressions/ProcessGlobalSuppresions/ResetNolintSuppressions.

  Args:
    category: str, the category of the error.
    linenum: int, the current line number.
  Returns:
    bool, True iff the error should be suppressed due to a NOLINT comment or
    global suppression.
  """
  return (_global_error_suppressions.get(category, False) or
          linenum in _error_suppressions.get(category, set()) or
          linenum in _error_suppressions.get(None, set()))


def Match(pattern, s):
  """Matches the string with the pattern, caching the compiled regexp."""
  # The regexp compilation caching is inlined in both Match and Search for
  # performance reasons; factoring it out into a separate function turns out
  # to be noticeably expensive.
  if pattern not in _regexp_compile_cache:
    _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
  return _regexp_compile_cache[pattern].match(s)


def ReplaceAll(pattern, rep, s):
  """Replaces instances of pattern in a string with a replacement.

  The compiled regex is kept in a cache shared by Match and Search.

  Args:
    pattern: regex pattern
    rep: replacement text
    s: search string

  Returns:
    string with replacements made (or original string if no replacements)
  """
  if pattern not in _regexp_compile_cache:
    _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
  return _regexp_compile_cache[pattern].sub(rep, s)


def Search(pattern, s):
  """Searches the string for the pattern, caching the compiled regexp."""
  if pattern not in _regexp_compile_cache:
    _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
  return _regexp_compile_cache[pattern].search(s)


def _IsSourceExtension(s):
  """File extension (excluding dot) matches a source file extension."""
  return s in GetNonHeaderExtensions()


class _IncludeState(object):
  """Tracks line numbers for includes, and the order in which includes appear.

  include_list contains list of lists of (header, line number) pairs.
  It's a list of lists rather than just one flat list to make it
  easier to update across preprocessor boundaries.

  Call CheckNextIncludeOrder() once for each header in the file, passing
  in the type constants defined above. Calls in an illegal order will
  raise an _IncludeError with an appropriate error message.

  """
  # self._section will move monotonically through this set. If it ever
  # needs to move backwards, CheckNextIncludeOrder will raise an error.
  _INITIAL_SECTION = 0
  _MY_H_SECTION = 1
  _C_SECTION = 2
  _CPP_SECTION = 3
  _OTHER_H_SECTION = 4

  _TYPE_NAMES = {
      _C_SYS_HEADER: 'C system header',
      _CPP_SYS_HEADER: 'C++ system header',
      _LIKELY_MY_HEADER: 'header this file implements',
      _POSSIBLE_MY_HEADER: 'header this file may implement',
      _OTHER_HEADER: 'other header',
      }
  _SECTION_NAMES = {
      _INITIAL_SECTION: "... nothing. (This can't be an error.)",
      _MY_H_SECTION: 'a header this file implements',
      _C_SECTION: 'C system header',
      _CPP_SECTION: 'C++ system header',
      _OTHER_H_SECTION: 'other header',
      }

  def __init__(self):
    self.include_list = [[]]
    self._section = None
    self._last_header = None
    self.ResetSection('')

  def FindHeader(self, header):
    """Check if a header has already been included.

    Args:
      header: header to check.
    Returns:
      Line number of previous occurrence, or -1 if the header has not
      been seen before.
    """
    for section_list in self.include_list:
      for f in section_list:
        if f[0] == header:
          return f[1]
    return -1

  def ResetSection(self, directive):
    """Reset section checking for preprocessor directive.

    Args:
      directive: preprocessor directive (e.g. "if", "else").
    """
    # The name of the current section.
    self._section = self._INITIAL_SECTION
    # The path of last found header.
    self._last_header = ''

    # Update list of includes.  Note that we never pop from the
    # include list.
    if directive in ('if', 'ifdef', 'ifndef'):
      self.include_list.append([])
    elif directive in ('else', 'elif'):
      self.include_list[-1] = []

  def SetLastHeader(self, header_path):
    self._last_header = header_path

  def CanonicalizeAlphabeticalOrder(self, header_path):
    """Returns a path canonicalized for alphabetical comparison.

    - replaces "-" with "_" so they both compare the same.
    - removes '-inl' since we don't require them to be after the main header.
    - lowercase everything, just in case.

    Args:
      header_path: Path to be canonicalized.

    Returns:
      Canonicalized path.
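
      For example, "Foo-Bar-inl.h" is canonicalized to "foo_bar.h".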
917    """
918    return header_path.replace('-inl.h', '.h').replace('-', '_').lower()
919
920  def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
921    """Check if a header is in alphabetical order with the previous header.
922
923    Args:
924      clean_lines: A CleansedLines instance containing the file.
925      linenum: The number of the line to check.
926      header_path: Canonicalized header to be checked.
927
928    Returns:
929      Returns true if the header is in alphabetical order.
930    """
931    # If previous section is different from current section, _last_header will
932    # be reset to empty string, so it's always less than current header.
933    #
934    # If previous line was a blank line, assume that the headers are
935    # intentionally sorted the way they are.
936    if (self._last_header > header_path and
937        Match(r'^\s*#\s*include\b', clean_lines.elided[linenum - 1])):
938      return False
939    return True
940
941  def CheckNextIncludeOrder(self, header_type):
942    """Returns a non-empty error message if the next header is out of order.
943
944    This function also updates the internal state to be ready to check
945    the next include.
946
947    Args:
948      header_type: One of the _XXX_HEADER constants defined above.
949
950    Returns:
951      The empty string if the header is in the right order, or an
952      error message describing what's wrong.
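
      For example, seeing a C system header while already in the C++ system
      header section produces "Found C system header after C++ system header".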

    """
    error_message = ('Found %s after %s' %
                     (self._TYPE_NAMES[header_type],
                      self._SECTION_NAMES[self._section]))

    last_section = self._section

    if header_type == _C_SYS_HEADER:
      if self._section <= self._C_SECTION:
        self._section = self._C_SECTION
      else:
        self._last_header = ''
        return error_message
    elif header_type == _CPP_SYS_HEADER:
      if self._section <= self._CPP_SECTION:
        self._section = self._CPP_SECTION
      else:
        self._last_header = ''
        return error_message
    elif header_type == _LIKELY_MY_HEADER:
      if self._section <= self._MY_H_SECTION:
        self._section = self._MY_H_SECTION
      else:
        self._section = self._OTHER_H_SECTION
    elif header_type == _POSSIBLE_MY_HEADER:
      if self._section <= self._MY_H_SECTION:
        self._section = self._MY_H_SECTION
      else:
        # This will always be the fallback because we're not sure
        # enough that the header is associated with this file.
        self._section = self._OTHER_H_SECTION
    else:
      assert header_type == _OTHER_HEADER
      self._section = self._OTHER_H_SECTION

    if last_section != self._section:
      self._last_header = ''

    return ''


class _CppLintState(object):
996  """Maintains module-wide state.."""

  def __init__(self):
    self.verbose_level = 1  # global setting.
    self.error_count = 0    # global count of reported errors
    # filters to apply when emitting error messages
    self.filters = _DEFAULT_FILTERS[:]
    # backup of filter list. Used to restore the state after each file.
    self._filters_backup = self.filters[:]
    self.counting = 'total'  # In what way are we counting errors?
    self.errors_by_category = {}  # string to int dict storing error counts
    self.quiet = False  # Suppress non-error messages?

    # output format:
    # "emacs" - format that emacs can parse (default)
    # "eclipse" - format that eclipse can parse
    # "vs7" - format that Microsoft Visual Studio 7 can parse
    # "junit" - format that Jenkins, Bamboo, etc can parse
    self.output_format = 'emacs'

    # For JUnit output, save errors and failures until the end so that they
    # can be written into the XML
    self._junit_errors = []
    self._junit_failures = []

  def SetOutputFormat(self, output_format):
    """Sets the output format for errors."""
    self.output_format = output_format

  def SetQuiet(self, quiet):
    """Sets the module's quiet settings, and returns the previous setting."""
    last_quiet = self.quiet
    self.quiet = quiet
    return last_quiet

  def SetVerboseLevel(self, level):
    """Sets the module's verbosity, and returns the previous setting."""
    last_verbose_level = self.verbose_level
    self.verbose_level = level
    return last_verbose_level

  def SetCountingStyle(self, counting_style):
    """Sets the module's counting options."""
    self.counting = counting_style

  def SetFilters(self, filters):
    """Sets the error-message filters.

    These filters are applied when deciding whether to emit a given
    error message.

    Args:
      filters: A string of comma-separated filters (eg "+whitespace/indent").
               Each filter should start with + or -; else we die.

    Raises:
      ValueError: The comma-separated filters did not all start with '+' or '-'.
                  E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
    """
    # Default filters always have less priority than the flag ones.
    self.filters = _DEFAULT_FILTERS[:]
    self.AddFilters(filters)

  def AddFilters(self, filters):
    """ Adds more filters to the existing list of error-message filters. """
    for filt in filters.split(','):
      clean_filt = filt.strip()
      if clean_filt:
        self.filters.append(clean_filt)
    for filt in self.filters:
      if not (filt.startswith('+') or filt.startswith('-')):
        raise ValueError('Every filter in --filters must start with + or -'
                         ' (%s does not)' % filt)

  def BackupFilters(self):
    """ Saves the current filter list to backup storage."""
    self._filters_backup = self.filters[:]

  def RestoreFilters(self):
    """ Restores filters previously backed up."""
    self.filters = self._filters_backup[:]

  def ResetErrorCounts(self):
    """Sets the module's error statistic back to zero."""
    self.error_count = 0
    self.errors_by_category = {}

  def IncrementErrorCount(self, category):
    """Bumps the module's error statistic."""
    self.error_count += 1
    if self.counting in ('toplevel', 'detailed'):
      if self.counting != 'detailed':
        category = category.split('/')[0]
      if category not in self.errors_by_category:
        self.errors_by_category[category] = 0
      self.errors_by_category[category] += 1

  def PrintErrorCounts(self):
    """Print a summary of errors by category, and the total."""
    for category, count in sorted(iteritems(self.errors_by_category)):
      self.PrintInfo('Category \'%s\' errors found: %d\n' %
                       (category, count))
    if self.error_count > 0:
      self.PrintInfo('Total errors found: %d\n' % self.error_count)

  def PrintInfo(self, message):
    if not _quiet and self.output_format != 'junit':
      sys.stdout.write(message)

  def PrintError(self, message):
    if self.output_format == 'junit':
      self._junit_errors.append(message)
    else:
      sys.stderr.write(message)

  def AddJUnitFailure(self, filename, linenum, message, category, confidence):
    self._junit_failures.append((filename, linenum, message, category,
        confidence))

  def FormatJUnitXML(self):
    num_errors = len(self._junit_errors)
    num_failures = len(self._junit_failures)

    testsuite = xml.etree.ElementTree.Element('testsuite')
    testsuite.attrib['name'] = 'cpplint'
    testsuite.attrib['errors'] = str(num_errors)
    testsuite.attrib['failures'] = str(num_failures)

    if num_errors == 0 and num_failures == 0:
      testsuite.attrib['tests'] = str(1)
      xml.etree.ElementTree.SubElement(testsuite, 'testcase', name='passed')

    else:
      testsuite.attrib['tests'] = str(num_errors + num_failures)
      if num_errors > 0:
        testcase = xml.etree.ElementTree.SubElement(testsuite, 'testcase')
        testcase.attrib['name'] = 'errors'
        error = xml.etree.ElementTree.SubElement(testcase, 'error')
        error.text = '\n'.join(self._junit_errors)
      if num_failures > 0:
        # Group failures by file
        failed_file_order = []
        failures_by_file = {}
        for failure in self._junit_failures:
          failed_file = failure[0]
          if failed_file not in failed_file_order:
            failed_file_order.append(failed_file)
            failures_by_file[failed_file] = []
          failures_by_file[failed_file].append(failure)
        # Create a testcase for each file
        for failed_file in failed_file_order:
          failures = failures_by_file[failed_file]
          testcase = xml.etree.ElementTree.SubElement(testsuite, 'testcase')
          testcase.attrib['name'] = failed_file
          failure = xml.etree.ElementTree.SubElement(testcase, 'failure')
          template = '{0}: {1} [{2}] [{3}]'
          texts = [template.format(f[1], f[2], f[3], f[4]) for f in failures]
          failure.text = '\n'.join(texts)

    xml_decl = '<?xml version="1.0" encoding="UTF-8" ?>\n'
    return xml_decl + xml.etree.ElementTree.tostring(testsuite, 'utf-8').decode('utf-8')


_cpplint_state = _CppLintState()


def _OutputFormat():
  """Gets the module's output format."""
  return _cpplint_state.output_format


def _SetOutputFormat(output_format):
  """Sets the module's output format."""
  _cpplint_state.SetOutputFormat(output_format)

def _Quiet():
1172  """Return's the module's quiet setting."""
  return _cpplint_state.quiet

def _SetQuiet(quiet):
  """Set the module's quiet status, and return previous setting."""
  return _cpplint_state.SetQuiet(quiet)


def _VerboseLevel():
  """Returns the module's verbosity setting."""
  return _cpplint_state.verbose_level


def _SetVerboseLevel(level):
  """Sets the module's verbosity, and returns the previous setting."""
  return _cpplint_state.SetVerboseLevel(level)


def _SetCountingStyle(level):
  """Sets the module's counting options."""
  _cpplint_state.SetCountingStyle(level)


def _Filters():
  """Returns the module's list of output filters, as a list."""
  return _cpplint_state.filters


def _SetFilters(filters):
  """Sets the module's error-message filters.

  These filters are applied when deciding whether to emit a given
  error message.

  Args:
    filters: A string of comma-separated filters (eg "whitespace/indent").
             Each filter should start with + or -; else we die.
  """
  _cpplint_state.SetFilters(filters)

def _AddFilters(filters):
  """Adds more filter overrides.

  Unlike _SetFilters, this function does not reset the current list of filters
  available.

  Args:
    filters: A string of comma-separated filters (eg "whitespace/indent").
             Each filter should start with + or -; else we die.
  """
  _cpplint_state.AddFilters(filters)

def _BackupFilters():
  """ Saves the current filter list to backup storage."""
  _cpplint_state.BackupFilters()

def _RestoreFilters():
  """ Restores filters previously backed up."""
  _cpplint_state.RestoreFilters()

class _FunctionState(object):
  """Tracks current function name and the number of lines in its body."""

  _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
  _TEST_TRIGGER = 400    # about 50% more than _NORMAL_TRIGGER.

  def __init__(self):
    self.in_a_function = False
    self.lines_in_function = 0
    self.current_function = ''

  def Begin(self, function_name):
    """Start analyzing function body.

    Args:
      function_name: The name of the function being tracked.
    """
    self.in_a_function = True
    self.lines_in_function = 0
    self.current_function = function_name

  def Count(self):
    """Count line in current function body."""
    if self.in_a_function:
      self.lines_in_function += 1

  def Check(self, error, filename, linenum):
    """Report if too many lines in function body.

    Args:
      error: The function to call with any errors found.
      filename: The name of the current file.
      linenum: The number of the line to check.
    """
    if not self.in_a_function:
      return

    if Match(r'T(EST|est)', self.current_function):
      base_trigger = self._TEST_TRIGGER
    else:
      base_trigger = self._NORMAL_TRIGGER
    trigger = base_trigger * 2**_VerboseLevel()

    if self.lines_in_function > trigger:
      error_level = int(math.log(self.lines_in_function / base_trigger, 2))
      # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
      if error_level > 5:
        error_level = 5
      error(filename, linenum, 'readability/fn_size', error_level,
            'Small and focused functions are preferred:'
            ' %s has %d non-comment lines'
            ' (error triggered by exceeding %d lines).'  % (
                self.current_function, self.lines_in_function, trigger))

  def End(self):
    """Stop analyzing function body."""
    self.in_a_function = False


class _IncludeError(Exception):
  """Indicates a problem with the include order in a file."""
  pass


class FileInfo(object):
  """Provides utility functions for filenames.

  FileInfo provides easy access to the components of a file's path
  relative to the project root.
  """

  def __init__(self, filename):
    self._filename = filename

  def FullName(self):
    """Make Windows paths like Unix."""
    return os.path.abspath(self._filename).replace('\\', '/')

  def RepositoryName(self):
    r"""FullName after removing the local path to the repository.

    If we have a real absolute path name here we can try to do something smart:
    detecting the root of the checkout and truncating /path/to/checkout from
    the name so that we get header guards that don't include things like
    "C:\Documents and Settings\..." or "/home/username/..." in them and thus
    people on different computers who have checked the source out to different
    locations won't see bogus errors.
    """
    fullname = self.FullName()

    if os.path.exists(fullname):
      project_dir = os.path.dirname(fullname)

      # If the user specified a repository path, it exists, and the file is
      # contained in it, use the specified repository path
      if _repository:
        repo = FileInfo(_repository).FullName()
        root_dir = project_dir
        while os.path.exists(root_dir):
          # allow case insensitive compare on Windows
          if os.path.normcase(root_dir) == os.path.normcase(repo):
            return os.path.relpath(fullname, root_dir).replace('\\', '/')
          one_up_dir = os.path.dirname(root_dir)
          if one_up_dir == root_dir:
            break
          root_dir = one_up_dir

      if os.path.exists(os.path.join(project_dir, ".svn")):
        # If there's a .svn file in the current directory, we recursively look
        # up the directory tree for the top of the SVN checkout
        root_dir = project_dir
        one_up_dir = os.path.dirname(root_dir)
        while os.path.exists(os.path.join(one_up_dir, ".svn")):
          root_dir = os.path.dirname(root_dir)
          one_up_dir = os.path.dirname(one_up_dir)

        prefix = os.path.commonprefix([root_dir, project_dir])
        return fullname[len(prefix) + 1:]

      # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
      # searching up from the current path.
      root_dir = current_dir = os.path.dirname(fullname)
      while current_dir != os.path.dirname(current_dir):
        if (os.path.exists(os.path.join(current_dir, ".git")) or
            os.path.exists(os.path.join(current_dir, ".hg")) or
            os.path.exists(os.path.join(current_dir, ".svn"))):
          root_dir = current_dir
        current_dir = os.path.dirname(current_dir)

      if (os.path.exists(os.path.join(root_dir, ".git")) or
          os.path.exists(os.path.join(root_dir, ".hg")) or
          os.path.exists(os.path.join(root_dir, ".svn"))):
        prefix = os.path.commonprefix([root_dir, project_dir])
        return fullname[len(prefix) + 1:]

    # Don't know what to do; header guard warnings may be wrong...
    return fullname

  def Split(self):
    """Splits the file into the directory, basename, and extension.

    For 'chrome/browser/browser.cc', Split() would
    return ('chrome/browser', 'browser', '.cc')

    Returns:
      A tuple of (directory, basename, extension).
    """

    googlename = self.RepositoryName()
    project, rest = os.path.split(googlename)
    return (project,) + os.path.splitext(rest)

  def BaseName(self):
    """File base name - text after the final slash, before the final period."""
    return self.Split()[1]

  def Extension(self):
    """File extension - text following the final period, includes that period."""
    return self.Split()[2]

  def NoExtension(self):
    """File has no source file extension."""
    return '/'.join(self.Split()[0:2])

  def IsSource(self):
    """File has a source file extension."""
    return _IsSourceExtension(self.Extension()[1:])


def _ShouldPrintError(category, confidence, linenum):
  """If confidence >= verbose, category passes filter and is not suppressed."""

  # There are three ways we might decide not to print an error message:
  # a "NOLINT(category)" comment appears in the source,
  # the verbosity level isn't high enough, or the filters filter it out.
  if IsErrorSuppressedByNolint(category, linenum):
    return False

  if confidence < _cpplint_state.verbose_level:
    return False

  is_filtered = False
  for one_filter in _Filters():
    if one_filter.startswith('-'):
      if category.startswith(one_filter[1:]):
        is_filtered = True
    elif one_filter.startswith('+'):
      if category.startswith(one_filter[1:]):
        is_filtered = False
    else:
      assert False  # should have been checked for in SetFilter.
  if is_filtered:
    return False

  return True


def Error(filename, linenum, category, confidence, message):
  """Logs the fact we've found a lint error.

  We log where the error was found, and also our confidence in the error,
  that is, how certain we are this is a legitimate style regression, and
  not a misidentification or a use that's sometimes justified.

  False positives can be suppressed by the use of
  "cpplint(category)"  comments on the offending line.  These are
  parsed into _error_suppressions.

  Args:
    filename: The name of the file containing the error.
    linenum: The number of the line containing the error.
    category: A string used to describe the "category" this bug
      falls under: "whitespace", say, or "runtime".  Categories
      may have a hierarchy separated by slashes: "whitespace/indent".
    confidence: A number from 1-5 representing a confidence score for
      the error, with 5 meaning that we are certain of the problem,
      and 1 meaning that it could be a legitimate construct.
    message: The error message.
  """
  if _ShouldPrintError(category, confidence, linenum):
    _cpplint_state.IncrementErrorCount(category)
    if _cpplint_state.output_format == 'vs7':
      _cpplint_state.PrintError('%s(%s): error cpplint: [%s] %s [%d]\n' % (
          filename, linenum, category, message, confidence))
    elif _cpplint_state.output_format == 'eclipse':
      sys.stderr.write('%s:%s: warning: %s  [%s] [%d]\n' % (
          filename, linenum, message, category, confidence))
    elif _cpplint_state.output_format == 'junit':
      _cpplint_state.AddJUnitFailure(filename, linenum, message, category,
          confidence)
    else:
      final_message = '%s:%s:  %s  [%s] [%d]\n' % (
          filename, linenum, message, category, confidence)
      sys.stderr.write(final_message)

# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Match a single C style comment on the same line.
_RE_PATTERN_C_COMMENTS = r'/\*(?:[^*]|\*(?!/))*\*/'
# Matches multi-line C style comments.
# This RE is a little bit more complicated than one might expect, because we
# have to take care with space removal so we can handle comments inside
# statements better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line. Otherwise, we try to remove spaces from the right side;
# if this doesn't work we try the left side, but only if there's a non-word
# character on the right.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r'(\s*' + _RE_PATTERN_C_COMMENTS + r'\s*$|' +
    _RE_PATTERN_C_COMMENTS + r'\s+|' +
    r'\s+' + _RE_PATTERN_C_COMMENTS + r'(?=\W)|' +
    _RE_PATTERN_C_COMMENTS + r')')
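# For example, substituting this pattern with '' turns
# 'f(a, /* name */ b);' into 'f(a, b);'.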


def IsCppString(line):
1488  """Does line terminate so, that the next symbol is in string constant.
1489
1490  This function does not consider single-line nor multi-line comments.
1491
1492  Args:
1493    line: is a partial line of code starting from the 0..n.
1494
1495  Returns:
1496    True, if next character appended to 'line' is inside a
1497    string constant.
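
    For example, IsCppString('printf("hello') is True, while
    IsCppString('int a = 1;') is False.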
1498  """
1499
1500  line = line.replace(r'\\', 'XX')  # after this, \\" does not match to \"
1501  return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
1502
1503
1504def CleanseRawStrings(raw_lines):
1505  """Removes C++11 raw strings from lines.
1506
1507    Before:
1508      static const char kData[] = R"(
1509          multi-line string
1510          )";
1511
1512    After:
1513      static const char kData[] = ""
1514          (replaced by blank line)
1515          "";
1516
1517  Args:
1518    raw_lines: list of raw lines.
1519
1520  Returns:
1521    list of lines with C++11 raw strings replaced by empty strings.
1522  """
1523
1524  delimiter = None
1525  lines_without_raw_strings = []
1526  for line in raw_lines:
1527    if delimiter:
1528      # Inside a raw string, look for the end
1529      end = line.find(delimiter)
1530      if end >= 0:
1531        # Found the end of the string, match leading space for this
1532        # line and resume copying the original lines, and also insert
1533        # a "" on the last line.
1534        leading_space = Match(r'^(\s*)\S', line)
1535        line = leading_space.group(1) + '""' + line[end + len(delimiter):]
1536        delimiter = None
1537      else:
1538        # Haven't found the end yet, append a blank line.
1539        line = '""'
1540
1541    # Look for beginning of a raw string, and replace them with
1542    # empty strings.  This is done in a loop to handle multiple raw
1543    # strings on the same line.
1544    while delimiter is None:
1545      # Look for beginning of a raw string.
1546      # See 2.14.15 [lex.string] for syntax.
1547      #
1548      # Once we have matched a raw string, we check the prefix of the
1549      # line to make sure that the line is not part of a single line
1550      # comment.  It's done this way because we remove raw strings
1551      # before removing comments as opposed to removing comments
1552      # before removing raw strings.  This is because there are some
      # cpplint checks that require the comments to be preserved, but
1554      # we don't want to check comments that are inside raw strings.
1555      matched = Match(r'^(.*?)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line)
1556      if (matched and
1557          not Match(r'^([^\'"]|\'(\\.|[^\'])*\'|"(\\.|[^"])*")*//',
1558                    matched.group(1))):
1559        delimiter = ')' + matched.group(2) + '"'
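        # For example, for R"abc(text)abc" the delimiter tag is 'abc' and
        # the terminating sequence we look for is ')abc"'.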
1560
1561        end = matched.group(3).find(delimiter)
1562        if end >= 0:
1563          # Raw string ended on same line
1564          line = (matched.group(1) + '""' +
1565                  matched.group(3)[end + len(delimiter):])
1566          delimiter = None
1567        else:
1568          # Start of a multi-line raw string
1569          line = matched.group(1) + '""'
1570      else:
1571        break
1572
1573    lines_without_raw_strings.append(line)
1574
1575  # TODO(unknown): if delimiter is not None here, we might want to
1576  # emit a warning for unterminated string.
1577  return lines_without_raw_strings
1578
1579
1580def FindNextMultiLineCommentStart(lines, lineix):
1581  """Find the beginning marker for a multiline comment."""
1582  while lineix < len(lines):
1583    if lines[lineix].strip().startswith('/*'):
1584      # Only return this marker if the comment goes beyond this line
1585      if lines[lineix].strip().find('*/', 2) < 0:
1586        return lineix
1587    lineix += 1
1588  return len(lines)
1589
1590
1591def FindNextMultiLineCommentEnd(lines, lineix):
1592  """We are inside a comment, find the end marker."""
1593  while lineix < len(lines):
1594    if lines[lineix].strip().endswith('*/'):
1595      return lineix
1596    lineix += 1
1597  return len(lines)
1598
1599
1600def RemoveMultiLineCommentsFromRange(lines, begin, end):
1601  """Clears a range of lines for multi-line comments."""
  # Having /**/ dummy comments makes the lines non-empty, so we will not get
1603  # unnecessary blank line warnings later in the code.
1604  for i in range(begin, end):
1605    lines[i] = '/**/'
1606
1607
1608def RemoveMultiLineComments(filename, lines, error):
1609  """Removes multiline (c-style) comments from lines."""
1610  lineix = 0
1611  while lineix < len(lines):
1612    lineix_begin = FindNextMultiLineCommentStart(lines, lineix)
1613    if lineix_begin >= len(lines):
1614      return
1615    lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin)
1616    if lineix_end >= len(lines):
1617      error(filename, lineix_begin + 1, 'readability/multiline_comment', 5,
1618            'Could not find end of multi-line comment')
1619      return
1620    RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1)
1621    lineix = lineix_end + 1
1622
1623
1624def CleanseComments(line):
1625  """Removes //-comments and single-line C-style /* */ comments.
1626
1627  Args:
1628    line: A line of C++ source.
1629
1630  Returns:
1631    The line with single-line comments removed.
1632  """
1633  commentpos = line.find('//')
1634  if commentpos != -1 and not IsCppString(line[:commentpos]):
1635    line = line[:commentpos].rstrip()
1636  # get rid of /* ... */
1637  return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
1638
1639
1640class CleansedLines(object):
1641  """Holds 4 copies of all lines with different preprocessing applied to them.
1642
1643  1) elided member contains lines without strings and comments.
1644  2) lines member contains lines without comments.
1645  3) raw_lines member contains all the lines without processing.
  4) lines_without_raw_strings member is the same as raw_lines, but with
     C++11 raw strings removed.
1648  All these members are of <type 'list'>, and of the same length.
1649  """
1650
1651  def __init__(self, lines):
1652    self.elided = []
1653    self.lines = []
1654    self.raw_lines = lines
1655    self.num_lines = len(lines)
1656    self.lines_without_raw_strings = CleanseRawStrings(lines)
1657    for linenum in range(len(self.lines_without_raw_strings)):
1658      self.lines.append(CleanseComments(
1659          self.lines_without_raw_strings[linenum]))
1660      elided = self._CollapseStrings(self.lines_without_raw_strings[linenum])
1661      self.elided.append(CleanseComments(elided))
1662
1663  def NumLines(self):
1664    """Returns the number of lines represented."""
1665    return self.num_lines
1666
1667  @staticmethod
1668  def _CollapseStrings(elided):
1669    """Collapses strings and chars on a line to simple "" or '' blocks.
1670
1671    We nix strings first so we're not fooled by text like '"http://"'
1672
1673    Args:
1674      elided: The line being processed.
1675
1676    Returns:
1677      The line with collapsed strings.
1678    """
1679    if _RE_PATTERN_INCLUDE.match(elided):
1680      return elided
1681
1682    # Remove escaped characters first to make quote/single quote collapsing
1683    # basic.  Things that look like escaped characters shouldn't occur
1684    # outside of strings and chars.
1685    elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
1686
1687    # Replace quoted strings and digit separators.  Both single quotes
1688    # and double quotes are processed in the same loop, otherwise
1689    # nested quotes wouldn't work.
1690    collapsed = ''
1691    while True:
1692      # Find the first quote character
1693      match = Match(r'^([^\'"]*)([\'"])(.*)$', elided)
1694      if not match:
1695        collapsed += elided
1696        break
1697      head, quote, tail = match.groups()
1698
1699      if quote == '"':
1700        # Collapse double quoted strings
1701        second_quote = tail.find('"')
1702        if second_quote >= 0:
1703          collapsed += head + '""'
1704          elided = tail[second_quote + 1:]
1705        else:
1706          # Unmatched double quote, don't bother processing the rest
1707          # of the line since this is probably a multiline string.
1708          collapsed += elided
1709          break
1710      else:
1711        # Found single quote, check nearby text to eliminate digit separators.
1712        #
1713        # There is no special handling for floating point here, because
1714        # the integer/fractional/exponent parts would all be parsed
1715        # correctly as long as there are digits on both sides of the
1716        # separator.  So we are fine as long as we don't see something
1717        # like "0.'3" (gcc 4.9.0 will not allow this literal).
1718        if Search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head):
1719          match_literal = Match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail)
1720          collapsed += head + match_literal.group(1).replace("'", '')
1721          elided = match_literal.group(2)
1722        else:
1723          second_quote = tail.find('\'')
1724          if second_quote >= 0:
1725            collapsed += head + "''"
1726            elided = tail[second_quote + 1:]
1727          else:
1728            # Unmatched single quote
1729            collapsed += elided
1730            break
1731
1732    return collapsed
1733
1734
1735def FindEndOfExpressionInLine(line, startpos, stack):
1736  """Find the position just after the end of current parenthesized expression.
1737
1738  Args:
1739    line: a CleansedLines line.
1740    startpos: start searching at this position.
1741    stack: nesting stack at startpos.
1742
1743  Returns:
1744    On finding matching end: (index just after matching end, None)
1745    On finding an unclosed expression: (-1, None)
1746    Otherwise: (-1, new stack at end of this line)
1747  """
1748  for i in xrange(startpos, len(line)):
1749    char = line[i]
1750    if char in '([{':
1751      # Found start of parenthesized expression, push to expression stack
1752      stack.append(char)
1753    elif char == '<':
1754      # Found potential start of template argument list
1755      if i > 0 and line[i - 1] == '<':
1756        # Left shift operator
1757        if stack and stack[-1] == '<':
1758          stack.pop()
1759          if not stack:
1760            return (-1, None)
1761      elif i > 0 and Search(r'\boperator\s*$', line[0:i]):
1762        # operator<, don't add to stack
1763        continue
1764      else:
1765        # Tentative start of template argument list
1766        stack.append('<')
1767    elif char in ')]}':
1768      # Found end of parenthesized expression.
1769      #
1770      # If we are currently expecting a matching '>', the pending '<'
1771      # must have been an operator.  Remove them from expression stack.
1772      while stack and stack[-1] == '<':
1773        stack.pop()
1774      if not stack:
1775        return (-1, None)
1776      if ((stack[-1] == '(' and char == ')') or
1777          (stack[-1] == '[' and char == ']') or
1778          (stack[-1] == '{' and char == '}')):
1779        stack.pop()
1780        if not stack:
1781          return (i + 1, None)
1782      else:
1783        # Mismatched parentheses
1784        return (-1, None)
1785    elif char == '>':
1786      # Found potential end of template argument list.
1787
1788      # Ignore "->" and operator functions
1789      if (i > 0 and
1790          (line[i - 1] == '-' or Search(r'\boperator\s*$', line[0:i - 1]))):
1791        continue
1792
1793      # Pop the stack if there is a matching '<'.  Otherwise, ignore
1794      # this '>' since it must be an operator.
1795      if stack:
1796        if stack[-1] == '<':
1797          stack.pop()
1798          if not stack:
1799            return (i + 1, None)
1800    elif char == ';':
      # Found something that looks like the end of a statement.  If we are
      # currently expecting a '>', the matching '<' must have been an operator,
      # since a template argument list should not contain statements.
1804      while stack and stack[-1] == '<':
1805        stack.pop()
1806      if not stack:
1807        return (-1, None)
1808
1809  # Did not find end of expression or unbalanced parentheses on this line
1810  return (-1, stack)
1811
1812
1813def CloseExpression(clean_lines, linenum, pos):
1814  """If input points to ( or { or [ or <, finds the position that closes it.
1815
1816  If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the
1817  linenum/pos that correspond to the closing of the expression.
1818
1819  TODO(unknown): cpplint spends a fair bit of time matching parentheses.
1820  Ideally we would want to index all opening and closing parentheses once
1821  and have CloseExpression be just a simple lookup, but due to preprocessor
1822  tricks, this is not so easy.
1823
1824  Args:
1825    clean_lines: A CleansedLines instance containing the file.
1826    linenum: The number of the line to check.
1827    pos: A position on the line.
1828
1829  Returns:
1830    A tuple (line, linenum, pos) pointer *past* the closing brace, or
1831    (line, len(lines), -1) if we never find a close.  Note we ignore
1832    strings and comments when matching; and the line we return is the
1833    'cleansed' line at linenum.
1834  """
1835
1836  line = clean_lines.elided[linenum]
1837  if (line[pos] not in '({[<') or Match(r'<[<=]', line[pos:]):
1838    return (line, clean_lines.NumLines(), -1)
1839
1840  # Check first line
1841  (end_pos, stack) = FindEndOfExpressionInLine(line, pos, [])
1842  if end_pos > -1:
1843    return (line, linenum, end_pos)
1844
1845  # Continue scanning forward
1846  while stack and linenum < clean_lines.NumLines() - 1:
1847    linenum += 1
1848    line = clean_lines.elided[linenum]
1849    (end_pos, stack) = FindEndOfExpressionInLine(line, 0, stack)
1850    if end_pos > -1:
1851      return (line, linenum, end_pos)
1852
1853  # Did not find end of expression before end of file, give up
1854  return (line, clean_lines.NumLines(), -1)
1855
1856
1857def FindStartOfExpressionInLine(line, endpos, stack):
1858  """Find position at the matching start of current expression.
1859
1860  This is almost the reverse of FindEndOfExpressionInLine, but note
1861  that the input position and returned position differs by 1.
1862
1863  Args:
1864    line: a CleansedLines line.
1865    endpos: start searching at this position.
1866    stack: nesting stack at endpos.
1867
1868  Returns:
1869    On finding matching start: (index at matching start, None)
1870    On finding an unclosed expression: (-1, None)
1871    Otherwise: (-1, new stack at beginning of this line)
1872  """
1873  i = endpos
1874  while i >= 0:
1875    char = line[i]
1876    if char in ')]}':
1877      # Found end of expression, push to expression stack
1878      stack.append(char)
1879    elif char == '>':
1880      # Found potential end of template argument list.
1881      #
1882      # Ignore it if it's a "->" or ">=" or "operator>"
1883      if (i > 0 and
1884          (line[i - 1] == '-' or
1885           Match(r'\s>=\s', line[i - 1:]) or
1886           Search(r'\boperator\s*$', line[0:i]))):
1887        i -= 1
1888      else:
1889        stack.append('>')
1890    elif char == '<':
1891      # Found potential start of template argument list
1892      if i > 0 and line[i - 1] == '<':
1893        # Left shift operator
1894        i -= 1
1895      else:
1896        # If there is a matching '>', we can pop the expression stack.
1897        # Otherwise, ignore this '<' since it must be an operator.
1898        if stack and stack[-1] == '>':
1899          stack.pop()
1900          if not stack:
1901            return (i, None)
1902    elif char in '([{':
1903      # Found start of expression.
1904      #
1905      # If there are any unmatched '>' on the stack, they must be
1906      # operators.  Remove those.
1907      while stack and stack[-1] == '>':
1908        stack.pop()
1909      if not stack:
1910        return (-1, None)
1911      if ((char == '(' and stack[-1] == ')') or
1912          (char == '[' and stack[-1] == ']') or
1913          (char == '{' and stack[-1] == '}')):
1914        stack.pop()
1915        if not stack:
1916          return (i, None)
1917      else:
1918        # Mismatched parentheses
1919        return (-1, None)
1920    elif char == ';':
      # Found something that looks like the end of a statement.  If we are
      # currently expecting a '<', the matching '>' must have been an operator,
      # since a template argument list should not contain statements.
1924      while stack and stack[-1] == '>':
1925        stack.pop()
1926      if not stack:
1927        return (-1, None)
1928
1929    i -= 1
1930
1931  return (-1, stack)
1932
1933
1934def ReverseCloseExpression(clean_lines, linenum, pos):
1935  """If input points to ) or } or ] or >, finds the position that opens it.
1936
1937  If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the
1938  linenum/pos that correspond to the opening of the expression.
1939
1940  Args:
1941    clean_lines: A CleansedLines instance containing the file.
1942    linenum: The number of the line to check.
1943    pos: A position on the line.
1944
1945  Returns:
1946    A tuple (line, linenum, pos) pointer *at* the opening brace, or
1947    (line, 0, -1) if we never find the matching opening brace.  Note
1948    we ignore strings and comments when matching; and the line we
1949    return is the 'cleansed' line at linenum.
1950  """
1951  line = clean_lines.elided[linenum]
1952  if line[pos] not in ')}]>':
1953    return (line, 0, -1)
1954
1955  # Check last line
1956  (start_pos, stack) = FindStartOfExpressionInLine(line, pos, [])
1957  if start_pos > -1:
1958    return (line, linenum, start_pos)
1959
1960  # Continue scanning backward
1961  while stack and linenum > 0:
1962    linenum -= 1
1963    line = clean_lines.elided[linenum]
1964    (start_pos, stack) = FindStartOfExpressionInLine(line, len(line) - 1, stack)
1965    if start_pos > -1:
1966      return (line, linenum, start_pos)
1967
1968  # Did not find start of expression before beginning of file, give up
1969  return (line, 0, -1)
1970
1971
1972def CheckForCopyright(filename, lines, error):
1973  """Logs an error if no Copyright message appears at the top of the file."""
1974
1975  # We'll say it should occur by line 10. Don't forget there's a
1976  # dummy line at the front.
1977  for line in xrange(1, min(len(lines), 11)):
1978    if re.search(r'Copyright', lines[line], re.I): break
1979  else:                       # means no copyright line was found
1980    error(filename, 0, 'legal/copyright', 5,
1981          'No copyright message found.  '
1982          'You should have a line: "Copyright [year] <Copyright Owner>"')
1983
1984
1985def GetIndentLevel(line):
1986  """Return the number of leading spaces in line.
1987
1988  Args:
1989    line: A string to check.
1990
1991  Returns:
1992    An integer count of leading spaces, possibly zero.
1993  """
1994  indent = Match(r'^( *)\S', line)
1995  if indent:
1996    return len(indent.group(1))
1997  else:
1998    return 0
1999

def PathSplitToList(path):
2001  """Returns the path split into a list by the separator.
2002
2003  Args:
2004    path: An absolute or relative path (e.g. '/a/b/c/' or '../a')
2005
2006  Returns:
    A list of path components (e.g. ['a', 'b', 'c']).
2008  """
2009  lst = []
2010  while True:
2011    (head, tail) = os.path.split(path)
2012    if head == path:  # absolute paths end
2013      lst.append(head)
2014      break
2015    if tail == path:  # relative paths end
2016      lst.append(tail)
2017      break
2018
2019    path = head
2020    lst.append(tail)
2021
2022  lst.reverse()
2023  return lst
2024

def GetHeaderGuardCPPVariable(filename):
2026  """Returns the CPP variable that should be used as a header guard.
2027
2028  Args:
2029    filename: The name of a C++ header file.
2030
2031  Returns:
2032    The CPP variable that should be used as a header guard in the
2033    named file.
2034
2035  """
2036
2037  # Restores original filename in case that cpplint is invoked from Emacs's
2038  # flymake.
2039  filename = re.sub(r'_flymake\.h$', '.h', filename)
2040  filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)
2041  # Replace 'c++' with 'cpp'.
2042  filename = filename.replace('C++', 'cpp').replace('c++', 'cpp')
2043
2044  fileinfo = FileInfo(filename)
2045  file_path_from_root = fileinfo.RepositoryName()
2046
2047  def FixupPathFromRoot():
2048    if _root_debug:
2049      sys.stderr.write("\n_root fixup, _root = '%s', repository name = '%s'\n"
2050          % (_root, fileinfo.RepositoryName()))
2051
2052    # Process the file path with the --root flag if it was set.
2053    if not _root:
2054      if _root_debug:
2055        sys.stderr.write("_root unspecified\n")
2056      return file_path_from_root
2057
2058    def StripListPrefix(lst, prefix):
      # f(['x', 'y'], ['w', 'z']) -> None  (not a valid prefix)
2060      if lst[:len(prefix)] != prefix:
2061        return None
      # f(['a', 'b', 'c', 'd'], ['a', 'b']) -> ['c', 'd']
2063      return lst[(len(prefix)):]
2064
2065    # root behavior:
2066    #   --root=subdir , lstrips subdir from the header guard
2067    maybe_path = StripListPrefix(PathSplitToList(file_path_from_root),
2068                                 PathSplitToList(_root))
2069
2070    if _root_debug:
2071      sys.stderr.write(("_root lstrip (maybe_path=%s, file_path_from_root=%s," +
2072          " _root=%s)\n") % (maybe_path, file_path_from_root, _root))
2073
2074    if maybe_path:
2075      return os.path.join(*maybe_path)
2076
2077    #   --root=.. , will prepend the outer directory to the header guard
2078    full_path = fileinfo.FullName()
2079    root_abspath = os.path.abspath(_root)
2080
2081    maybe_path = StripListPrefix(PathSplitToList(full_path),
2082                                 PathSplitToList(root_abspath))
2083
2084    if _root_debug:
2085      sys.stderr.write(("_root prepend (maybe_path=%s, full_path=%s, " +
2086          "root_abspath=%s)\n") % (maybe_path, full_path, root_abspath))
2087
2088    if maybe_path:
2089      return os.path.join(*maybe_path)
2090
2091    if _root_debug:
2092      sys.stderr.write("_root ignore, returning %s\n" % (file_path_from_root))
2093
2094    #   --root=FAKE_DIR is ignored
2095    return file_path_from_root
2096
2097  file_path_from_root = FixupPathFromRoot()
2098  return re.sub(r'[^a-zA-Z0-9]', '_', file_path_from_root).upper() + '_'
2099
2100
2101def CheckForHeaderGuard(filename, clean_lines, error):
2102  """Checks that the file contains a header guard.
2103
  Logs an error if no #ifndef header guard is present.  For headers
  that do have a guard, checks that the full pathname is used.
2106
2107  Args:
2108    filename: The name of the C++ header file.
2109    clean_lines: A CleansedLines instance containing the file.
2110    error: The function to call with any errors found.
2111  """
2112
2113  # Don't check for header guards if there are error suppression
2114  # comments somewhere in this file.
2115  #
2116  # Because this is silencing a warning for a nonexistent line, we
2117  # only support the very specific NOLINT(build/header_guard) syntax,
2118  # and not the general NOLINT or NOLINT(*) syntax.
2119  raw_lines = clean_lines.lines_without_raw_strings
2120  for i in raw_lines:
2121    if Search(r'//\s*NOLINT\(build/header_guard\)', i):
2122      return
2123
2124  # Allow pragma once instead of header guards
2125  for i in raw_lines:
2126    if Search(r'^\s*#pragma\s+once', i):
2127      return
2128
2129  cppvar = GetHeaderGuardCPPVariable(filename)
2130
2131  ifndef = ''
2132  ifndef_linenum = 0
2133  define = ''
2134  endif = ''
2135  endif_linenum = 0
2136  for linenum, line in enumerate(raw_lines):
2137    linesplit = line.split()
2138    if len(linesplit) >= 2:
2139      # find the first occurrence of #ifndef and #define, save arg
2140      if not ifndef and linesplit[0] == '#ifndef':
2141        # set ifndef to the header guard presented on the #ifndef line.
2142        ifndef = linesplit[1]
2143        ifndef_linenum = linenum
2144      if not define and linesplit[0] == '#define':
2145        define = linesplit[1]
2146    # find the last occurrence of #endif, save entire line
2147    if line.startswith('#endif'):
2148      endif = line
2149      endif_linenum = linenum
2150
2151  if not ifndef or not define or ifndef != define:
2152    error(filename, 0, 'build/header_guard', 5,
2153          'No #ifndef header guard found, suggested CPP variable is: %s' %
2154          cppvar)
2155    return
2156
2157  # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
2158  # for backward compatibility.
2159  if ifndef != cppvar:
2160    error_level = 0
2161    if ifndef != cppvar + '_':
2162      error_level = 5
2163
2164    ParseNolintSuppressions(filename, raw_lines[ifndef_linenum], ifndef_linenum,
2165                            error)
2166    error(filename, ifndef_linenum, 'build/header_guard', error_level,
2167          '#ifndef header guard has wrong style, please use: %s' % cppvar)
2168
2169  # Check for "//" comments on endif line.
2170  ParseNolintSuppressions(filename, raw_lines[endif_linenum], endif_linenum,
2171                          error)
2172  match = Match(r'#endif\s*//\s*' + cppvar + r'(_)?\b', endif)
2173  if match:
2174    if match.group(1) == '_':
2175      # Issue low severity warning for deprecated double trailing underscore
2176      error(filename, endif_linenum, 'build/header_guard', 0,
2177            '#endif line should be "#endif  // %s"' % cppvar)
2178    return
2179
2180  # Didn't find the corresponding "//" comment.  If this file does not
2181  # contain any "//" comments at all, it could be that the compiler
2182  # only wants "/**/" comments, look for those instead.
2183  no_single_line_comments = True
2184  for i in xrange(1, len(raw_lines) - 1):
2185    line = raw_lines[i]
2186    if Match(r'^(?:(?:\'(?:\.|[^\'])*\')|(?:"(?:\.|[^"])*")|[^\'"])*//', line):
2187      no_single_line_comments = False
2188      break
2189
2190  if no_single_line_comments:
2191    match = Match(r'#endif\s*/\*\s*' + cppvar + r'(_)?\s*\*/', endif)
2192    if match:
2193      if match.group(1) == '_':
2194        # Low severity warning for double trailing underscore
2195        error(filename, endif_linenum, 'build/header_guard', 0,
2196              '#endif line should be "#endif  /* %s */"' % cppvar)
2197      return
2198
2199  # Didn't find anything
2200  error(filename, endif_linenum, 'build/header_guard', 5,
2201        '#endif line should be "#endif  // %s"' % cppvar)
2202
2203
2204def CheckHeaderFileIncluded(filename, include_state, error):
2205  """Logs an error if a source file does not include its header."""
2206
2207  # Do not check test files
2208  fileinfo = FileInfo(filename)
2209  if Search(_TEST_FILE_SUFFIX, fileinfo.BaseName()):
2210    return
2211
2212  for ext in GetHeaderExtensions():
2213    basefilename = filename[0:len(filename) - len(fileinfo.Extension())]
2214    headerfile = basefilename + '.' + ext
2215    if not os.path.exists(headerfile):
2216      continue
2217    headername = FileInfo(headerfile).RepositoryName()
2218    first_include = None
2219    for section_list in include_state.include_list:
2220      for f in section_list:
2221        if headername in f[0] or f[0] in headername:
2222          return
2223        if not first_include:
2224          first_include = f[1]
2225
2226    error(filename, first_include, 'build/include', 5,
2227          '%s should include its header file %s' % (fileinfo.RepositoryName(),
2228                                                    headername))
2229
2230
2231def CheckForBadCharacters(filename, lines, error):
2232  """Logs an error for each line containing bad characters.
2233
2234  Two kinds of bad characters:
2235
2236  1. Unicode replacement characters: These indicate that either the file
2237  contained invalid UTF-8 (likely) or Unicode replacement characters (which
2238  it shouldn't).  Note that it's possible for this to throw off line
2239  numbering if the invalid UTF-8 occurred adjacent to a newline.
2240
2241  2. NUL bytes.  These are problematic for some tools.
2242
2243  Args:
2244    filename: The name of the current file.
2245    lines: An array of strings, each representing a line of the file.
2246    error: The function to call with any errors found.
2247  """
2248  for linenum, line in enumerate(lines):
2249    if unicode_escape_decode('\ufffd') in line:
2250      error(filename, linenum, 'readability/utf8', 5,
2251            'Line contains invalid UTF-8 (or Unicode replacement character).')
2252    if '\0' in line:
2253      error(filename, linenum, 'readability/nul', 5, 'Line contains NUL byte.')
2254
2255
2256def CheckForNewlineAtEOF(filename, lines, error):
2257  """Logs an error if there is no newline char at the end of the file.
2258
2259  Args:
2260    filename: The name of the current file.
2261    lines: An array of strings, each representing a line of the file.
2262    error: The function to call with any errors found.
2263  """
2264
2265  # The array lines() was created by adding two newlines to the
2266  # original file (go figure), then splitting on \n.
2267  # To verify that the file ends in \n, we just have to make sure the
2268  # last-but-two element of lines() exists and is empty.
2269  if len(lines) < 3 or lines[-2]:
2270    error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
2271          'Could not find a newline character at the end of the file.')
2272
2273
2274def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
2275  """Logs an error if we see /* ... */ or "..." that extend past one line.
2276
2277  /* ... */ comments are legit inside macros, for one line.
2278  Otherwise, we prefer // comments, so it's ok to warn about the
2279  other.  Likewise, it's ok for strings to extend across multiple
2280  lines, as long as a line continuation character (backslash)
2281  terminates each line. Although not currently prohibited by the C++
2282  style guide, it's ugly and unnecessary. We don't do well with either
2283  in this lint program, so we warn about both.
2284
2285  Args:
2286    filename: The name of the current file.
2287    clean_lines: A CleansedLines instance containing the file.
2288    linenum: The number of the line to check.
2289    error: The function to call with any errors found.
2290  """
2291  line = clean_lines.elided[linenum]
2292
2293  # Remove all \\ (escaped backslashes) from the line. They are OK, and the
2294  # second (escaped) slash may trigger later \" detection erroneously.
2295  line = line.replace('\\\\', '')
2296
2297  if line.count('/*') > line.count('*/'):
2298    error(filename, linenum, 'readability/multiline_comment', 5,
2299          'Complex multi-line /*...*/-style comment found. '
2300          'Lint may give bogus warnings.  '
2301          'Consider replacing these with //-style comments, '
2302          'with #if 0...#endif, '
2303          'or with more clearly structured multi-line comments.')
2304
2305  if (line.count('"') - line.count('\\"')) % 2:
2306    error(filename, linenum, 'readability/multiline_string', 5,
2307          'Multi-line string ("...") found.  This lint script doesn\'t '
2308          'do well with such strings, and may give bogus warnings.  '
2309          'Use C++11 raw strings or concatenation instead.')
2310
2311
2312# (non-threadsafe name, thread-safe alternative, validation pattern)
2313#
2314# The validation pattern is used to eliminate false positives such as:
2315#  _rand();               // false positive due to substring match.
2316#  ->rand();              // some member function rand().
2317#  ACMRandom rand(seed);  // some variable named rand.
2318#  ISAACRandom rand();    // another variable named rand.
2319#
2320# Basically we require the return value of these functions to be used
2321# in some expression context on the same line by matching on some
2322# operator before the function name.  This eliminates constructors and
2323# member function calls.
2324_UNSAFE_FUNC_PREFIX = r'(?:[-+*/=%^&|(<]\s*|>\s+)'
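# For example, "r = rand();" matches the rand() entry below because of the
# preceding '=', while "ACMRandom rand(seed);" does not: there is no operator
# before "rand" and the pattern requires an empty argument list.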
2325_THREADING_LIST = (
2326    ('asctime(', 'asctime_r(', _UNSAFE_FUNC_PREFIX + r'asctime\([^)]+\)'),
2327    ('ctime(', 'ctime_r(', _UNSAFE_FUNC_PREFIX + r'ctime\([^)]+\)'),
2328    ('getgrgid(', 'getgrgid_r(', _UNSAFE_FUNC_PREFIX + r'getgrgid\([^)]+\)'),
2329    ('getgrnam(', 'getgrnam_r(', _UNSAFE_FUNC_PREFIX + r'getgrnam\([^)]+\)'),
2330    ('getlogin(', 'getlogin_r(', _UNSAFE_FUNC_PREFIX + r'getlogin\(\)'),
2331    ('getpwnam(', 'getpwnam_r(', _UNSAFE_FUNC_PREFIX + r'getpwnam\([^)]+\)'),
2332    ('getpwuid(', 'getpwuid_r(', _UNSAFE_FUNC_PREFIX + r'getpwuid\([^)]+\)'),
2333    ('gmtime(', 'gmtime_r(', _UNSAFE_FUNC_PREFIX + r'gmtime\([^)]+\)'),
2334    ('localtime(', 'localtime_r(', _UNSAFE_FUNC_PREFIX + r'localtime\([^)]+\)'),
2335    ('rand(', 'rand_r(', _UNSAFE_FUNC_PREFIX + r'rand\(\)'),
2336    ('strtok(', 'strtok_r(',
2337     _UNSAFE_FUNC_PREFIX + r'strtok\([^)]+\)'),
2338    ('ttyname(', 'ttyname_r(', _UNSAFE_FUNC_PREFIX + r'ttyname\([^)]+\)'),
2339    )
2340
2341
2342def CheckPosixThreading(filename, clean_lines, linenum, error):
2343  """Checks for calls to thread-unsafe functions.
2344
  Much code was originally written without consideration for
  multi-threading. Also, engineers may be relying on old experience,
  having learned POSIX before the threading extensions were added. These
  checks guide engineers toward the thread-safe functions (when using
  POSIX directly).
2350
2351  Args:
2352    filename: The name of the current file.
2353    clean_lines: A CleansedLines instance containing the file.
2354    linenum: The number of the line to check.
2355    error: The function to call with any errors found.
2356  """
2357  line = clean_lines.elided[linenum]
2358  for single_thread_func, multithread_safe_func, pattern in _THREADING_LIST:
2359    # Additional pattern matching check to confirm that this is the
2360    # function we are looking for
2361    if Search(pattern, line):
2362      error(filename, linenum, 'runtime/threadsafe_fn', 2,
2363            'Consider using ' + multithread_safe_func +
2364            '...) instead of ' + single_thread_func +
2365            '...) for improved thread safety.')
2366
2367
2368def CheckVlogArguments(filename, clean_lines, linenum, error):
2369  """Checks that VLOG() is only used for defining a logging level.
2370
2371  For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and
2372  VLOG(FATAL) are not.
2373
2374  Args:
2375    filename: The name of the current file.
2376    clean_lines: A CleansedLines instance containing the file.
2377    linenum: The number of the line to check.
2378    error: The function to call with any errors found.
2379  """
2380  line = clean_lines.elided[linenum]
2381  if Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line):
2382    error(filename, linenum, 'runtime/vlog', 5,
2383          'VLOG() should be used with numeric verbosity level.  '
2384          'Use LOG() if you want symbolic severity levels.')
2385
2386# Matches invalid increment: *count++, which moves pointer instead of
2387# incrementing a value.
2388_RE_PATTERN_INVALID_INCREMENT = re.compile(
2389    r'^\s*\*\w+(\+\+|--);')
2390
2391
2392def CheckInvalidIncrement(filename, clean_lines, linenum, error):
2393  """Checks for invalid increment *count++.
2394
  For example, the following function:
2396  void increment_counter(int* count) {
2397    *count++;
2398  }
  is invalid, because it effectively does count++, moving the pointer, and
  should be replaced with ++*count, (*count)++ or *count += 1.
2401
2402  Args:
2403    filename: The name of the current file.
2404    clean_lines: A CleansedLines instance containing the file.
2405    linenum: The number of the line to check.
2406    error: The function to call with any errors found.
2407  """
2408  line = clean_lines.elided[linenum]
2409  if _RE_PATTERN_INVALID_INCREMENT.match(line):
2410    error(filename, linenum, 'runtime/invalid_increment', 5,
2411          'Changing pointer instead of value (or unused value of operator*).')
2412
2413
2414def IsMacroDefinition(clean_lines, linenum):
2415  if Search(r'^#define', clean_lines[linenum]):
2416    return True
2417
2418  if linenum > 0 and Search(r'\\$', clean_lines[linenum - 1]):
2419    return True
2420
2421  return False
2422
2423
2424def IsForwardClassDeclaration(clean_lines, linenum):
2425  return Match(r'^\s*(\btemplate\b)*.*class\s+\w+;\s*$', clean_lines[linenum])
2426
2427
2428class _BlockInfo(object):
2429  """Stores information about a generic block of code."""
2430
2431  def __init__(self, linenum, seen_open_brace):
2432    self.starting_linenum = linenum
2433    self.seen_open_brace = seen_open_brace
2434    self.open_parentheses = 0
2435    self.inline_asm = _NO_ASM
2436    self.check_namespace_indentation = False
2437
2438  def CheckBegin(self, filename, clean_lines, linenum, error):
2439    """Run checks that applies to text up to the opening brace.
2440
2441    This is mostly for checking the text after the class identifier
2442    and the "{", usually where the base class is specified.  For other
2443    blocks, there isn't much to check, so we always pass.
2444
2445    Args:
2446      filename: The name of the current file.
2447      clean_lines: A CleansedLines instance containing the file.
2448      linenum: The number of the line to check.
2449      error: The function to call with any errors found.
2450    """
2451    pass
2452
2453  def CheckEnd(self, filename, clean_lines, linenum, error):
2454    """Run checks that applies to text after the closing brace.
2455
2456    This is mostly used for checking end of namespace comments.
2457
2458    Args:
2459      filename: The name of the current file.
2460      clean_lines: A CleansedLines instance containing the file.
2461      linenum: The number of the line to check.
2462      error: The function to call with any errors found.
2463    """
2464    pass
2465
2466  def IsBlockInfo(self):
2467    """Returns true if this block is a _BlockInfo.
2468
2469    This is convenient for verifying that an object is an instance of
2470    a _BlockInfo, but not an instance of any of the derived classes.
2471
2472    Returns:
2473      True for this class, False for derived classes.
2474    """
2475    return self.__class__ == _BlockInfo
2476
2477
2478class _ExternCInfo(_BlockInfo):
2479  """Stores information about an 'extern "C"' block."""
2480
2481  def __init__(self, linenum):
2482    _BlockInfo.__init__(self, linenum, True)
2483
2484
2485class _ClassInfo(_BlockInfo):
2486  """Stores information about a class."""
2487
2488  def __init__(self, name, class_or_struct, clean_lines, linenum):
2489    _BlockInfo.__init__(self, linenum, False)
2490    self.name = name
2491    self.is_derived = False
2492    self.check_namespace_indentation = True
2493    if class_or_struct == 'struct':
2494      self.access = 'public'
2495      self.is_struct = True
2496    else:
2497      self.access = 'private'
2498      self.is_struct = False
2499
2500    # Remember initial indentation level for this class.  Using raw_lines here
2501    # instead of elided to account for leading comments.
2502    self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum])
2503
2504    # Try to find the end of the class.  This will be confused by things like:
2505    #   class A {
2506    #   } *x = { ...
2507    #
2508    # But it's still good enough for CheckSectionSpacing.
2509    self.last_line = 0
2510    depth = 0
2511    for i in range(linenum, clean_lines.NumLines()):
2512      line = clean_lines.elided[i]
2513      depth += line.count('{') - line.count('}')
2514      if not depth:
2515        self.last_line = i
2516        break
2517
2518  def CheckBegin(self, filename, clean_lines, linenum, error):
2519    # Look for a bare ':'
2520    if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]):
2521      self.is_derived = True
2522
2523  def CheckEnd(self, filename, clean_lines, linenum, error):
2524    # If there is a DISALLOW macro, it should appear near the end of
2525    # the class.
2526    seen_last_thing_in_class = False
2527    for i in xrange(linenum - 1, self.starting_linenum, -1):
2528      match = Search(
2529          r'\b(DISALLOW_COPY_AND_ASSIGN|DISALLOW_IMPLICIT_CONSTRUCTORS)\(' +
2530          self.name + r'\)',
2531          clean_lines.elided[i])
2532      if match:
2533        if seen_last_thing_in_class:
2534          error(filename, i, 'readability/constructors', 3,
2535                match.group(1) + ' should be the last thing in the class')
2536        break
2537
2538      if not Match(r'^\s*$', clean_lines.elided[i]):
2539        seen_last_thing_in_class = True
2540
2541    # Check that closing brace is aligned with beginning of the class.
2542    # Only do this if the closing brace is indented by only whitespaces.
2543    # This means we will not check single-line class definitions.
2544    indent = Match(r'^( *)\}', clean_lines.elided[linenum])
2545    if indent and len(indent.group(1)) != self.class_indent:
2546      if self.is_struct:
2547        parent = 'struct ' + self.name
2548      else:
2549        parent = 'class ' + self.name
2550      error(filename, linenum, 'whitespace/indent', 3,
2551            'Closing brace should be aligned with beginning of %s' % parent)
2552
2553
2554class _NamespaceInfo(_BlockInfo):
2555  """Stores information about a namespace."""
2556
2557  def __init__(self, name, linenum):
2558    _BlockInfo.__init__(self, linenum, False)
2559    self.name = name or ''
2560    self.check_namespace_indentation = True
2561
2562  def CheckEnd(self, filename, clean_lines, linenum, error):
2563    """Check end of namespace comments."""
2564    line = clean_lines.raw_lines[linenum]
2565
    # Check how many lines are enclosed in this namespace.  Don't issue
2567    # warning for missing namespace comments if there aren't enough
2568    # lines.  However, do apply checks if there is already an end of
2569    # namespace comment and it's incorrect.
2570    #
2571    # TODO(unknown): We always want to check end of namespace comments
2572    # if a namespace is large, but sometimes we also want to apply the
2573    # check if a short namespace contained nontrivial things (something
2574    # other than forward declarations).  There is currently no logic on
2575    # deciding what these nontrivial things are, so this check is
2576    # triggered by namespace size only, which works most of the time.
2577    if (linenum - self.starting_linenum < 10
2578        and not Match(r'^\s*};*\s*(//|/\*).*\bnamespace\b', line)):
2579      return
2580
2581    # Look for matching comment at end of namespace.
2582    #
2583    # Note that we accept C style "/* */" comments for terminating
2584    # namespaces, so that code that terminate namespaces inside
2585    # preprocessor macros can be cpplint clean.
2586    #
2587    # We also accept stuff like "// end of namespace <name>." with the
2588    # period at the end.
2589    #
2590    # Besides these, we don't accept anything else, otherwise we might
    # get false negatives when the existing comment is a substring of the
2592    # expected namespace.
2593    if self.name:
2594      # Named namespace
2595      if not Match((r'^\s*};*\s*(//|/\*).*\bnamespace\s+' +
2596                    re.escape(self.name) + r'[\*/\.\\\s]*$'),
2597                   line):
2598        error(filename, linenum, 'readability/namespace', 5,
2599              'Namespace should be terminated with "// namespace %s"' %
2600              self.name)
2601    else:
2602      # Anonymous namespace
2603      if not Match(r'^\s*};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line):
2604        # If "// namespace anonymous" or "// anonymous namespace (more text)",
2605        # mention "// anonymous namespace" as an acceptable form
2606        if Match(r'^\s*}.*\b(namespace anonymous|anonymous namespace)\b', line):
2607          error(filename, linenum, 'readability/namespace', 5,
2608                'Anonymous namespace should be terminated with "// namespace"'
2609                ' or "// anonymous namespace"')
2610        else:
2611          error(filename, linenum, 'readability/namespace', 5,
2612                'Anonymous namespace should be terminated with "// namespace"')
2613
2614
2615class _PreprocessorInfo(object):
2616  """Stores checkpoints of nesting stacks when #if/#else is seen."""
2617
2618  def __init__(self, stack_before_if):
2619    # The entire nesting stack before #if
2620    self.stack_before_if = stack_before_if
2621
2622    # The entire nesting stack up to #else
2623    self.stack_before_else = []
2624
2625    # Whether we have already seen #else or #elif
2626    self.seen_else = False
2627
2628
2629class NestingState(object):
2630  """Holds states related to parsing braces."""
2631
2632  def __init__(self):
2633    # Stack for tracking all braces.  An object is pushed whenever we
2634    # see a "{", and popped when we see a "}".  Only 3 types of
2635    # objects are possible:
2636    # - _ClassInfo: a class or struct.
2637    # - _NamespaceInfo: a namespace.
2638    # - _BlockInfo: some other type of block.
2639    self.stack = []
2640
2641    # Top of the previous stack before each Update().
2642    #
2643    # Because the nesting_stack is updated at the end of each line, we
2644    # had to do some convoluted checks to find out what is the current
2645    # scope at the beginning of the line.  This check is simplified by
2646    # saving the previous top of nesting stack.
2647    #
2648    # We could save the full stack, but we only need the top.  Copying
2649    # the full nesting stack would slow down cpplint by ~10%.
2650    self.previous_stack_top = []
2651
2652    # Stack of _PreprocessorInfo objects.
2653    self.pp_stack = []
2654
2655  def SeenOpenBrace(self):
2656    """Check if we have seen the opening brace for the innermost block.
2657
2658    Returns:
2659      True if we have seen the opening brace, False if the innermost
2660      block is still expecting an opening brace.
2661    """
2662    return (not self.stack) or self.stack[-1].seen_open_brace
2663
2664  def InNamespaceBody(self):
2665    """Check if we are currently one level inside a namespace body.
2666
2667    Returns:
2668      True if top of the stack is a namespace block, False otherwise.
2669    """
2670    return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
2671
2672  def InExternC(self):
2673    """Check if we are currently one level inside an 'extern "C"' block.
2674
2675    Returns:
2676      True if top of the stack is an extern block, False otherwise.
2677    """
2678    return self.stack and isinstance(self.stack[-1], _ExternCInfo)
2679
2680  def InClassDeclaration(self):
2681    """Check if we are currently one level inside a class or struct declaration.
2682
2683    Returns:
2684      True if top of the stack is a class/struct, False otherwise.
2685    """
2686    return self.stack and isinstance(self.stack[-1], _ClassInfo)
2687
2688  def InAsmBlock(self):
2689    """Check if we are currently one level inside an inline ASM block.
2690
2691    Returns:
2692      True if the top of the stack is a block containing inline ASM.
2693    """
2694    return self.stack and self.stack[-1].inline_asm != _NO_ASM
2695
2696  def InTemplateArgumentList(self, clean_lines, linenum, pos):
2697    """Check if current position is inside template argument list.
2698
2699    Args:
2700      clean_lines: A CleansedLines instance containing the file.
2701      linenum: The number of the line to check.
2702      pos: position just after the suspected template argument.
2703    Returns:
2704      True if (linenum, pos) is inside template arguments.
2705    """
2706    while linenum < clean_lines.NumLines():
2707      # Find the earliest character that might indicate a template argument
2708      line = clean_lines.elided[linenum]
2709      match = Match(r'^[^{};=\[\]\.<>]*(.)', line[pos:])
2710      if not match:
2711        linenum += 1
2712        pos = 0
2713        continue
2714      token = match.group(1)
2715      pos += len(match.group(0))
2716
2717      # These things do not look like template argument list:
2718      #   class Suspect {
2719      #   class Suspect x; }
2720      if token in ('{', '}', ';'): return False
2721
2722      # These things look like template argument list:
2723      #   template <class Suspect>
2724      #   template <class Suspect = default_value>
2725      #   template <class Suspect[]>
2726      #   template <class Suspect...>
2727      if token in ('>', '=', '[', ']', '.'): return True
2728
2729      # Check if token is an unmatched '<'.
2730      # If not, move on to the next character.
2731      if token != '<':
2732        pos += 1
2733        if pos >= len(line):
2734          linenum += 1
2735          pos = 0
2736        continue
2737
2738      # We can't be sure if we just find a single '<', and need to
2739      # find the matching '>'.
2740      (_, end_line, end_pos) = CloseExpression(clean_lines, linenum, pos - 1)
2741      if end_pos < 0:
2742        # Not sure if template argument list or syntax error in file
2743        return False
2744      linenum = end_line
2745      pos = end_pos
2746    return False
2747
2748  def UpdatePreprocessor(self, line):
2749    """Update preprocessor stack.
2750
2751    We need to handle preprocessors due to classes like this:
2752      #ifdef SWIG
2753      struct ResultDetailsPageElementExtensionPoint {
2754      #else
2755      struct ResultDetailsPageElementExtensionPoint : public Extension {
2756      #endif
2757
2758    We make the following assumptions (good enough for most files):
2759    - Preprocessor condition evaluates to true from #if up to first
2760      #else/#elif/#endif.
2761
2762    - Preprocessor condition evaluates to false from #else/#elif up
2763      to #endif.  We still perform lint checks on these lines, but
2764      these do not affect nesting stack.
2765
2766    Args:
2767      line: current line to check.
2768    """
2769    if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
2770      # Beginning of #if block, save the nesting stack here.  The saved
2771      # stack will allow us to restore the parsing state in the #else case.
2772      self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
2773    elif Match(r'^\s*#\s*(else|elif)\b', line):
2774      # Beginning of #else block
2775      if self.pp_stack:
2776        if not self.pp_stack[-1].seen_else:
2777          # This is the first #else or #elif block.  Remember the
2778          # whole nesting stack up to this point.  This is what we
2779          # keep after the #endif.
2780          self.pp_stack[-1].seen_else = True
2781          self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)
2782
2783        # Restore the stack to how it was before the #if
2784        self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
2785      else:
2786        # TODO(unknown): unexpected #else, issue warning?
2787        pass
2788    elif Match(r'^\s*#\s*endif\b', line):
2789      # End of #if or #else blocks.
2790      if self.pp_stack:
2791        # If we saw an #else, we will need to restore the nesting
2792        # stack to its former state before the #else, otherwise we
2793        # will just continue from where we left off.
2794        if self.pp_stack[-1].seen_else:
2795          # Here we can just use a shallow copy since we are the last
2796          # reference to it.
2797          self.stack = self.pp_stack[-1].stack_before_else
2798        # Drop the corresponding #if
2799        self.pp_stack.pop()
2800      else:
2801        # TODO(unknown): unexpected #endif, issue warning?
2802        pass
2803
2804  # TODO(unknown): Update() is too long, but we will refactor later.
2805  def Update(self, filename, clean_lines, linenum, error):
2806    """Update nesting state with current line.
2807
2808    Args:
2809      filename: The name of the current file.
2810      clean_lines: A CleansedLines instance containing the file.
2811      linenum: The number of the line to check.
2812      error: The function to call with any errors found.
2813    """
2814    line = clean_lines.elided[linenum]
2815
2816    # Remember top of the previous nesting stack.
2817    #
2818    # The stack is always pushed/popped and not modified in place, so
2819    # we can just do a shallow copy instead of copy.deepcopy.  Using
2820    # deepcopy would slow down cpplint by ~28%.
2821    if self.stack:
2822      self.previous_stack_top = self.stack[-1]
2823    else:
2824      self.previous_stack_top = None
2825
2826    # Update pp_stack
2827    self.UpdatePreprocessor(line)
2828
2829    # Count parentheses.  This is to avoid adding struct arguments to
2830    # the nesting stack.
2831    if self.stack:
2832      inner_block = self.stack[-1]
2833      depth_change = line.count('(') - line.count(')')
2834      inner_block.open_parentheses += depth_change
2835
2836      # Also check if we are starting or ending an inline assembly block.
2837      if inner_block.inline_asm in (_NO_ASM, _END_ASM):
2838        if (depth_change != 0 and
2839            inner_block.open_parentheses == 1 and
2840            _MATCH_ASM.match(line)):
2841          # Enter assembly block
2842          inner_block.inline_asm = _INSIDE_ASM
2843        else:
2844          # Not entering assembly block.  If previous line was _END_ASM,
2845          # we will now shift to _NO_ASM state.
2846          inner_block.inline_asm = _NO_ASM
2847      elif (inner_block.inline_asm == _INSIDE_ASM and
2848            inner_block.open_parentheses == 0):
2849        # Exit assembly block
2850        inner_block.inline_asm = _END_ASM
2851
2852    # Consume namespace declaration at the beginning of the line.  Do
2853    # this in a loop so that we catch same line declarations like this:
2854    #   namespace proto2 { namespace bridge { class MessageSet; } }
2855    while True:
2856      # Match start of namespace.  The "\b\s*" below catches namespace
      # declarations even if they aren't followed by whitespace; this
      # is so that we don't confuse our namespace checker.  The
2859      # missing spaces will be flagged by CheckSpacing.
2860      namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
2861      if not namespace_decl_match:
2862        break
2863
2864      new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
2865      self.stack.append(new_namespace)
2866
2867      line = namespace_decl_match.group(2)
2868      if line.find('{') != -1:
2869        new_namespace.seen_open_brace = True
2870        line = line[line.find('{') + 1:]
2871
2872    # Look for a class declaration in whatever is left of the line
2873    # after parsing namespaces.  The regexp accounts for decorated classes
2874    # such as in:
2875    #   class LOCKABLE API Object {
2876    #   };
2877    class_decl_match = Match(
2878        r'^(\s*(?:template\s*<[\w\s<>,:=]*>\s*)?'
2879        r'(class|struct)\s+(?:[A-Z_]+\s+)*(\w+(?:::\w+)*))'
2880        r'(.*)$', line)
2881    if (class_decl_match and
2882        (not self.stack or self.stack[-1].open_parentheses == 0)):
2883      # We do not want to accept classes that are actually template arguments:
2884      #   template <class Ignore1,
2885      #             class Ignore2 = Default<Args>,
2886      #             template <Args> class Ignore3>
2887      #   void Function() {};
2888      #
2889      # To avoid template argument cases, we scan forward and look for
2890      # an unmatched '>'.  If we see one, assume we are inside a
2891      # template argument list.
2892      end_declaration = len(class_decl_match.group(1))
2893      if not self.InTemplateArgumentList(clean_lines, linenum, end_declaration):
2894        self.stack.append(_ClassInfo(
2895            class_decl_match.group(3), class_decl_match.group(2),
2896            clean_lines, linenum))
2897        line = class_decl_match.group(4)
2898
2899    # If we have not yet seen the opening brace for the innermost block,
2900    # run checks here.
2901    if not self.SeenOpenBrace():
2902      self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
2903
2904    # Update access control if we are inside a class/struct
2905    if self.stack and isinstance(self.stack[-1], _ClassInfo):
2906      classinfo = self.stack[-1]
2907      access_match = Match(
2908          r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?'
2909          r':(?:[^:]|$)',
2910          line)
2911      if access_match:
2912        classinfo.access = access_match.group(2)
2913
2914        # Check that access keywords are indented +1 space.  Skip this
2915        # check if the keywords are not preceded by whitespaces.
2916        indent = access_match.group(1)
2917        if (len(indent) != classinfo.class_indent + 1 and
2918            Match(r'^\s*$', indent)):
2919          if classinfo.is_struct:
2920            parent = 'struct ' + classinfo.name
2921          else:
2922            parent = 'class ' + classinfo.name
2923          slots = ''
2924          if access_match.group(3):
2925            slots = access_match.group(3)
2926          error(filename, linenum, 'whitespace/indent', 3,
2927                '%s%s: should be indented +1 space inside %s' % (
2928                    access_match.group(2), slots, parent))
2929
2930    # Consume braces or semicolons from what's left of the line
2931    while True:
2932      # Match first brace, semicolon, or closed parenthesis.
2933      matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line)
2934      if not matched:
2935        break
2936
2937      token = matched.group(1)
2938      if token == '{':
        # If namespace or class hasn't seen an opening brace yet, mark
2940        # namespace/class head as complete.  Push a new block onto the
2941        # stack otherwise.
2942        if not self.SeenOpenBrace():
2943          self.stack[-1].seen_open_brace = True
2944        elif Match(r'^extern\s*"[^"]*"\s*\{', line):
2945          self.stack.append(_ExternCInfo(linenum))
2946        else:
2947          self.stack.append(_BlockInfo(linenum, True))
2948          if _MATCH_ASM.match(line):
2949            self.stack[-1].inline_asm = _BLOCK_ASM
2950
2951      elif token == ';' or token == ')':
2952        # If we haven't seen an opening brace yet, but we already saw
2953        # a semicolon, this is probably a forward declaration.  Pop
2954        # the stack for these.
2955        #
2956        # Similarly, if we haven't seen an opening brace yet, but we
2957        # already saw a closing parenthesis, then these are probably
2958        # function arguments with extra "class" or "struct" keywords.
        # Also pop the stack for these.
2960        if not self.SeenOpenBrace():
2961          self.stack.pop()
2962      else:  # token == '}'
2963        # Perform end of block checks and pop the stack.
2964        if self.stack:
2965          self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
2966          self.stack.pop()
2967      line = matched.group(2)
2968
2969  def InnermostClass(self):
2970    """Get class info on the top of the stack.
2971
2972    Returns:
2973      A _ClassInfo object if we are inside a class, or None otherwise.
2974    """
2975    for i in range(len(self.stack), 0, -1):
2976      classinfo = self.stack[i - 1]
2977      if isinstance(classinfo, _ClassInfo):
2978        return classinfo
2979    return None
2980
2981  def CheckCompletedBlocks(self, filename, error):
2982    """Checks that all classes and namespaces have been completely parsed.
2983
2984    Call this when all lines in a file have been processed.
2985    Args:
2986      filename: The name of the current file.
2987      error: The function to call with any errors found.
2988    """
2989    # Note: This test can result in false positives if #ifdef constructs
2990    # get in the way of brace matching. See the testBuildClass test in
2991    # cpplint_unittest.py for an example of this.
2992    for obj in self.stack:
2993      if isinstance(obj, _ClassInfo):
2994        error(filename, obj.starting_linenum, 'build/class', 5,
2995              'Failed to find complete declaration of class %s' %
2996              obj.name)
2997      elif isinstance(obj, _NamespaceInfo):
2998        error(filename, obj.starting_linenum, 'build/namespaces', 5,
2999              'Failed to find complete declaration of namespace %s' %
3000              obj.name)
3001
3002
3003def CheckForNonStandardConstructs(filename, clean_lines, linenum,
3004                                  nesting_state, error):
3005  r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
3006
3007  Complain about several constructs which gcc-2 accepts, but which are
3008  not standard C++.  Warning about these in lint is one way to ease the
3009  transition to new compilers.
3010  - put storage class first (e.g. "static const" instead of "const static").
3011  - "%lld" instead of %qd" in printf-type functions.
3012  - "%1$d" is non-standard in printf-type functions.
3013  - "\%" is an undefined character escape sequence.
3014  - text after #endif is not allowed.
3015  - invalid inner-style forward declaration.
3016  - >? and <? operators, and their >?= and <?= cousins.
3017
3018  Additionally, check for constructor/destructor style violations and reference
3019  members, as it is very convenient to do so while checking for
3020  gcc-2 compliance.
3021
3022  Args:
3023    filename: The name of the current file.
3024    clean_lines: A CleansedLines instance containing the file.
3025    linenum: The number of the line to check.
3026    nesting_state: A NestingState instance which maintains information about
3027                   the current stack of nested blocks being parsed.
3028    error: A callable to which errors are reported, which takes 4 arguments:
3029           filename, line number, error level, and message
3030  """
3031
3032  # Remove comments from the line, but leave in strings for now.
3033  line = clean_lines.lines[linenum]
3034
3035  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
3036    error(filename, linenum, 'runtime/printf_format', 3,
3037          '%q in format strings is deprecated.  Use %ll instead.')
3038
3039  if Search(r'printf\s*\(.*".*%\d+\$', line):
3040    error(filename, linenum, 'runtime/printf_format', 2,
3041          '%N$ formats are unconventional.  Try rewriting to avoid them.')
3042
3043  # Remove escaped backslashes before looking for undefined escapes.
3044  line = line.replace('\\\\', '')
3045
3046  if Search(r'("|\').*\\(%|\[|\(|{)', line):
3047    error(filename, linenum, 'build/printf_format', 3,
3048          '%, [, (, and { are undefined character escapes.  Unescape them.')
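  # For illustration, a literal such as "100\% done" would be flagged here;
  # '%' needs no backslash escape in C/C++ strings, so "100% done" is fine.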
3049
3050  # For the rest, work with both comments and strings removed.
3051  line = clean_lines.elided[linenum]
3052
3053  if Search(r'\b(const|volatile|void|char|short|int|long'
3054            r'|float|double|signed|unsigned'
3055            r'|schar|u?int8|u?int16|u?int32|u?int64)'
3056            r'\s+(register|static|extern|typedef)\b',
3057            line):
3058    error(filename, linenum, 'build/storage_class', 5,
3059          'Storage-class specifier (static, extern, typedef, etc) should be '
3060          'at the beginning of the declaration.')
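  # For illustration, "int static x;" and "const typedef int T;" would be
  # flagged, while "static int x;" and "typedef const int T;" would not.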
3061
3062  if Match(r'\s*#\s*endif\s*[^/\s]+', line):
3063    error(filename, linenum, 'build/endif_comment', 5,
3064          'Uncommented text after #endif is non-standard.  Use a comment.')
3065
3066  if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
3067    error(filename, linenum, 'build/forward_decl', 5,
3068          'Inner-style forward declarations are invalid.  Remove this line.')
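  # For illustration, "class Foo::Bar;" would be flagged; forward-declare the
  # outer class ("class Foo;") or include the header that defines Bar instead.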
3069
3070  if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
3071            line):
3072    error(filename, linenum, 'build/deprecated', 3,
3073          '>? and <? (max and min) operators are non-standard and deprecated.')
3074
3075  if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', line):
3076    # TODO(unknown): Could it be expanded safely to arbitrary references,
3077    # without triggering too many false positives? The first
3078    # attempt triggered 5 warnings for mostly benign code in the regtest, hence
3079    # the restriction.
3080    # Here's the original regexp, for the reference:
3081    # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
3082    # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
3083    error(filename, linenum, 'runtime/member_string_references', 2,
3084          'const string& members are dangerous. It is much better to use '
3085          'alternatives, such as pointers or simple constants.')
3086
3087  # Everything else in this function operates on class declarations.
3088  # Return early if the top of the nesting stack is not a class, or if
3089  # the class head is not completed yet.
3090  classinfo = nesting_state.InnermostClass()
3091  if not classinfo or not classinfo.seen_open_brace:
3092    return
3093
3094  # The class may have been declared with namespace or classname qualifiers.
3095  # The constructor and destructor will not have those qualifiers.
3096  base_classname = classinfo.name.split('::')[-1]
3097
3098  # Look for single-argument constructors that aren't marked explicit.
3099  # Technically a valid construct, but against style.
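  # For illustration (hypothetical class Foo), the intent is roughly:
  #   Foo(int x);                          // flagged: callable with one arg
  #   explicit Foo(int x);                 // OK
  #   Foo(const Foo& other);               // OK: copy constructor
  #   Foo(std::initializer_list<int> v);   // OK: initializer-list constructor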
3100  explicit_constructor_match = Match(
3101      r'\s+(?:(?:inline|constexpr)\s+)*(explicit\s+)?'
3102      r'(?:(?:inline|constexpr)\s+)*%s\s*'
3103      r'\(((?:[^()]|\([^()]*\))*)\)'
3104      % re.escape(base_classname),
3105      line)
3106
3107  if explicit_constructor_match:
3108    is_marked_explicit = explicit_constructor_match.group(1)
3109
3110    if not explicit_constructor_match.group(2):
3111      constructor_args = []
3112    else:
3113      constructor_args = explicit_constructor_match.group(2).split(',')
3114
    # Collapse arguments so that commas inside template parameter lists and
    # nested function parameter lists don't split a single argument in two.
3117    i = 0
3118    while i < len(constructor_args):
3119      constructor_arg = constructor_args[i]
3120      while (constructor_arg.count('<') > constructor_arg.count('>') or
3121             constructor_arg.count('(') > constructor_arg.count(')')):
3122        constructor_arg += ',' + constructor_args[i + 1]
3123        del constructor_args[i + 1]
3124      constructor_args[i] = constructor_arg
3125      i += 1
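    # For illustration, "Foo(std::map<int, int> m, int x)" is first split on
    # commas into ['std::map<int', ' int> m', ' int x'] and re-joined here
    # into ['std::map<int, int> m', ' int x'].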
3126
3127    variadic_args = [arg for arg in constructor_args if '&&...' in arg]
3128    defaulted_args = [arg for arg in constructor_args if '=' in arg]
3129    noarg_constructor = (not constructor_args or  # empty arg list
3130                         # 'void' arg specifier
3131                         (len(constructor_args) == 1 and
3132                          constructor_args[0].strip() == 'void'))
3133    onearg_constructor = ((len(constructor_args) == 1 and  # exactly one arg
3134                           not noarg_constructor) or
3135                          # all but at most one arg defaulted
3136                          (len(constructor_args) >= 1 and
3137                           not noarg_constructor and
3138                           len(defaulted_args) >= len(constructor_args) - 1) or
3139                          # variadic arguments with zero or one argument
3140                          (len(constructor_args) <= 2 and
3141                           len(variadic_args) >= 1))
3142    initializer_list_constructor = bool(
3143        onearg_constructor and
3144        Search(r'\bstd\s*::\s*initializer_list\b', constructor_args[0]))
3145    copy_constructor = bool(
3146        onearg_constructor and
3147        Match(r'(const\s+)?%s(\s*<[^>]*>)?(\s+const)?\s*(?:<\w+>\s*)?&'
3148              % re.escape(base_classname), constructor_args[0].strip()))
3149
3150    if (not is_marked_explicit and
3151        onearg_constructor and
3152        not initializer_list_constructor and
3153        not copy_constructor):
3154      if defaulted_args or variadic_args:
3155        error(filename, linenum, 'runtime/explicit', 5,
3156              'Constructors callable with one argument '
3157              'should be marked explicit.')
3158      else:
3159        error(filename, linenum, 'runtime/explicit', 5,
3160              'Single-parameter constructors should be marked explicit.')
3161    elif is_marked_explicit and not onearg_constructor:
3162      if noarg_constructor:
3163        error(filename, linenum, 'runtime/explicit', 5,
3164              'Zero-parameter constructors should not be marked explicit.')
3165
3166
3167def CheckSpacingForFunctionCall(filename, clean_lines, linenum, error):
3168  """Checks for the correctness of various spacing around function calls.
3169
3170  Args:
3171    filename: The name of the current file.
3172    clean_lines: A CleansedLines instance containing the file.
3173    linenum: The number of the line to check.
3174    error: The function to call with any errors found.
3175  """
3176  line = clean_lines.elided[linenum]
3177
3178  # Since function calls often occur inside if/for/while/switch
3179  # expressions - which have their own, more liberal conventions - we
3180  # first see if we should be looking inside such an expression for a
3181  # function call, to which we can apply more strict standards.
3182  fncall = line    # if there's no control flow construct, look at whole line
3183  for pattern in (r'\bif\s*\((.*)\)\s*{',
3184                  r'\bfor\s*\((.*)\)\s*{',
3185                  r'\bwhile\s*\((.*)\)\s*[{;]',
3186                  r'\bswitch\s*\((.*)\)\s*{'):
3187    match = Search(pattern, line)
3188    if match:
3189      fncall = match.group(1)    # look inside the parens for function calls
3190      break
3191
3192  # Except in if/for/while/switch, there should never be space
  # immediately inside parens (e.g. "f( 3, 4 )").  We make an exception
  # for nested parens ( (a+b) + c ).  Likewise, there should never be
  # a space before a ( when it's a function argument.  We assume it's a
  # function argument when the char before the whitespace is legal in
  # a function name (alnum + _) and we're not starting a macro.  Also ignore
  # pointers and references to arrays and functions because they're too tricky:
3199  # we use a very simple way to recognize these:
3200  # " (something)(maybe-something)" or
3201  # " (something)(maybe-something," or
3202  # " (something)[something]"
3203  # Note that we assume the contents of [] to be short enough that
3204  # they'll never need to wrap.
3205  if (  # Ignore control structures.
3206      not Search(r'\b(if|for|while|switch|return|new|delete|catch|sizeof)\b',
3207                 fncall) and
3208      # Ignore pointers/references to functions.
3209      not Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and
3210      # Ignore pointers/references to arrays.
3211      not Search(r' \([^)]+\)\[[^\]]+\]', fncall)):
3212    if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):      # a ( used for a fn call
3213      error(filename, linenum, 'whitespace/parens', 4,
3214            'Extra space after ( in function call')
3215    elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
3216      error(filename, linenum, 'whitespace/parens', 2,
3217            'Extra space after (')
3218    if (Search(r'\w\s+\(', fncall) and
3219        not Search(r'_{0,2}asm_{0,2}\s+_{0,2}volatile_{0,2}\s+\(', fncall) and
3220        not Search(r'#\s*define|typedef|using\s+\w+\s*=', fncall) and
3221        not Search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall) and
3222        not Search(r'\bcase\s+\(', fncall)):
      # TODO(unknown): Space after an operator function seems to be a common
3224      # error, silence those for now by restricting them to highest verbosity.
3225      if Search(r'\boperator_*\b', line):
3226        error(filename, linenum, 'whitespace/parens', 0,
3227              'Extra space before ( in function call')
3228      else:
3229        error(filename, linenum, 'whitespace/parens', 4,
3230              'Extra space before ( in function call')
3231    # If the ) is followed only by a newline or a { + newline, assume it's
3232    # part of a control statement (if/while/etc), and don't complain
3233    if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
3234      # If the closing parenthesis is preceded by only whitespaces,
3235      # try to give a more descriptive error message.
3236      if Search(r'^\s+\)', fncall):
3237        error(filename, linenum, 'whitespace/parens', 2,
3238              'Closing ) should be moved to the previous line')
3239      else:
3240        error(filename, linenum, 'whitespace/parens', 2,
3241              'Extra space before )')
3242
3243
3244def IsBlankLine(line):
3245  """Returns true if the given line is blank.
3246
3247  We consider a line to be blank if the line is empty or consists of
3248  only white spaces.
3249
3250  Args:
3251    line: A line of a string.
3252
3253  Returns:
3254    True, if the given line is blank.
3255  """
3256  return not line or line.isspace()
3257
3258
3259def CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line,
3260                                 error):
3261  is_namespace_indent_item = (
3262      len(nesting_state.stack) > 1 and
3263      nesting_state.stack[-1].check_namespace_indentation and
3264      isinstance(nesting_state.previous_stack_top, _NamespaceInfo) and
3265      nesting_state.previous_stack_top == nesting_state.stack[-2])
3266
3267  if ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item,
3268                                     clean_lines.elided, line):
3269    CheckItemIndentationInNamespace(filename, clean_lines.elided,
3270                                    line, error)
3271
3272
3273def CheckForFunctionLengths(filename, clean_lines, linenum,
3274                            function_state, error):
3275  """Reports for long function bodies.
3276
3277  For an overview why this is done, see:
3278  https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
3279
3280  Uses a simplistic algorithm assuming other style guidelines
3281  (especially spacing) are followed.
3282  Only checks unindented functions, so class members are unchecked.
3283  Trivial bodies are unchecked, so constructors with huge initializer lists
3284  may be missed.
3285  Blank/comment lines are not counted so as to avoid encouraging the removal
3286  of vertical space and comments just to get through a lint check.
3287  NOLINT *on the last line of a function* disables this check.
3288
3289  Args:
3290    filename: The name of the current file.
3291    clean_lines: A CleansedLines instance containing the file.
3292    linenum: The number of the line to check.
3293    function_state: Current function name and lines in body so far.
3294    error: The function to call with any errors found.
3295  """
3296  lines = clean_lines.lines
3297  line = lines[linenum]
3298  joined_line = ''
3299
3300  starting_func = False
3301  regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
3302  match_result = Match(regexp, line)
3303  if match_result:
3304    # If the name is all caps and underscores, figure it's a macro and
3305    # ignore it, unless it's TEST or TEST_F.
3306    function_name = match_result.group(1).split()[-1]
3307    if function_name == 'TEST' or function_name == 'TEST_F' or (
3308        not Match(r'[A-Z_]+$', function_name)):
3309      starting_func = True
3310
3311  if starting_func:
3312    body_found = False
3313    for start_linenum in xrange(linenum, clean_lines.NumLines()):
3314      start_line = lines[start_linenum]
3315      joined_line += ' ' + start_line.lstrip()
3316      if Search(r'(;|})', start_line):  # Declarations and trivial functions
3317        body_found = True
3318        break                              # ... ignore
3319      elif Search(r'{', start_line):
3320        body_found = True
3321        function = Search(r'((\w|:)*)\(', line).group(1)
3322        if Match(r'TEST', function):    # Handle TEST... macros
3323          parameter_regexp = Search(r'(\(.*\))', joined_line)
3324          if parameter_regexp:             # Ignore bad syntax
3325            function += parameter_regexp.group(1)
3326        else:
3327          function += '()'
3328        function_state.Begin(function)
3329        break
3330    if not body_found:
3331      # No body for the function (or evidence of a non-function) was found.
3332      error(filename, linenum, 'readability/fn_size', 5,
3333            'Lint failed to find start of function body.')
3334  elif Match(r'^\}\s*$', line):  # function end
3335    function_state.Check(error, filename, linenum)
3336    function_state.End()
3337  elif not Match(r'^\s*$', line):
3338    function_state.Count()  # Count non-blank/non-comment lines.
3339
3340
3341_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')
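# For illustration, the canonical form this pattern expects is:
#   // TODO(my_username): Stuff.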
3342
3343
3344def CheckComment(line, filename, linenum, next_line_start, error):
3345  """Checks for common mistakes in comments.
3346
3347  Args:
3348    line: The line in question.
3349    filename: The name of the current file.
3350    linenum: The number of the line to check.
3351    next_line_start: The first non-whitespace column of the next line.
3352    error: The function to call with any errors found.
3353  """
3354  commentpos = line.find('//')
3355  if commentpos != -1:
3356    # Check if the // may be in quotes.  If so, ignore it
3357    if re.sub(r'\\.', '', line[0:commentpos]).count('"') % 2 == 0:
3358      # Allow one space for new scopes, two spaces otherwise:
3359      if (not (Match(r'^.*{ *//', line) and next_line_start == commentpos) and
3360          ((commentpos >= 1 and
3361            line[commentpos-1] not in string.whitespace) or
3362           (commentpos >= 2 and
3363            line[commentpos-2] not in string.whitespace))):
3364        error(filename, linenum, 'whitespace/comments', 2,
3365              'At least two spaces is best between code and comments')
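      # For illustration, "int x = 0; // note" (one space before //) would be
      # flagged, while "int x = 0;  // note" would not.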
3366
3367      # Checks for common mistakes in TODO comments.
3368      comment = line[commentpos:]
3369      match = _RE_PATTERN_TODO.match(comment)
3370      if match:
3371        # One whitespace is correct; zero whitespace is handled elsewhere.
3372        leading_whitespace = match.group(1)
3373        if len(leading_whitespace) > 1:
3374          error(filename, linenum, 'whitespace/todo', 2,
3375                'Too many spaces before TODO')
3376
3377        username = match.group(2)
3378        if not username:
3379          error(filename, linenum, 'readability/todo', 2,
3380                'Missing username in TODO; it should look like '
3381                '"// TODO(my_username): Stuff."')
3382
3383        middle_whitespace = match.group(3)
3384        # Comparisons made explicit for correctness -- pylint: disable=g-explicit-bool-comparison
3385        if middle_whitespace != ' ' and middle_whitespace != '':
3386          error(filename, linenum, 'whitespace/todo', 2,
3387                'TODO(my_username) should be followed by a space')
3388
3389      # If the comment contains an alphanumeric character, there
3390      # should be a space somewhere between it and the // unless
3391      # it's a /// or //! Doxygen comment.
3392      if (Match(r'//[^ ]*\w', comment) and
3393          not Match(r'(///|//\!)(\s+|$)', comment)):
3394        error(filename, linenum, 'whitespace/comments', 4,
3395              'Should have a space between // and comment')
3396
3397
3398def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
3399  """Checks for the correctness of various spacing issues in the code.
3400
3401  Things we check for: spaces around operators, spaces after
3402  if/for/while/switch, no spaces around parens in function calls, two
3403  spaces between code and comment, don't start a block with a blank
3404  line, don't end a function with a blank line, don't add a blank line
3405  after public/protected/private, don't have too many blank lines in a row.
3406
3407  Args:
3408    filename: The name of the current file.
3409    clean_lines: A CleansedLines instance containing the file.
3410    linenum: The number of the line to check.
3411    nesting_state: A NestingState instance which maintains information about
3412                   the current stack of nested blocks being parsed.
3413    error: The function to call with any errors found.
3414  """
3415
3416  # Don't use "elided" lines here, otherwise we can't check commented lines.
3417  # Don't want to use "raw" either, because we don't want to check inside C++11
  # raw strings.
3419  raw = clean_lines.lines_without_raw_strings
3420  line = raw[linenum]
3421
3422  # Before nixing comments, check if the line is blank for no good
3423  # reason.  This includes the first line after a block is opened, and
  # blank lines at the end of a function (i.e., right before a line like '}').
3425  #
3426  # Skip all the blank line checks if we are immediately inside a
3427  # namespace body.  In other words, don't issue blank line warnings
3428  # for this block:
3429  #   namespace {
3430  #
3431  #   }
3432  #
3433  # A warning about missing end of namespace comments will be issued instead.
3434  #
3435  # Also skip blank line checks for 'extern "C"' blocks, which are formatted
3436  # like namespaces.
3437  if (IsBlankLine(line) and
3438      not nesting_state.InNamespaceBody() and
3439      not nesting_state.InExternC()):
3440    elided = clean_lines.elided
3441    prev_line = elided[linenum - 1]
3442    prevbrace = prev_line.rfind('{')
3443    # TODO(unknown): Don't complain if line before blank line, and line after,
3444    #                both start with alnums and are indented the same amount.
3445    #                This ignores whitespace at the start of a namespace block
3446    #                because those are not usually indented.
3447    if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1:
3448      # OK, we have a blank line at the start of a code block.  Before we
3449      # complain, we check if it is an exception to the rule: The previous
3450      # non-empty line has the parameters of a function header that are indented
      # 4 spaces (because they did not fit in an 80 column line when placed on
3452      # the same line as the function name).  We also check for the case where
3453      # the previous line is indented 6 spaces, which may happen when the
      # initializers of a constructor do not fit into an 80 column line.
3455      exception = False
3456      if Match(r' {6}\w', prev_line):  # Initializer list?
3457        # We are looking for the opening column of initializer list, which
3458        # should be indented 4 spaces to cause 6 space indentation afterwards.
3459        search_position = linenum-2
3460        while (search_position >= 0
3461               and Match(r' {6}\w', elided[search_position])):
3462          search_position -= 1
3463        exception = (search_position >= 0
3464                     and elided[search_position][:5] == '    :')
3465      else:
3466        # Search for the function arguments or an initializer list.  We use a
        # simple heuristic here: If the line is indented 4 spaces and we have a
        # closing paren (without the opening paren), followed by an opening brace
        # or colon (for initializer lists), we assume that it is the last line of
3470        # a function header.  If we have a colon indented 4 spaces, it is an
3471        # initializer list.
3472        exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
3473                           prev_line)
3474                     or Match(r' {4}:', prev_line))
3475
3476      if not exception:
3477        error(filename, linenum, 'whitespace/blank_line', 2,
3478              'Redundant blank line at the start of a code block '
3479              'should be deleted.')
3480    # Ignore blank lines at the end of a block in a long if-else
3481    # chain, like this:
3482    #   if (condition1) {
3483    #     // Something followed by a blank line
3484    #
3485    #   } else if (condition2) {
3486    #     // Something else
3487    #   }
3488    if linenum + 1 < clean_lines.NumLines():
3489      next_line = raw[linenum + 1]
3490      if (next_line
3491          and Match(r'\s*}', next_line)
3492          and next_line.find('} else ') == -1):
3493        error(filename, linenum, 'whitespace/blank_line', 3,
3494              'Redundant blank line at the end of a code block '
3495              'should be deleted.')
3496
3497    matched = Match(r'\s*(public|protected|private):', prev_line)
3498    if matched:
3499      error(filename, linenum, 'whitespace/blank_line', 3,
3500            'Do not leave a blank line after "%s:"' % matched.group(1))
3501
3502  # Next, check comments
3503  next_line_start = 0
3504  if linenum + 1 < clean_lines.NumLines():
3505    next_line = raw[linenum + 1]
3506    next_line_start = len(next_line) - len(next_line.lstrip())
3507  CheckComment(line, filename, linenum, next_line_start, error)
3508
3509  # get rid of comments and strings
3510  line = clean_lines.elided[linenum]
3511
3512  # You shouldn't have spaces before your brackets, except maybe after
3513  # 'delete []' or 'return []() {};'
3514  if Search(r'\w\s+\[', line) and not Search(r'(?:delete|return)\s+\[', line):
3515    error(filename, linenum, 'whitespace/braces', 5,
3516          'Extra space before [')
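  # For illustration, "int a [10];" would be flagged here, while
  # "delete [] p;" and "return [](int x) { return x; };" would not.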
3517
3518  # In range-based for, we wanted spaces before and after the colon, but
3519  # not around "::" tokens that might appear.
3520  if (Search(r'for *\(.*[^:]:[^: ]', line) or
3521      Search(r'for *\(.*[^: ]:[^:]', line)):
3522    error(filename, linenum, 'whitespace/forcolon', 2,
3523          'Missing space around colon in range-based for loop')
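  # For illustration, "for (auto x : xs)" is fine, while "for (auto x: xs)"
  # and "for (auto x :xs)" would be flagged.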
3524
3525
3526def CheckOperatorSpacing(filename, clean_lines, linenum, error):
3527  """Checks for horizontal spacing around operators.
3528
3529  Args:
3530    filename: The name of the current file.
3531    clean_lines: A CleansedLines instance containing the file.
3532    linenum: The number of the line to check.
3533    error: The function to call with any errors found.
3534  """
3535  line = clean_lines.elided[linenum]
3536
3537  # Don't try to do spacing checks for operator methods.  Do this by
3538  # replacing the troublesome characters with something else,
3539  # preserving column position for all other characters.
3540  #
3541  # The replacement is done repeatedly to avoid false positives from
3542  # operators that call operators.
3543  while True:
3544    match = Match(r'^(.*\boperator\b)(\S+)(\s*\(.*)$', line)
3545    if match:
3546      line = match.group(1) + ('_' * len(match.group(2))) + match.group(3)
3547    else:
3548      break
3549
3550  # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
3551  # Otherwise not.  Note we only check for non-spaces on *both* sides;
3552  # sometimes people put non-spaces on one side when aligning ='s among
3553  # many lines (not that this is behavior that I approve of...)
3554  if ((Search(r'[\w.]=', line) or
3555       Search(r'=[\w.]', line))
3556      and not Search(r'\b(if|while|for) ', line)
3557      # Operators taken from [lex.operators] in C++11 standard.
3558      and not Search(r'(>=|<=|==|!=|&=|\^=|\|=|\+=|\*=|\/=|\%=)', line)
3559      and not Search(r'operator=', line)):
3560    error(filename, linenum, 'whitespace/operators', 4,
3561          'Missing spaces around =')
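  # For illustration, "int x=5;" on its own line would be flagged, while
  # "if ((a=Foo()) == 0)" and "x += 5;" would not.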
3562
3563  # It's ok not to have spaces around binary operators like + - * /, but if
3564  # there's too little whitespace, we get concerned.  It's hard to tell,
3565  # though, so we punt on this one for now.  TODO.
3566
3567  # You should always have whitespace around binary operators.
3568  #
3569  # Check <= and >= first to avoid false positives with < and >, then
3570  # check non-include lines for spacing around < and >.
3571  #
  # If the operator is followed by a comma, assume it's being used in a
3573  # macro context and don't do any checks.  This avoids false
3574  # positives.
3575  #
3576  # Note that && is not included here.  This is because there are too
3577  # many false positives due to RValue references.
3578  match = Search(r'[^<>=!\s](==|!=|<=|>=|\|\|)[^<>=!\s,;\)]', line)
3579  if match:
3580    error(filename, linenum, 'whitespace/operators', 3,
3581          'Missing spaces around %s' % match.group(1))
3582  elif not Match(r'#.*include', line):
3583    # Look for < that is not surrounded by spaces.  This is only
3584    # triggered if both sides are missing spaces, even though
    # technically we should flag it if at least one side is missing a
3586    # space.  This is done to avoid some false positives with shifts.
3587    match = Match(r'^(.*[^\s<])<[^\s=<,]', line)
3588    if match:
3589      (_, _, end_pos) = CloseExpression(
3590          clean_lines, linenum, len(match.group(1)))
3591      if end_pos <= -1:
3592        error(filename, linenum, 'whitespace/operators', 3,
3593              'Missing spaces around <')
3594
3595    # Look for > that is not surrounded by spaces.  Similar to the
3596    # above, we only trigger if both sides are missing spaces to avoid
3597    # false positives with shifts.
3598    match = Match(r'^(.*[^-\s>])>[^\s=>,]', line)
3599    if match:
3600      (_, _, start_pos) = ReverseCloseExpression(
3601          clean_lines, linenum, len(match.group(1)))
3602      if start_pos <= -1:
3603        error(filename, linenum, 'whitespace/operators', 3,
3604              'Missing spaces around >')
3605
3606  # We allow no-spaces around << when used like this: 10<<20, but
3607  # not otherwise (particularly, not when used as streams)
3608  #
3609  # We also allow operators following an opening parenthesis, since
3610  # those tend to be macros that deal with operators.
3611  match = Search(r'(operator|[^\s(<])(?:L|UL|LL|ULL|l|ul|ll|ull)?<<([^\s,=<])', line)
3612  if (match and not (match.group(1).isdigit() and match.group(2).isdigit()) and
3613      not (match.group(1) == 'operator' and match.group(2) == ';')):
3614    error(filename, linenum, 'whitespace/operators', 3,
3615          'Missing spaces around <<')
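  # For illustration, 'cout<<"hi"' would be flagged, while "1<<20" (integer
  # literals) would not; operator<< declarations were masked out above.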
3616
3617  # We allow no-spaces around >> for almost anything.  This is because
3618  # C++11 allows ">>" to close nested templates, which accounts for
3619  # most cases when ">>" is not followed by a space.
3620  #
3621  # We still warn on ">>" followed by alpha character, because that is
3622  # likely due to ">>" being used for right shifts, e.g.:
3623  #   value >> alpha
3624  #
3625  # When ">>" is used to close templates, the alphanumeric letter that
3626  # follows would be part of an identifier, and there should still be
3627  # a space separating the template type and the identifier.
3628  #   type<type<type>> alpha
3629  match = Search(r'>>[a-zA-Z_]', line)
3630  if match:
3631    error(filename, linenum, 'whitespace/operators', 3,
3632          'Missing spaces around >>')
3633
3634  # There shouldn't be space around unary operators
3635  match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
3636  if match:
3637    error(filename, linenum, 'whitespace/operators', 4,
3638          'Extra space for operator %s' % match.group(1))
3639
3640
3641def CheckParenthesisSpacing(filename, clean_lines, linenum, error):
3642  """Checks for horizontal spacing around parentheses.
3643
3644  Args:
3645    filename: The name of the current file.
3646    clean_lines: A CleansedLines instance containing the file.
3647    linenum: The number of the line to check.
3648    error: The function to call with any errors found.
3649  """
3650  line = clean_lines.elided[linenum]
3651
3652  # No spaces after an if, while, switch, or for
3653  match = Search(r' (if\(|for\(|while\(|switch\()', line)
3654  if match:
3655    error(filename, linenum, 'whitespace/parens', 5,
3656          'Missing space before ( in %s' % match.group(1))
3657
3658  # For if/for/while/switch, the left and right parens should be
3659  # consistent about how many spaces are inside the parens, and
3660  # there should either be zero or one spaces inside the parens.
3661  # We don't want: "if ( foo)" or "if ( foo   )".
3662  # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
3663  match = Search(r'\b(if|for|while|switch)\s*'
3664                 r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
3665                 line)
3666  if match:
3667    if len(match.group(2)) != len(match.group(4)):
3668      if not (match.group(3) == ';' and
3669              len(match.group(2)) == 1 + len(match.group(4)) or
3670              not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
3671        error(filename, linenum, 'whitespace/parens', 5,
3672              'Mismatching spaces inside () in %s' % match.group(1))
3673    if len(match.group(2)) not in [0, 1]:
3674      error(filename, linenum, 'whitespace/parens', 5,
3675            'Should have zero or one spaces inside ( and ) in %s' %
3676            match.group(1))
3677
3678
3679def CheckCommaSpacing(filename, clean_lines, linenum, error):
3680  """Checks for horizontal spacing near commas and semicolons.
3681
3682  Args:
3683    filename: The name of the current file.
3684    clean_lines: A CleansedLines instance containing the file.
3685    linenum: The number of the line to check.
3686    error: The function to call with any errors found.
3687  """
3688  raw = clean_lines.lines_without_raw_strings
3689  line = clean_lines.elided[linenum]
3690
3691  # You should always have a space after a comma (either as fn arg or operator)
3692  #
3693  # This does not apply when the non-space character following the
3694  # comma is another comma, since the only time when that happens is
3695  # for empty macro arguments.
3696  #
3697  # We run this check in two passes: first pass on elided lines to
3698  # verify that lines contain missing whitespaces, second pass on raw
3699  # lines to confirm that those missing whitespaces are not due to
3700  # elided comments.
3701  if (Search(r',[^,\s]', ReplaceAll(r'\boperator\s*,\s*\(', 'F(', line)) and
3702      Search(r',[^,\s]', raw[linenum])):
3703    error(filename, linenum, 'whitespace/comma', 3,
3704          'Missing space after ,')
3705
3706  # You should always have a space after a semicolon
3707  # except for few corner cases
  # TODO(unknown): clarify whether 'if (1) { return 1;}' requires one more
3709  # space after ;
3710  if Search(r';[^\s};\\)/]', line):
3711    error(filename, linenum, 'whitespace/semicolon', 3,
3712          'Missing space after ;')
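  # For illustration, "f(a,b);" and "for (int i = 0;i < n; ++i)" would be
  # flagged, while "f(a, b);" and "return 1;}" would not.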
3713
3714
3715def _IsType(clean_lines, nesting_state, expr):
3716  """Check if expression looks like a type name, returns true if so.
3717
3718  Args:
3719    clean_lines: A CleansedLines instance containing the file.
3720    nesting_state: A NestingState instance which maintains information about
3721                   the current stack of nested blocks being parsed.
3722    expr: The expression to check.
3723  Returns:
3724    True, if token looks like a type.
3725  """
3726  # Keep only the last token in the expression
3727  last_word = Match(r'^.*(\b\S+)$', expr)
3728  if last_word:
3729    token = last_word.group(1)
3730  else:
3731    token = expr
3732
3733  # Match native types and stdint types
3734  if _TYPES.match(token):
3735    return True
3736
3737  # Try a bit harder to match templated types.  Walk up the nesting
3738  # stack until we find something that resembles a typename
3739  # declaration for what we are looking for.
3740  typename_pattern = (r'\b(?:typename|class|struct)\s+' + re.escape(token) +
3741                      r'\b')
3742  block_index = len(nesting_state.stack) - 1
3743  while block_index >= 0:
3744    if isinstance(nesting_state.stack[block_index], _NamespaceInfo):
3745      return False
3746
3747    # Found where the opening brace is.  We want to scan from this
3748    # line up to the beginning of the function, minus a few lines.
3749    #   template <typename Type1,  // stop scanning here
3750    #             ...>
3751    #   class C
3752    #     : public ... {  // start scanning here
3753    last_line = nesting_state.stack[block_index].starting_linenum
3754
3755    next_block_start = 0
3756    if block_index > 0:
3757      next_block_start = nesting_state.stack[block_index - 1].starting_linenum
3758    first_line = last_line
3759    while first_line >= next_block_start:
3760      if clean_lines.elided[first_line].find('template') >= 0:
3761        break
3762      first_line -= 1
3763    if first_line < next_block_start:
3764      # Didn't find any "template" keyword before reaching the next block,
3765      # there are probably no template things to check for this block
3766      block_index -= 1
3767      continue
3768
3769    # Look for typename in the specified range
3770    for i in xrange(first_line, last_line + 1, 1):
3771      if Search(typename_pattern, clean_lines.elided[i]):
3772        return True
3773    block_index -= 1
3774
3775  return False
3776
3777
3778def CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error):
3779  """Checks for horizontal spacing near commas.
3780
3781  Args:
3782    filename: The name of the current file.
3783    clean_lines: A CleansedLines instance containing the file.
3784    linenum: The number of the line to check.
3785    nesting_state: A NestingState instance which maintains information about
3786                   the current stack of nested blocks being parsed.
3787    error: The function to call with any errors found.
3788  """
3789  line = clean_lines.elided[linenum]
3790
3791  # Except after an opening paren, or after another opening brace (in case of
3792  # an initializer list, for instance), you should have spaces before your
3793  # braces when they are delimiting blocks, classes, namespaces etc.
3794  # And since you should never have braces at the beginning of a line,
3795  # this is an easy test.  Except that braces used for initialization don't
3796  # follow the same rule; we often don't want spaces before those.
3797  match = Match(r'^(.*[^ ({>]){', line)
3798
3799  if match:
3800    # Try a bit harder to check for brace initialization.  This
3801    # happens in one of the following forms:
3802    #   Constructor() : initializer_list_{} { ... }
3803    #   Constructor{}.MemberFunction()
3804    #   Type variable{};
3805    #   FunctionCall(type{}, ...);
3806    #   LastArgument(..., type{});
3807    #   LOG(INFO) << type{} << " ...";
3808    #   map_of_type[{...}] = ...;
3809    #   ternary = expr ? new type{} : nullptr;
3810    #   OuterTemplate<InnerTemplateConstructor<Type>{}>
3811    #
3812    # We check for the character following the closing brace, and
3813    # silence the warning if it's one of those listed above, i.e.
3814    # "{.;,)<>]:".
3815    #
3816    # To account for nested initializer list, we allow any number of
3817    # closing braces up to "{;,)<".  We can't simply silence the
3818    # warning on first sight of closing brace, because that would
3819    # cause false negatives for things that are not initializer lists.
3820    #   Silence this:         But not this:
3821    #     Outer{                if (...) {
3822    #       Inner{...}            if (...){  // Missing space before {
3823    #     };                    }
3824    #
3825    # There is a false negative with this approach if people inserted
3826    # spurious semicolons, e.g. "if (cond){};", but we will catch the
3827    # spurious semicolon with a separate check.
3828    leading_text = match.group(1)
3829    (endline, endlinenum, endpos) = CloseExpression(
3830        clean_lines, linenum, len(match.group(1)))
3831    trailing_text = ''
3832    if endpos > -1:
3833      trailing_text = endline[endpos:]
3834    for offset in xrange(endlinenum + 1,
3835                         min(endlinenum + 3, clean_lines.NumLines() - 1)):
3836      trailing_text += clean_lines.elided[offset]
3837    # We also suppress warnings for `uint64_t{expression}` etc., as the style
3838    # guide recommends brace initialization for integral types to avoid
3839    # overflow/truncation.
3840    if (not Match(r'^[\s}]*[{.;,)<>\]:]', trailing_text)
3841        and not _IsType(clean_lines, nesting_state, leading_text)):
3842      error(filename, linenum, 'whitespace/braces', 5,
3843            'Missing space before {')
3844
3845  # Make sure '} else {' has spaces.
3846  if Search(r'}else', line):
3847    error(filename, linenum, 'whitespace/braces', 5,
3848          'Missing space before else')
3849
3850  # You shouldn't have a space before a semicolon at the end of the line.
3851  # There's a special case for "for" since the style guide allows space before
3852  # the semicolon there.
3853  if Search(r':\s*;\s*$', line):
3854    error(filename, linenum, 'whitespace/semicolon', 5,
3855          'Semicolon defining empty statement. Use {} instead.')
3856  elif Search(r'^\s*;\s*$', line):
3857    error(filename, linenum, 'whitespace/semicolon', 5,
3858          'Line contains only semicolon. If this should be an empty statement, '
3859          'use {} instead.')
3860  elif (Search(r'\s+;\s*$', line) and
3861        not Search(r'\bfor\b', line)):
3862    error(filename, linenum, 'whitespace/semicolon', 5,
3863          'Extra space before last semicolon. If this should be an empty '
3864          'statement, use {} instead.')
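  # For illustration, "default: ;" and a line containing only ";" would be
  # flagged, while "for (;;) {" and "int x = 1;" would not.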
3865
3866
3867def IsDecltype(clean_lines, linenum, column):
3868  """Check if the token ending on (linenum, column) is decltype().
3869
3870  Args:
3871    clean_lines: A CleansedLines instance containing the file.
3872    linenum: the number of the line to check.
3873    column: end column of the token to check.
3874  Returns:
3875    True if this token is decltype() expression, False otherwise.
3876  """
3877  (text, _, start_col) = ReverseCloseExpression(clean_lines, linenum, column)
3878  if start_col < 0:
3879    return False
3880  if Search(r'\bdecltype\s*$', text[0:start_col]):
3881    return True
3882  return False
3883
3884def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
3885  """Checks for additional blank line issues related to sections.
3886
3887  Currently the only thing checked here is blank line before protected/private.
3888
3889  Args:
3890    filename: The name of the current file.
3891    clean_lines: A CleansedLines instance containing the file.
    class_info: A _ClassInfo object.
3893    linenum: The number of the line to check.
3894    error: The function to call with any errors found.
3895  """
3896  # Skip checks if the class is small, where small means 25 lines or less.
3897  # 25 lines seems like a good cutoff since that's the usual height of
3898  # terminals, and any class that can't fit in one screen can't really
3899  # be considered "small".
3900  #
3901  # Also skip checks if we are on the first line.  This accounts for
3902  # classes that look like
3903  #   class Foo { public: ... };
3904  #
3905  # If we didn't find the end of the class, last_line would be zero,
3906  # and the check will be skipped by the first condition.
3907  if (class_info.last_line - class_info.starting_linenum <= 24 or
3908      linenum <= class_info.starting_linenum):
3909    return
3910
3911  matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum])
3912  if matched:
3913    # Issue warning if the line before public/protected/private was
3914    # not a blank line, but don't do this if the previous line contains
3915    # "class" or "struct".  This can happen two ways:
3916    #  - We are at the beginning of the class.
3917    #  - We are forward-declaring an inner class that is semantically
3918    #    private, but needed to be public for implementation reasons.
    # Also ignore cases where the previous line ends with a backslash, as is
    # common when defining classes in C macros.
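    # For illustration (assuming the class body is long enough that the check
    # is not skipped), this layout would trigger the warning on "private:":
    #   class Foo {
    #    public:
    #     void Bar();
    #    private:     // no blank line above
    #     int baz_;
    #   };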
3921    prev_line = clean_lines.lines[linenum - 1]
3922    if (not IsBlankLine(prev_line) and
3923        not Search(r'\b(class|struct)\b', prev_line) and
3924        not Search(r'\\$', prev_line)):
3925      # Try a bit harder to find the beginning of the class.  This is to
3926      # account for multi-line base-specifier lists, e.g.:
3927      #   class Derived
3928      #       : public Base {
3929      end_class_head = class_info.starting_linenum
3930      for i in range(class_info.starting_linenum, linenum):
3931        if Search(r'\{\s*$', clean_lines.lines[i]):
3932          end_class_head = i
3933          break
3934      if end_class_head < linenum - 1:
3935        error(filename, linenum, 'whitespace/blank_line', 3,
3936              '"%s:" should be preceded by a blank line' % matched.group(1))
3937
3938
3939def GetPreviousNonBlankLine(clean_lines, linenum):
3940  """Return the most recent non-blank line and its line number.
3941
3942  Args:
3943    clean_lines: A CleansedLines instance containing the file contents.
3944    linenum: The number of the line to check.
3945
3946  Returns:
3947    A tuple with two elements.  The first element is the contents of the last
3948    non-blank line before the current line, or the empty string if this is the
3949    first non-blank line.  The second is the line number of that line, or -1
3950    if this is the first non-blank line.
3951  """
3952
3953  prevlinenum = linenum - 1
3954  while prevlinenum >= 0:
3955    prevline = clean_lines.elided[prevlinenum]
3956    if not IsBlankLine(prevline):     # if not a blank line...
3957      return (prevline, prevlinenum)
3958    prevlinenum -= 1
3959  return ('', -1)
3960
3961
3962def CheckBraces(filename, clean_lines, linenum, error):
3963  """Looks for misplaced braces (e.g. at the end of line).
3964
3965  Args:
3966    filename: The name of the current file.
3967    clean_lines: A CleansedLines instance containing the file.
3968    linenum: The number of the line to check.
3969    error: The function to call with any errors found.
3970  """
3971
3972  line = clean_lines.elided[linenum]        # get rid of comments and strings
3973
3974  if Match(r'\s*{\s*$', line):
3975    # We allow an open brace to start a line in the case where someone is using
3976    # braces in a block to explicitly create a new scope, which is commonly used
3977    # to control the lifetime of stack-allocated variables.  Braces are also
3978    # used for brace initializers inside function calls.  We don't detect this
3979    # perfectly: we just don't complain if the last non-whitespace character on
3980    # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the
3981    # previous line starts a preprocessor block. We also allow a brace on the
3982    # following line if it is part of an array initialization and would not fit
3983    # within the 80 character limit of the preceding line.
3984    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
3985    if (not Search(r'[,;:}{(]\s*$', prevline) and
3986        not Match(r'\s*#', prevline) and
3987        not (GetLineWidth(prevline) > _line_length - 2 and '[]' in prevline)):
3988      error(filename, linenum, 'whitespace/braces', 4,
3989            '{ should almost always be at the end of the previous line')
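    # For illustration, an opening brace on its own line after "if (condition)"
    # would be flagged; "if (condition) {" keeps the brace on the same line.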
3990
3991  # An else clause should be on the same line as the preceding closing brace.
3992  if Match(r'\s*else\b\s*(?:if\b|\{|$)', line):
3993    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
3994    if Match(r'\s*}\s*$', prevline):
3995      error(filename, linenum, 'whitespace/newline', 4,
3996            'An else should appear on the same line as the preceding }')
3997
3998  # If braces come on one side of an else, they should be on both.
3999  # However, we have to worry about "else if" that spans multiple lines!
4000  if Search(r'else if\s*\(', line):       # could be multi-line if
4001    brace_on_left = bool(Search(r'}\s*else if\s*\(', line))
4002    # find the ( after the if
4003    pos = line.find('else if')
4004    pos = line.find('(', pos)
4005    if pos > 0:
4006      (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
4007      brace_on_right = endline[endpos:].find('{') != -1
4008      if brace_on_left != brace_on_right:    # must be brace after if
4009        error(filename, linenum, 'readability/braces', 5,
4010              'If an else has a brace on one side, it should have it on both')
4011  elif Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
4012    error(filename, linenum, 'readability/braces', 5,
4013          'If an else has a brace on one side, it should have it on both')
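  # For illustration, "} else Bar();" (brace on the left side only) would be
  # flagged here, while "} else {" would not.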
4014
4015  # Likewise, an else should never have the else clause on the same line
4016  if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
4017    error(filename, linenum, 'whitespace/newline', 4,
4018          'Else clause should never be on same line as else (use 2 lines)')
4019
4020  # In the same way, a do/while should never be on one line
4021  if Match(r'\s*do [^\s{]', line):
4022    error(filename, linenum, 'whitespace/newline', 4,
4023          'do/while clauses should not be on a single line')
4024
4025  # Check single-line if/else bodies. The style guide says 'curly braces are not
4026  # required for single-line statements'. We additionally allow multi-line,
4027  # single statements, but we reject anything with more than one semicolon in
4028  # it. This means that the first semicolon after the if should be at the end of
4029  # its line, and the line after that should have an indent level equal to or
4030  # lower than the if. We also check for ambiguous if/else nesting without
4031  # braces.
4032  if_else_match = Search(r'\b(if\s*\(|else\b)', line)
4033  if if_else_match and not Match(r'\s*#', line):
4034    if_indent = GetIndentLevel(line)
4035    endline, endlinenum, endpos = line, linenum, if_else_match.end()
4036    if_match = Search(r'\bif\s*\(', line)
4037    if if_match:
4038      # This could be a multiline if condition, so find the end first.
4039      pos = if_match.end() - 1
4040      (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum, pos)
4041    # Check for an opening brace, either directly after the if or on the next
4042    # line. If found, this isn't a single-statement conditional.
4043    if (not Match(r'\s*{', endline[endpos:])
4044        and not (Match(r'\s*$', endline[endpos:])
4045                 and endlinenum < (len(clean_lines.elided) - 1)
4046                 and Match(r'\s*{', clean_lines.elided[endlinenum + 1]))):
4047      while (endlinenum < len(clean_lines.elided)
4048             and ';' not in clean_lines.elided[endlinenum][endpos:]):
4049        endlinenum += 1
4050        endpos = 0
4051      if endlinenum < len(clean_lines.elided):
4052        endline = clean_lines.elided[endlinenum]
4053        # We allow a mix of whitespace and closing braces (e.g. for one-liner
4054        # methods) and a single \ after the semicolon (for macros)
4055        endpos = endline.find(';')
4056        if not Match(r';[\s}]*(\\?)$', endline[endpos:]):
4057          # Semicolon isn't the last character, there's something trailing.
4058          # Output a warning if the semicolon is not contained inside
4059          # a lambda expression.
4060          if not Match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}]*\}\s*\)*[;,]\s*$',
4061                       endline):
4062            error(filename, linenum, 'readability/braces', 4,
4063                  'If/else bodies with multiple statements require braces')
4064        elif endlinenum < len(clean_lines.elided) - 1:
4065          # Make sure the next line is dedented
4066          next_line = clean_lines.elided[endlinenum + 1]
4067          next_indent = GetIndentLevel(next_line)
4068          # With ambiguous nested if statements, this will error out on the
4069          # if that *doesn't* match the else, regardless of whether it's the
4070          # inner one or outer one.
4071          if (if_match and Match(r'\s*else\b', next_line)
4072              and next_indent != if_indent):
4073            error(filename, linenum, 'readability/braces', 4,
4074                  'Else clause should be indented at the same level as if. '
4075                  'Ambiguous nested if/else chains require braces.')
4076          elif next_indent > if_indent:
4077            error(filename, linenum, 'readability/braces', 4,
4078                  'If/else bodies with multiple statements require braces')
4079
4080
4081def CheckTrailingSemicolon(filename, clean_lines, linenum, error):
4082  """Looks for redundant trailing semicolon.
4083
4084  Args:
4085    filename: The name of the current file.
4086    clean_lines: A CleansedLines instance containing the file.
4087    linenum: The number of the line to check.
4088    error: The function to call with any errors found.
4089  """
4090
4091  line = clean_lines.elided[linenum]
4092
4093  # Block bodies should not be followed by a semicolon.  Due to C++11
4094  # brace initialization, there are more places where semicolons are
4095  # required than not, so we use a whitelist approach to check these
4096  # rather than a blacklist.  These are the places where "};" should
4097  # be replaced by just "}":
4098  # 1. Some flavor of block following closing parenthesis:
4099  #    for (;;) {};
4100  #    while (...) {};
4101  #    switch (...) {};
4102  #    Function(...) {};
4103  #    if (...) {};
4104  #    if (...) else if (...) {};
4105  #
4106  # 2. else block:
4107  #    if (...) else {};
4108  #
4109  # 3. const member function:
4110  #    Function(...) const {};
4111  #
4112  # 4. Block following some statement:
4113  #    x = 42;
4114  #    {};
4115  #
4116  # 5. Block at the beginning of a function:
4117  #    Function(...) {
4118  #      {};
4119  #    }
4120  #
4121  #    Note that naively checking for the preceding "{" will also match
4122  #    braces inside multi-dimensional arrays, but this is fine since
4123  #    that expression will not contain semicolons.
4124  #
4125  # 6. Block following another block:
4126  #    while (true) {}
4127  #    {};
4128  #
4129  # 7. End of namespaces:
4130  #    namespace {};
4131  #
  #    These semicolons seem far more common than other kinds of
4133  #    redundant semicolons, possibly due to people converting classes
4134  #    to namespaces.  For now we do not warn for this case.
4135  #
4136  # Try matching case 1 first.
4137  match = Match(r'^(.*\)\s*)\{', line)
4138  if match:
4139    # Matched closing parenthesis (case 1).  Check the token before the
4140    # matching opening parenthesis, and don't warn if it looks like a
4141    # macro.  This avoids these false positives:
4142    #  - macro that defines a base class
4143    #  - multi-line macro that defines a base class
4144    #  - macro that defines the whole class-head
4145    #
4146    # But we still issue warnings for macros that we know are safe to
4147    # warn, specifically:
4148    #  - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P
4149    #  - TYPED_TEST
4150    #  - INTERFACE_DEF
4151    #  - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED:
4152    #
4153    # We implement a whitelist of safe macros instead of a blacklist of
4154    # unsafe macros, even though the latter appears less frequently in
4155    # google code and would have been easier to implement.  This is because
4156    # the downside for getting the whitelist wrong means some extra
4157    # semicolons, while the downside for getting the blacklist wrong
4158    # would result in compile errors.
4159    #
4160    # In addition to macros, we also don't want to warn on
4161    #  - Compound literals
4162    #  - Lambdas
4163    #  - alignas specifier with anonymous structs
4164    #  - decltype
4165    closing_brace_pos = match.group(1).rfind(')')
4166    opening_parenthesis = ReverseCloseExpression(
4167        clean_lines, linenum, closing_brace_pos)
4168    if opening_parenthesis[2] > -1:
4169      line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]]
4170      macro = Search(r'\b([A-Z_][A-Z0-9_]*)\s*$', line_prefix)
4171      func = Match(r'^(.*\])\s*$', line_prefix)
4172      if ((macro and
4173           macro.group(1) not in (
4174               'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST',
4175               'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED',
4176               'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or
4177          (func and not Search(r'\boperator\s*\[\s*\]', func.group(1))) or
4178          Search(r'\b(?:struct|union)\s+alignas\s*$', line_prefix) or
4179          Search(r'\bdecltype$', line_prefix) or
4180          Search(r'\s+=\s*$', line_prefix)):
4181        match = None
4182    if (match and
4183        opening_parenthesis[1] > 1 and
4184        Search(r'\]\s*$', clean_lines.elided[opening_parenthesis[1] - 1])):
4185      # Multi-line lambda-expression
4186      match = None
4187
4188  else:
4189    # Try matching cases 2-3.
4190    match = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line)
4191    if not match:
4192      # Try matching cases 4-6.  These are always matched on separate lines.
4193      #
4194      # Note that we can't simply concatenate the previous line to the
4195      # current line and do a single match, otherwise we may output
4196      # duplicate warnings for the blank line case:
4197      #   if (cond) {
4198      #     // blank line
4199      #   }
4200      prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
4201      if prevline and Search(r'[;{}]\s*$', prevline):
4202        match = Match(r'^(\s*)\{', line)
4203
4204  # Check matching closing brace
4205  if match:
4206    (endline, endlinenum, endpos) = CloseExpression(
4207        clean_lines, linenum, len(match.group(1)))
4208    if endpos > -1 and Match(r'^\s*;', endline[endpos:]):
4209      # Current {} pair is eligible for semicolon check, and we have found
4210      # the redundant semicolon, output warning here.
4211      #
4212      # Note: because we are scanning forward for opening braces, and
4213      # outputting warnings for the matching closing brace, if there are
4214      # nested blocks with trailing semicolons, we will get the error
4215      # messages in reversed order.
4216
4217      # We need to check the lines near the closing brace for NOLINT suppressions
4218      raw_lines = clean_lines.raw_lines
4219      ParseNolintSuppressions(filename, raw_lines[endlinenum-1], endlinenum-1,
4220                              error)
4221      ParseNolintSuppressions(filename, raw_lines[endlinenum], endlinenum,
4222                              error)
4223
4224      error(filename, endlinenum, 'readability/braces', 4,
4225            "You don't need a ; after a }")
4226
4227
4228def CheckEmptyBlockBody(filename, clean_lines, linenum, error):
4229  """Look for empty loop/conditional body with only a single semicolon.
4230
4231  Args:
4232    filename: The name of the current file.
4233    clean_lines: A CleansedLines instance containing the file.
4234    linenum: The number of the line to check.
4235    error: The function to call with any errors found.
4236  """
4237
4238  # Search for loop keywords at the beginning of the line.  Because only
4239  # whitespace is allowed before the keywords, this will also ignore most
4240  # do-while-loops, since those lines should start with a closing brace.
4241  #
4242  # We also check "if" blocks here, since an empty conditional block
4243  # is likely an error.
4244  line = clean_lines.elided[linenum]
4245  matched = Match(r'\s*(for|while|if)\s*\(', line)
4246  if matched:
4247    # Find the end of the conditional expression.
4248    (end_line, end_linenum, end_pos) = CloseExpression(
4249        clean_lines, linenum, line.find('('))
4250
4251    # Output warning if what follows the condition expression is a semicolon.
4252    # No warning for all other cases, including whitespace or newline, since we
4253    # have a separate check for semicolons preceded by whitespace.
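    # For example (illustrative): 'while (GetNext());' and 'if (done);' are
    # flagged below, while 'while (GetNext()) {}' is not flagged by this check.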
4254    if end_pos >= 0 and Match(r';', end_line[end_pos:]):
4255      if matched.group(1) == 'if':
4256        error(filename, end_linenum, 'whitespace/empty_conditional_body', 5,
4257              'Empty conditional bodies should use {}')
4258      else:
4259        error(filename, end_linenum, 'whitespace/empty_loop_body', 5,
4260              'Empty loop bodies should use {} or continue')
4261
4262    # Check for if statements that have completely empty bodies (no comments)
4263    # and no else clauses.
4264    if end_pos >= 0 and matched.group(1) == 'if':
4265      # Find the position of the opening { for the if statement.
4266      # Return without logging an error if it has no brackets.
4267      opening_linenum = end_linenum
4268      opening_line_fragment = end_line[end_pos:]
4269      # Loop until EOF or find anything that's not whitespace or opening {.
4270      while not Search(r'^\s*\{', opening_line_fragment):
4271        if Search(r'^(?!\s*$)', opening_line_fragment):
4272          # Conditional has no brackets.
4273          return
4274        opening_linenum += 1
4275        if opening_linenum == len(clean_lines.elided):
4276          # Couldn't find conditional's opening { or any code before EOF.
4277          return
4278        opening_line_fragment = clean_lines.elided[opening_linenum]
4279      # Set opening_line (opening_line_fragment may not be the entire opening line).
4280      opening_line = clean_lines.elided[opening_linenum]
4281
4282      # Find the position of the closing }.
4283      opening_pos = opening_line_fragment.find('{')
4284      if opening_linenum == end_linenum:
4285        # We need to make opening_pos relative to the start of the entire line.
4286        opening_pos += end_pos
4287      (closing_line, closing_linenum, closing_pos) = CloseExpression(
4288          clean_lines, opening_linenum, opening_pos)
4289      if closing_pos < 0:
4290        return
4291
4292      # Now construct the body of the conditional. This consists of the portion
4293      # of the opening line after the {, all lines until the closing line,
4294      # and the portion of the closing line before the }.
4295      if (clean_lines.raw_lines[opening_linenum] !=
4296          CleanseComments(clean_lines.raw_lines[opening_linenum])):
4297        # Opening line ends with a comment, so conditional isn't empty.
4298        return
4299      if closing_linenum > opening_linenum:
4300        # Opening line after the {. Ignore comments here since we checked above.
4301        bodylist = list(opening_line[opening_pos+1:])
4302        # All lines until closing line, excluding closing line, with comments.
4303        bodylist.extend(clean_lines.raw_lines[opening_linenum+1:closing_linenum])
4304        # Closing line before the }. Won't (and can't) have comments.
4305        bodylist.append(clean_lines.elided[closing_linenum][:closing_pos-1])
4306        body = '\n'.join(bodylist)
4307      else:
4308        # If statement has brackets and fits on a single line.
4309        body = opening_line[opening_pos+1:closing_pos-1]
4310
4311      # Check if the body is empty
4312      if not _EMPTY_CONDITIONAL_BODY_PATTERN.search(body):
4313        return
4314      # The body is empty. Now make sure there's not an else clause.
4315      current_linenum = closing_linenum
4316      current_line_fragment = closing_line[closing_pos:]
4317      # Loop until EOF or find anything that's not whitespace or else clause.
4318      while Search(r'^\s*$|^(?=\s*else)', current_line_fragment):
4319        if Search(r'^(?=\s*else)', current_line_fragment):
4320          # Found an else clause, so don't log an error.
4321          return
4322        current_linenum += 1
4323        if current_linenum == len(clean_lines.elided):
4324          break
4325        current_line_fragment = clean_lines.elided[current_linenum]
4326
4327      # The body is empty and there's no else clause until EOF or other code.
4328      error(filename, end_linenum, 'whitespace/empty_if_body', 4,
4329            ('If statement had no body and no else clause'))
4330
4331
4332def FindCheckMacro(line):
4333  """Find a replaceable CHECK-like macro.
4334
4335  Args:
4336    line: line to search on.
4337  Returns:
4338    (macro name, start position), or (None, -1) if no replaceable
4339    macro is found.
4340  """
4341  for macro in _CHECK_MACROS:
4342    i = line.find(macro)
4343    if i >= 0:
4344      # Find opening parenthesis.  Do a regular expression match here
4345      # to make sure that we are matching the expected CHECK macro, as
4346      # opposed to some other macro that happens to contain the CHECK
4347      # substring.
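      # For example (illustrative), a line containing 'MY_CHECK_HELPER(x)'
      # includes the substring 'CHECK' but is not one of the CHECK macros,
      # so it must not be matched here.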
4348      matched = Match(r'^(.*\b' + macro + r'\s*)\(', line)
4349      if not matched:
4350        continue
4351      return (macro, len(matched.group(1)))
4352  return (None, -1)
4353
4354
4355def CheckCheck(filename, clean_lines, linenum, error):
4356  """Checks the use of CHECK and EXPECT macros.
4357
4358  Args:
4359    filename: The name of the current file.
4360    clean_lines: A CleansedLines instance containing the file.
4361    linenum: The number of the line to check.
4362    error: The function to call with any errors found.
4363  """
4364
4365  # Decide the set of replacement macros that should be suggested
4366  lines = clean_lines.elided
4367  (check_macro, start_pos) = FindCheckMacro(lines[linenum])
4368  if not check_macro:
4369    return
4370
4371  # Find end of the boolean expression by matching parentheses
4372  (last_line, end_line, end_pos) = CloseExpression(
4373      clean_lines, linenum, start_pos)
4374  if end_pos < 0:
4375    return
4376
4377  # If the check macro is followed by something other than a
4378  # semicolon, assume users will log their own custom error messages
4379  # and don't suggest any replacements.
4380  if not Match(r'\s*;', last_line[end_pos:]):
4381    return
4382
4383  if linenum == end_line:
4384    expression = lines[linenum][start_pos + 1:end_pos - 1]
4385  else:
4386    expression = lines[linenum][start_pos + 1:]
4387    for i in xrange(linenum + 1, end_line):
4388      expression += lines[i]
4389    expression += last_line[0:end_pos - 1]
4390
4391  # Parse expression so that we can take parentheses into account.
4392  # This avoids false positives for inputs like "CHECK((a < 4) == b)",
4393  # which is not replaceable by CHECK_LE.
4394  lhs = ''
4395  rhs = ''
4396  operator = None
4397  while expression:
4398    matched = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||'
4399                    r'==|!=|>=|>|<=|<|\()(.*)$', expression)
4400    if matched:
4401      token = matched.group(1)
4402      if token == '(':
4403        # Parenthesized operand
4404        expression = matched.group(2)
4405        (end, _) = FindEndOfExpressionInLine(expression, 0, ['('])
4406        if end < 0:
4407          return  # Unmatched parenthesis
4408        lhs += '(' + expression[0:end]
4409        expression = expression[end:]
4410      elif token in ('&&', '||'):
4411        # Logical and/or operators.  This means the expression
4412        # contains more than one term, for example:
4413        #   CHECK(42 < a && a < b);
4414        #
4415        # These are not replaceable with CHECK_LE, so bail out early.
4416        return
4417      elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'):
4418        # Non-relational operator
4419        lhs += token
4420        expression = matched.group(2)
4421      else:
4422        # Relational operator
4423        operator = token
4424        rhs = matched.group(2)
4425        break
4426    else:
4427      # Unparenthesized operand.  Instead of appending to lhs one character
4428      # at a time, we do another regular expression match to consume several
4429      # characters at once if possible.  Trivial benchmark shows that this
4430      # is more efficient when the operands are longer than a single
4431      # character, which is generally the case.
4432      matched = Match(r'^([^-=!<>()&|]+)(.*)$', expression)
4433      if not matched:
4434        matched = Match(r'^(\s*\S)(.*)$', expression)
4435        if not matched:
4436          break
4437      lhs += matched.group(1)
4438      expression = matched.group(2)
4439
4440  # Only apply checks if we got all parts of the boolean expression
4441  if not (lhs and operator and rhs):
4442    return
4443
4444  # Check that rhs does not contain logical operators.  We already know
4445  # that lhs is fine since the loop above parses out && and ||.
4446  if rhs.find('&&') > -1 or rhs.find('||') > -1:
4447    return
4448
4449  # At least one of the operands must be a constant literal.  This is
4450  # to avoid suggesting replacements for unprintable things like
4451  # CHECK(variable != iterator)
4452  #
4453  # The following pattern matches decimal, hex integers, strings, and
4454  # characters (in that order).
4455  lhs = lhs.strip()
4456  rhs = rhs.strip()
4457  match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$'
4458  if Match(match_constant, lhs) or Match(match_constant, rhs):
4459    # Note: since we know both lhs and rhs, we can provide a more
4460    # descriptive error message like:
4461    #   Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42)
4462    # Instead of:
4463    #   Consider using CHECK_EQ instead of CHECK(a == b)
4464    #
4465    # We are still keeping the less descriptive message because if lhs
4466    # or rhs gets long, the error message might become unreadable.
4467    error(filename, linenum, 'readability/check', 2,
4468          'Consider using %s instead of %s(a %s b)' % (
4469              _CHECK_REPLACEMENT[check_macro][operator],
4470              check_macro, operator))
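  # For example (illustrative): 'CHECK(x == 42);' is flagged with a
  # suggestion to use CHECK_EQ, while 'CHECK(x == y);' (no constant operand)
  # and 'CHECK(a == b && c);' are left alone.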
4471
4472
4473def CheckAltTokens(filename, clean_lines, linenum, error):
4474  """Check alternative keywords being used in boolean expressions.
4475
4476  Args:
4477    filename: The name of the current file.
4478    clean_lines: A CleansedLines instance containing the file.
4479    linenum: The number of the line to check.
4480    error: The function to call with any errors found.
4481  """
4482  line = clean_lines.elided[linenum]
4483
4484  # Avoid preprocessor lines
4485  if Match(r'^\s*#', line):
4486    return
4487
4488  # Last ditch effort to avoid multi-line comments.  This will not help
4489  # if the comment started before the current line or ended after the
4490  # current line, but it catches most of the false positives.  At least,
4491  # it provides a way to workaround this warning for people who use
4492  # multi-line comments in preprocessor macros.
4493  #
4494  # TODO(unknown): remove this once cpplint has better support for
4495  # multi-line comments.
4496  if line.find('/*') >= 0 or line.find('*/') >= 0:
4497    return
4498
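  # Illustrative examples (assuming the usual _ALT_TOKEN_REPLACEMENT map
  # defined earlier in this file): 'if (x and y)' would be flagged with
  # "Use operator && instead of and", and '(not done)' with
  # "Use operator ! instead of not".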
4499  for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
4500    error(filename, linenum, 'readability/alt_tokens', 2,
4501          'Use operator %s instead of %s' % (
4502              _ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1)))
4503
4504
4505def GetLineWidth(line):
4506  """Determines the width of the line in column positions.
4507
4508  Args:
4509    line: A string, which may be a Unicode string.
4510
4511  Returns:
4512    The width of the line in column positions, accounting for Unicode
4513    combining characters and wide characters.
4514  """
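  # For example (illustrative): GetLineWidth('foo') is 3, while a line of
  # three full-width CJK characters has width 6.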
4515  if isinstance(line, unicode):
4516    width = 0
4517    for uc in unicodedata.normalize('NFC', line):
4518      if unicodedata.east_asian_width(uc) in ('W', 'F'):
4519        width += 2
4520      elif not unicodedata.combining(uc):
4521        # Issue 337
4522        # https://mail.python.org/pipermail/python-list/2012-August/628809.html
4523        if (sys.version_info.major, sys.version_info.minor) <= (3, 2):
4524          # https://github.com/python/cpython/blob/2.7/Include/unicodeobject.h#L81
4525          is_wide_build = sysconfig.get_config_var("Py_UNICODE_SIZE") >= 4
4526          # https://github.com/python/cpython/blob/2.7/Objects/unicodeobject.c#L564
4527          is_low_surrogate = 0xDC00 <= ord(uc) <= 0xDFFF
4528          if not is_wide_build and is_low_surrogate:
4529            width -= 1
4530
4531        width += 1
4532    return width
4533  else:
4534    return len(line)
4535
4536
4537def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
4538               error):
4539  """Checks rules from the 'C++ style rules' section of cppguide.html.
4540
4541  Most of these rules are hard to test (naming, comment style), but we
4542  do what we can.  In particular we check for 2-space indents, line lengths,
4543  tab usage, spaces inside code, etc.
4544
4545  Args:
4546    filename: The name of the current file.
4547    clean_lines: A CleansedLines instance containing the file.
4548    linenum: The number of the line to check.
4549    file_extension: The extension (without the dot) of the filename.
4550    nesting_state: A NestingState instance which maintains information about
4551                   the current stack of nested blocks being parsed.
4552    error: The function to call with any errors found.
4553  """
4554
4555  # Don't use "elided" lines here, otherwise we can't check commented lines.
4556  # Don't want to use "raw" either, because we don't want to check inside C++11
4557  # raw strings.
4558  raw_lines = clean_lines.lines_without_raw_strings
4559  line = raw_lines[linenum]
4560  prev = raw_lines[linenum - 1] if linenum > 0 else ''
4561
4562  if line.find('\t') != -1:
4563    error(filename, linenum, 'whitespace/tab', 1,
4564          'Tab found; better to use spaces')
4565
4566  # An indent of one or three spaces at the beginning of a line is odd;
4567  # it's hard to reconcile with 2-space indents.
4568  # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
4569  # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
4570  # if(RLENGTH > 20) complain = 0;
4571  # if(match($0, " +(error|private|public|protected):")) complain = 0;
4572  # if(match(prev, "&& *$")) complain = 0;
4573  # if(match(prev, "\\|\\| *$")) complain = 0;
4574  # if(match(prev, "[\",=><] *$")) complain = 0;
4575  # if(match($0, " <<")) complain = 0;
4576  # if(match(prev, " +for \\(")) complain = 0;
4577  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
4578  scope_or_label_pattern = r'\s*\w+\s*:\s*\\?$'
4579  classinfo = nesting_state.InnermostClass()
4580  initial_spaces = 0
4581  cleansed_line = clean_lines.elided[linenum]
4582  while initial_spaces < len(line) and line[initial_spaces] == ' ':
4583    initial_spaces += 1
4584  # There are certain situations we allow one space, notably for
4585  # section labels, and also lines containing multi-line raw strings.
4586  # We also don't check for lines that look like continuation lines
4587  # (of lines ending in double quotes, commas, equals, or angle brackets)
4588  # because the rules for how to indent those are non-trivial.
4589  if (not Search(r'[",=><] *$', prev) and
4590      (initial_spaces == 1 or initial_spaces == 3) and
4591      not Match(scope_or_label_pattern, cleansed_line) and
4592      not (clean_lines.raw_lines[linenum] != line and
4593           Match(r'^\s*""', line))):
4594    error(filename, linenum, 'whitespace/indent', 3,
4595          'Weird number of spaces at line-start.  '
4596          'Are you using a 2-space indent?')
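  # For example (illustrative), a line indented with three spaces such as
  # '   int x;' is flagged, unless the previous line ends with '"', ',', '=',
  # '<', or '>' and therefore looks like a continuation.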
4597
4598  if line and line[-1].isspace():
4599    error(filename, linenum, 'whitespace/end_of_line', 4,
4600          'Line ends in whitespace.  Consider deleting these extra spaces.')
4601
4602  # Check if the line is a header guard.
4603  is_header_guard = False
4604  if IsHeaderExtension(file_extension):
4605    cppvar = GetHeaderGuardCPPVariable(filename)
4606    if (line.startswith('#ifndef %s' % cppvar) or
4607        line.startswith('#define %s' % cppvar) or
4608        line.startswith('#endif  // %s' % cppvar)):
4609      is_header_guard = True
4610  # #include lines and header guards can be long, since there's no clean way to
4611  # split them.
4612  #
4613  # URLs can be long too.  It's possible to split these, but it makes them
4614  # harder to cut&paste.
4615  #
4616  # The "$Id:...$" comment may also get very long without it being the
4617  # developer's fault.
4618  #
4619  # Doxygen documentation copying can get pretty long when using an overloaded
4620  # function declaration
4621  if (not line.startswith('#include') and not is_header_guard and
4622      not Match(r'^\s*//.*http(s?)://\S*$', line) and
4623      not Match(r'^\s*//\s*[^\s]*$', line) and
4624      not Match(r'^// \$Id:.*#[0-9]+ \$$', line) and
4625      not Match(r'^\s*/// [@\\](copydoc|copydetails|copybrief) .*$', line)):
4626    line_width = GetLineWidth(line)
4627    if line_width > _line_length:
4628      error(filename, linenum, 'whitespace/line_length', 2,
4629            'Lines should be <= %i characters long' % _line_length)
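  # For example (illustrative), a long '#include "a/very/long/path/to/foo.h"'
  # line or a comment that is just a URL is exempt from the length check,
  # while an ordinary long statement is not.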
4630
4631  if (cleansed_line.count(';') > 1 and
4632      # allow simple single line lambdas
4633      not Match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}\n\r]*\}',
4634                line) and
4635      # for loops are allowed two ;'s (and may run over two lines).
4636      cleansed_line.find('for') == -1 and
4637      (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or
4638       GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and
4639      # It's ok to have many commands in a switch case that fits in 1 line
4640      not ((cleansed_line.find('case ') != -1 or
4641            cleansed_line.find('default:') != -1) and
4642           cleansed_line.find('break;') != -1)):
4643    error(filename, linenum, 'whitespace/newline', 0,
4644          'More than one command on the same line')
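  # For example (illustrative): 'a = 1; b = 2;' is typically flagged, while
  # 'for (i = 0; i < n; ++i) {' and 'case 1: x = 2; break;' are not.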
4645
4646  # Some more style checks
4647  CheckBraces(filename, clean_lines, linenum, error)
4648  CheckTrailingSemicolon(filename, clean_lines, linenum, error)
4649  CheckEmptyBlockBody(filename, clean_lines, linenum, error)
4650  CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
4651  CheckOperatorSpacing(filename, clean_lines, linenum, error)
4652  CheckParenthesisSpacing(filename, clean_lines, linenum, error)
4653  CheckCommaSpacing(filename, clean_lines, linenum, error)
4654  CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error)
4655  CheckSpacingForFunctionCall(filename, clean_lines, linenum, error)
4656  CheckCheck(filename, clean_lines, linenum, error)
4657  CheckAltTokens(filename, clean_lines, linenum, error)
4658  classinfo = nesting_state.InnermostClass()
4659  if classinfo:
4660    CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
4661
4662
4663_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
4664# Matches the first component of a filename delimited by -s and _s. That is:
4665#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
4666#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
4667#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
4668#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
4669_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
4670
4671
4672def _DropCommonSuffixes(filename):
4673  """Drops common suffixes like _test.cc or -inl.h from filename.
4674
4675  For example:
4676    >>> _DropCommonSuffixes('foo/foo-inl.h')
4677    'foo/foo'
4678    >>> _DropCommonSuffixes('foo/bar/foo.cc')
4679    'foo/bar/foo'
4680    >>> _DropCommonSuffixes('foo/foo_internal.h')
4681    'foo/foo'
4682    >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
4683    'foo/foo_unusualinternal'
4684
4685  Args:
4686    filename: The input filename.
4687
4688  Returns:
4689    The filename with the common suffix removed.
4690  """
4691  for suffix in itertools.chain(
4692      ('%s.%s' % (test_suffix.lstrip('_'), ext)
4693       for test_suffix, ext in itertools.product(_test_suffixes, GetNonHeaderExtensions())),
4694      ('%s.%s' % (suffix, ext)
4695       for suffix, ext in itertools.product(['inl', 'imp', 'internal'], GetHeaderExtensions()))):
4696    if (filename.endswith(suffix) and len(filename) > len(suffix) and
4697        filename[-len(suffix) - 1] in ('-', '_')):
4698      return filename[:-len(suffix) - 1]
4699  return os.path.splitext(filename)[0]
4700
4701
4702def _ClassifyInclude(fileinfo, include, is_system):
4703  """Figures out what kind of header 'include' is.
4704
4705  Args:
4706    fileinfo: The current file cpplint is running over. A FileInfo instance.
4707    include: The path to a #included file.
4708    is_system: True if the #include used <> rather than "".
4709
4710  Returns:
4711    One of the _XXX_HEADER constants.
4712
4713  For example:
4714    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
4715    _C_SYS_HEADER
4716    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
4717    _CPP_SYS_HEADER
4718    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
4719    _LIKELY_MY_HEADER
4720    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
4721    ...                  'bar/foo_other_ext.h', False)
4722    _POSSIBLE_MY_HEADER
4723    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
4724    _OTHER_HEADER
4725  """
4726  # This is a list of all standard c++ header files, except
4727  # those already checked for above.
4728  is_cpp_h = include in _CPP_HEADERS
4729
4730  # Headers with C++ extensions shouldn't be considered C system headers
4731  if is_system and os.path.splitext(include)[1] in ['.hpp', '.hxx', '.h++']:
4732    is_system = False
4733
4734  if is_system:
4735    if is_cpp_h:
4736      return _CPP_SYS_HEADER
4737    else:
4738      return _C_SYS_HEADER
4739
4740  # If the target file and the include we're checking share a basename
4741  # when we drop common extensions, and the include lives in the same
4742  # directory (or ../public), then it's likely to be owned by the target file.
4743  target_dir, target_base = (
4744      os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName())))
4745  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
4746  target_dir_pub = os.path.normpath(target_dir + '/../public')
4747  target_dir_pub = target_dir_pub.replace('\\', '/')
4748  if target_base == include_base and (
4749      include_dir == target_dir or
4750      include_dir == target_dir_pub):
4751    return _LIKELY_MY_HEADER
4752
4753  # If the target and include share some initial basename
4754  # component, it's possible the target is implementing the
4755  # include, so it's allowed to be first, but we'll never
4756  # complain if it's not there.
4757  target_first_component = _RE_FIRST_COMPONENT.match(target_base)
4758  include_first_component = _RE_FIRST_COMPONENT.match(include_base)
4759  if (target_first_component and include_first_component and
4760      target_first_component.group(0) ==
4761      include_first_component.group(0)):
4762    return _POSSIBLE_MY_HEADER
4763
4764  return _OTHER_HEADER
4765
4766
4767
4768def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
4769  """Check rules that are applicable to #include lines.
4770
4771  Strings on #include lines are NOT removed from the elided line, to make
4772  certain tasks easier. However, to prevent false positives, checks
4773  applicable to #include lines in CheckLanguage must be put here.
4774
4775  Args:
4776    filename: The name of the current file.
4777    clean_lines: A CleansedLines instance containing the file.
4778    linenum: The number of the line to check.
4779    include_state: An _IncludeState instance in which the headers are inserted.
4780    error: The function to call with any errors found.
4781  """
4782  fileinfo = FileInfo(filename)
4783  line = clean_lines.lines[linenum]
4784
4785  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
4786  # Only do this check if the included header follows google naming
4787  # conventions.  If not, assume that it's a 3rd party API that
4788  # requires special include conventions.
4789  #
4790  # We also make an exception for Lua headers, which follow google
4791  # naming convention but not the include convention.
4792  match = Match(r'#include\s*"([^/]+\.h)"', line)
4793  if match and not _THIRD_PARTY_HEADERS_PATTERN.match(match.group(1)):
4794    error(filename, linenum, 'build/include_subdir', 4,
4795          'Include the directory when naming .h files')
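  # For example (illustrative): '#include "bar.h"' is flagged here, while
  # '#include "foo/bar.h"' is not.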
4796
4797  # We shouldn't include a file more than once.  Actually, there are a
4798  # handful of instances where doing so is okay, but in general it's
4799  # not.
4800  match = _RE_PATTERN_INCLUDE.search(line)
4801  if match:
4802    include = match.group(2)
4803    is_system = (match.group(1) == '<')
4804    duplicate_line = include_state.FindHeader(include)
4805    if duplicate_line >= 0:
4806      error(filename, linenum, 'build/include', 4,
4807            '"%s" already included at %s:%s' %
4808            (include, filename, duplicate_line))
4809      return
4810
4811    for extension in GetNonHeaderExtensions():
4812      if (include.endswith('.' + extension) and
4813          os.path.dirname(fileinfo.RepositoryName()) != os.path.dirname(include)):
4814        error(filename, linenum, 'build/include', 4,
4815              'Do not include .' + extension + ' files from other packages')
4816        return
4817
4818    if not _THIRD_PARTY_HEADERS_PATTERN.match(include):
4819      include_state.include_list[-1].append((include, linenum))
4820
4821      # We want to ensure that headers appear in the right order:
4822      # 1) for foo.cc, foo.h  (preferred location)
4823      # 2) c system files
4824      # 3) cpp system files
4825      # 4) for foo.cc, foo.h  (deprecated location)
4826      # 5) other google headers
4827      #
4828      # We classify each include statement as one of those 5 types
4829      # using a number of techniques. The include_state object keeps
4830      # track of the highest type seen, and complains if we see a
4831      # lower type after that.
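      #
      # An illustrative ordering for foo/foo.cc (not exhaustive):
      #   #include "foo/foo.h"
      #   #include <stdio.h>
      #   #include <string>
      #   #include "bar/bar.h"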
4832      error_message = include_state.CheckNextIncludeOrder(
4833          _ClassifyInclude(fileinfo, include, is_system))
4834      if error_message:
4835        error(filename, linenum, 'build/include_order', 4,
4836              '%s. Should be: %s.h, c system, c++ system, other.' %
4837              (error_message, fileinfo.BaseName()))
4838      canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
4839      if not include_state.IsInAlphabeticalOrder(
4840          clean_lines, linenum, canonical_include):
4841        error(filename, linenum, 'build/include_alpha', 4,
4842              'Include "%s" not in alphabetical order' % include)
4843      include_state.SetLastHeader(canonical_include)
4844
4845
4846
4847def _GetTextInside(text, start_pattern):
4848  r"""Retrieves all the text between matching open and close parentheses.
4849
4850  Given a string of lines and a regular expression string, retrieve all the
4851  text following the expression and between opening punctuation symbols like
4852  (, [, or {, and the matching close-punctuation symbol.  This handles
4853  properly nested occurrences of the punctuation, so for text like
4854    printf(a(), b(c()));
4855  a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
4856  start_pattern must match a string that ends with an opening punctuation symbol.
4857
4858  Args:
4859    text: The lines to extract text from.  Comments and strings must be
4860          elided.  It can be a single line or span multiple lines.
4861    start_pattern: The regexp string indicating where to start extracting
4862                   the text.
4863  Returns:
4864    The extracted text.
4865    None if either the opening string or ending punctuation could not be found.
4866  """
4867  # TODO(unknown): Audit cpplint.py to see what places could be profitably
4868  # rewritten to use _GetTextInside (and currently use inferior regexp matching).
4869
4870  # Map each opening punctuation symbol to its matching closing symbol.
4871  matching_punctuation = {'(': ')', '{': '}', '[': ']'}
4872  closing_punctuation = set(itervalues(matching_punctuation))
4873
4874  # Find the position to start extracting text.
4875  match = re.search(start_pattern, text, re.M)
4876  if not match:  # start_pattern not found in text.
4877    return None
4878  start_position = match.end(0)
4879
4880  assert start_position > 0, (
4881      'start_pattern must end with an opening punctuation symbol.')
4882  assert text[start_position - 1] in matching_punctuation, (
4883      'start_pattern must end with an opening punctuation symbol.')
4884  # Stack of closing punctuations we expect to have in text after position.
4885  punctuation_stack = [matching_punctuation[text[start_position - 1]]]
4886  position = start_position
4887  while punctuation_stack and position < len(text):
4888    if text[position] == punctuation_stack[-1]:
4889      punctuation_stack.pop()
4890    elif text[position] in closing_punctuation:
4891      # A closing punctuation without matching opening punctuations.
4892      return None
4893    elif text[position] in matching_punctuation:
4894      punctuation_stack.append(matching_punctuation[text[position]])
4895    position += 1
4896  if punctuation_stack:
4897    # Opening punctuation left without a matching close-punctuation.
4898    return None
4899  # All punctuation matched.
4900  return text[start_position:position - 1]
4901
4902
4903# Patterns for matching call-by-reference parameters.
4904#
4905# Supports nested templates up to 2 levels deep using this messy pattern:
4906#   < (?: < (?: < [^<>]*
4907#               >
4908#           |   [^<>] )*
4909#         >
4910#     |   [^<>] )*
4911#   >
4912_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*'  # =~ [[:alpha:]][[:alnum:]]*
4913_RE_PATTERN_TYPE = (
4914    r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?'
4915    r'(?:\w|'
4916    r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|'
4917    r'::)+')
4918# A call-by-reference parameter ends with '& identifier'.
4919_RE_PATTERN_REF_PARAM = re.compile(
4920    r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*'
4921    r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]')
4922# A call-by-const-reference parameter either ends with 'const& identifier'
4923# or looks like 'const type& identifier' when 'type' is atomic.
4924_RE_PATTERN_CONST_REF_PARAM = (
4925    r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT +
4926    r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')')
4927# Stream types.
4928_RE_PATTERN_REF_STREAM_PARAM = (
4929    r'(?:.*stream\s*&\s*' + _RE_PATTERN_IDENT + r')')
4930
4931
4932def CheckLanguage(filename, clean_lines, linenum, file_extension,
4933                  include_state, nesting_state, error):
4934  """Checks rules from the 'C++ language rules' section of cppguide.html.
4935
4936  Some of these rules are hard to test (function overloading, using
4937  uint32 inappropriately), but we do the best we can.
4938
4939  Args:
4940    filename: The name of the current file.
4941    clean_lines: A CleansedLines instance containing the file.
4942    linenum: The number of the line to check.
4943    file_extension: The extension (without the dot) of the filename.
4944    include_state: An _IncludeState instance in which the headers are inserted.
4945    nesting_state: A NestingState instance which maintains information about
4946                   the current stack of nested blocks being parsed.
4947    error: The function to call with any errors found.
4948  """
4949  # If the line is empty or consists entirely of a comment, no need to
4950  # check it.
4951  line = clean_lines.elided[linenum]
4952  if not line:
4953    return
4954
4955  match = _RE_PATTERN_INCLUDE.search(line)
4956  if match:
4957    CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
4958    return
4959
4960  # Reset include state across preprocessor directives.  This is meant
4961  # to silence warnings for conditional includes.
4962  match = Match(r'^\s*#\s*(if|ifdef|ifndef|elif|else|endif)\b', line)
4963  if match:
4964    include_state.ResetSection(match.group(1))
4965
4966
4967  # Perform other checks now that we are sure that this is not an include line
4968  CheckCasts(filename, clean_lines, linenum, error)
4969  CheckGlobalStatic(filename, clean_lines, linenum, error)
4970  CheckPrintf(filename, clean_lines, linenum, error)
4971
4972  if IsHeaderExtension(file_extension):
4973    # TODO(unknown): check that 1-arg constructors are explicit.
4974    #                How to tell it's a constructor?
4975    #                (handled in CheckForNonStandardConstructs for now)
4976    # TODO(unknown): check that classes declare or disable copy/assign
4977    #                (level 1 error)
4978    pass
4979
4980  # Check if people are using the verboten C basic types.  The only exception
4981  # we regularly allow is "unsigned short port" for port.
4982  if Search(r'\bshort port\b', line):
4983    if not Search(r'\bunsigned short port\b', line):
4984      error(filename, linenum, 'runtime/int', 4,
4985            'Use "unsigned short" for ports, not "short"')
4986  else:
4987    match = Search(r'\b(short|long(?! +double)|long long)\b', line)
4988    if match:
4989      error(filename, linenum, 'runtime/int', 4,
4990            'Use int16/int64/etc, rather than the C type %s' % match.group(1))
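  # For example (illustrative): 'long long count;' is flagged (use int64
  # instead), while 'unsigned short port;' and 'long double x;' are not.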
4991
4992  # Check if some verboten operator overloading is going on
4993  # TODO(unknown): catch out-of-line unary operator&:
4994  #   class X {};
4995  #   int operator&(const X& x) { return 42; }  // unary operator&
4996  # The trick is it's hard to tell apart from binary operator&:
4997  #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
4998  if Search(r'\boperator\s*&\s*\(\s*\)', line):
4999    error(filename, linenum, 'runtime/operator', 4,
5000          'Unary operator& is dangerous.  Do not use it.')
5001
5002  # Check for suspicious usage of "if" like
5003  # } if (a == b) {
5004  if Search(r'\}\s*if\s*\(', line):
5005    error(filename, linenum, 'readability/braces', 4,
5006          'Did you mean "else if"? If not, start a new line for "if".')
5007
5008  # Check for potential format string bugs like printf(foo).
5009  # We constrain the pattern not to pick things like DocidForPrintf(foo).
5010  # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
5011  # TODO(unknown): Catch the following case. Need to change the calling
5012  # convention of the whole function to process multiple line to handle it.
5013  #   printf(
5014  #       boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
5015  printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
5016  if printf_args:
5017    match = Match(r'([\w.\->()]+)$', printf_args)
5018    if match and match.group(1) != '__VA_ARGS__':
5019      function_name = re.search(r'\b((?:string)?printf)\s*\(',
5020                                line, re.I).group(1)
5021      error(filename, linenum, 'runtime/printf', 4,
5022            'Potential format string bug. Do %s("%%s", %s) instead.'
5023            % (function_name, match.group(1)))
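  # For example (illustrative): 'printf(user_input);' is flagged, with the
  # suggested fix 'printf("%s", user_input)'.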
5024
5025  # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
5026  match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
5027  if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)):
5028    error(filename, linenum, 'runtime/memset', 4,
5029          'Did you mean "memset(%s, 0, %s)"?'
5030          % (match.group(1), match.group(2)))
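  # For example (illustrative): 'memset(buf, sizeof(buf), 0)' is flagged,
  # suggesting 'memset(buf, 0, sizeof(buf))' instead.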
5031
5032  if Search(r'\busing namespace\b', line):
5033    if Search(r'\bliterals\b', line):
5034      error(filename, linenum, 'build/namespaces_literals', 5,
5035            'Do not use namespace using-directives.  '
5036            'Use using-declarations instead.')
5037    else:
5038      error(filename, linenum, 'build/namespaces', 5,
5039            'Do not use namespace using-directives.  '
5040            'Use using-declarations instead.')
5041
5042  # Detect variable-length arrays.
5043  match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
5044  if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
5045      match.group(3).find(']') == -1):
5046    # Split the size using space and arithmetic operators as delimiters.
5047    # If any of the resulting tokens are not compile time constants then
5048    # report the error.
5049    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', match.group(3))
5050    is_const = True
5051    skip_next = False
5052    for tok in tokens:
5053      if skip_next:
5054        skip_next = False
5055        continue
5056
5057      if Search(r'sizeof\(.+\)', tok): continue
5058      if Search(r'arraysize\(\w+\)', tok): continue
5059
5060      tok = tok.lstrip('(')
5061      tok = tok.rstrip(')')
5062      if not tok: continue
5063      if Match(r'\d+', tok): continue
5064      if Match(r'0[xX][0-9a-fA-F]+', tok): continue
5065      if Match(r'k[A-Z0-9]\w*', tok): continue
5066      if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
5067      if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
5068      # A catch all for tricky sizeof cases, including 'sizeof expression',
5069      # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
5070      # requires skipping the next token because we split on ' ' and '*'.
5071      if tok.startswith('sizeof'):
5072        skip_next = True
5073        continue
5074      is_const = False
5075      break
5076    if not is_const:
5077      error(filename, linenum, 'runtime/arrays', 1,
5078            'Do not use variable-length arrays.  Use an appropriately named '
5079            "('k' followed by CamelCase) compile-time constant for the size.")
5080
5081  # Check for use of unnamed namespaces in header files.  Registration
5082  # macros are typically OK, so we allow use of "namespace {" on lines
5083  # that end with backslashes.
5084  if (IsHeaderExtension(file_extension)
5085      and Search(r'\bnamespace\s*{', line)
5086      and line[-1] != '\\'):
5087    error(filename, linenum, 'build/namespaces', 4,
5088          'Do not use unnamed namespaces in header files.  See '
5089          'https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
5090          ' for more information.')
5091
5092
5093def CheckGlobalStatic(filename, clean_lines, linenum, error):
5094  """Check for unsafe global or static objects.
5095
5096  Args:
5097    filename: The name of the current file.
5098    clean_lines: A CleansedLines instance containing the file.
5099    linenum: The number of the line to check.
5100    error: The function to call with any errors found.
5101  """
5102  line = clean_lines.elided[linenum]
5103
5104  # Match two lines at a time to support multiline declarations
5105  if linenum + 1 < clean_lines.NumLines() and not Search(r'[;({]', line):
5106    line += clean_lines.elided[linenum + 1].strip()
5107
5108  # Check for people declaring static/global STL strings at the top level.
5109  # This is dangerous because the C++ language does not guarantee that
5110  # globals with constructors are initialized before the first access, and
5111  # also because globals can be destroyed when some threads are still running.
5112  # TODO(unknown): Generalize this to also find static unique_ptr instances.
5113  # TODO(unknown): File bugs for clang-tidy to find these.
5114  match = Match(
5115      r'((?:|static +)(?:|const +))(?::*std::)?string( +const)? +'
5116      r'([a-zA-Z0-9_:]+)\b(.*)',
5117      line)
5118
5119  # Remove false positives:
5120  # - String pointers (as opposed to values).
5121  #    string *pointer
5122  #    const string *pointer
5123  #    string const *pointer
5124  #    string *const pointer
5125  #
5126  # - Functions and template specializations.
5127  #    string Function<Type>(...
5128  #    string Class<Type>::Method(...
5129  #
5130  # - Operators.  These are matched separately because operator names
5131  #   cross non-word boundaries, and trying to match both operators
5132  #   and functions at the same time would decrease accuracy of
5133  #   matching identifiers.
5134  #    string Class::operator*()
5135  if (match and
5136      not Search(r'\bstring\b(\s+const)?\s*[\*\&]\s*(const\s+)?\w', line) and
5137      not Search(r'\boperator\W', line) and
5138      not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)*\s*\(([^"]|$)', match.group(4))):
5139    if Search(r'\bconst\b', line):
5140      error(filename, linenum, 'runtime/string', 4,
5141            'For a static/global string constant, use a C style string '
5142            'instead: "%schar%s %s[]".' %
5143            (match.group(1), match.group(2) or '', match.group(3)))
5144    else:
5145      error(filename, linenum, 'runtime/string', 4,
5146            'Static/global string variables are not permitted.')
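  # For example (illustrative): 'static string name = "foo";' is flagged as a
  # forbidden static string, and 'static const string kName = "foo";' draws
  # the suggestion to use 'static const char kName[]' instead.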
5147
5148  if (Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line) or
5149      Search(r'\b([A-Za-z0-9_]*_)\(CHECK_NOTNULL\(\1\)\)', line)):
5150    error(filename, linenum, 'runtime/init', 4,
5151          'You seem to be initializing a member variable with itself.')
5152
5153
5154def CheckPrintf(filename, clean_lines, linenum, error):
5155  """Check for printf related issues.
5156
5157  Args:
5158    filename: The name of the current file.
5159    clean_lines: A CleansedLines instance containing the file.
5160    linenum: The number of the line to check.
5161    error: The function to call with any errors found.
5162  """
5163  line = clean_lines.elided[linenum]
5164
5165  # When snprintf is used, the second argument shouldn't be a literal.
5166  match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
5167  if match and match.group(2) != '0':
5168    # If 2nd arg is zero, snprintf is used to calculate size.
5169    error(filename, linenum, 'runtime/printf', 3,
5170          'If you can, use sizeof(%s) instead of %s as the 2nd arg '
5171          'to snprintf.' % (match.group(1), match.group(2)))
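  # For example (illustrative): 'snprintf(buf, 10, "%s", s)' is flagged,
  # suggesting sizeof(buf) as the second argument, while 'snprintf(NULL, 0,
  # "%s", s)' is left alone since it only computes the required size.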
5172
5173  # Check if some verboten C functions are being used.
5174  if Search(r'\bsprintf\s*\(', line):
5175    error(filename, linenum, 'runtime/printf', 5,
5176          'Never use sprintf. Use snprintf instead.')
5177  match = Search(r'\b(strcpy|strcat)\s*\(', line)
5178  if match:
5179    error(filename, linenum, 'runtime/printf', 4,
5180          'Almost always, snprintf is better than %s' % match.group(1))
5181
5182
5183def IsDerivedFunction(clean_lines, linenum):
5184  """Check if current line contains an inherited function.
5185
5186  Args:
5187    clean_lines: A CleansedLines instance containing the file.
5188    linenum: The number of the line to check.
5189  Returns:
5190    True if current line contains a function with "override"
5191    virt-specifier.
5192  """
5193  # Scan back a few lines for start of current function
5194  for i in xrange(linenum, max(-1, linenum - 10), -1):
5195    match = Match(r'^([^()]*\w+)\(', clean_lines.elided[i])
5196    if match:
5197      # Look for "override" after the matching closing parenthesis
5198      line, _, closing_paren = CloseExpression(
5199          clean_lines, i, len(match.group(1)))
5200      return (closing_paren >= 0 and
5201              Search(r'\boverride\b', line[closing_paren:]))
5202  return False
5203
5204
5205def IsOutOfLineMethodDefinition(clean_lines, linenum):
5206  """Check if current line contains an out-of-line method definition.
5207
5208  Args:
5209    clean_lines: A CleansedLines instance containing the file.
5210    linenum: The number of the line to check.
5211  Returns:
5212    True if current line contains an out-of-line method definition.
5213  """
5214  # Scan back a few lines for start of current function
5215  for i in xrange(linenum, max(-1, linenum - 10), -1):
5216    if Match(r'^([^()]*\w+)\(', clean_lines.elided[i]):
5217      return Match(r'^[^()]*\w+::\w+\(', clean_lines.elided[i]) is not None
5218  return False
5219
5220
5221def IsInitializerList(clean_lines, linenum):
5222  """Check if current line is inside constructor initializer list.
5223
5224  Args:
5225    clean_lines: A CleansedLines instance containing the file.
5226    linenum: The number of the line to check.
5227  Returns:
5228    True if current line appears to be inside constructor initializer
5229    list, False otherwise.
5230  """
5231  for i in xrange(linenum, 1, -1):
5232    line = clean_lines.elided[i]
5233    if i == linenum:
5234      remove_function_body = Match(r'^(.*)\{\s*$', line)
5235      if remove_function_body:
5236        line = remove_function_body.group(1)
5237
5238    if Search(r'\s:\s*\w+[({]', line):
5239      # A lone colon tends to indicate the start of a constructor
5240      # initializer list.  It could also be a ternary operator, which
5241      # also tends to appear in constructor initializer lists as
5242      # opposed to parameter lists.
5243      return True
5244    if Search(r'\}\s*,\s*$', line):
5245      # A closing brace followed by a comma is probably the end of a
5246      # brace-initialized member in constructor initializer list.
5247      return True
5248    if Search(r'[{};]\s*$', line):
5249      # Found one of the following:
5250      # - A closing brace or semicolon, probably the end of the previous
5251      #   function.
5252      # - An opening brace, probably the start of current class or namespace.
5253      #
5254      # Current line is probably not inside an initializer list since
5255      # we saw one of those things without seeing the starting colon.
5256      return False
5257
5258  # Got to the beginning of the file without seeing the start of
5259  # constructor initializer list.
5260  return False
5261
5262
5263def CheckForNonConstReference(filename, clean_lines, linenum,
5264                              nesting_state, error):
5265  """Check for non-const references.
5266
5267  Separate from CheckLanguage since it scans backwards from current
5268  line, instead of scanning forward.
5269
5270  Args:
5271    filename: The name of the current file.
5272    clean_lines: A CleansedLines instance containing the file.
5273    linenum: The number of the line to check.
5274    nesting_state: A NestingState instance which maintains information about
5275                   the current stack of nested blocks being parsed.
5276    error: The function to call with any errors found.
5277  """
5278  # Do nothing if there is no '&' on current line.
5279  line = clean_lines.elided[linenum]
5280  if '&' not in line:
5281    return
5282
5283  # If a function is inherited, current function doesn't have much of
5284  # a choice, so any non-const references should not be blamed on
5285  # derived function.
5286  if IsDerivedFunction(clean_lines, linenum):
5287    return
5288
5289  # Don't warn on out-of-line method definitions, as we would warn on the
5290  # in-line declaration, if it isn't marked with 'override'.
5291  if IsOutOfLineMethodDefinition(clean_lines, linenum):
5292    return
5293
5294  # Long type names may be broken across multiple lines, usually in one
5295  # of these forms:
5296  #   LongType
5297  #       ::LongTypeContinued &identifier
5298  #   LongType::
5299  #       LongTypeContinued &identifier
5300  #   LongType<
5301  #       ...>::LongTypeContinued &identifier
5302  #
5303  # If we detected a type split across two lines, join the previous
5304  # line to current line so that we can match const references
5305  # accordingly.
5306  #
5307  # Note that this only scans back one line, since scanning back
5308  # arbitrary number of lines would be expensive.  If you have a type
5309  # that spans more than 2 lines, please use a typedef.
5310  if linenum > 1:
5311    previous = None
5312    if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line):
5313      # previous_line\n + ::current_line
5314      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$',
5315                        clean_lines.elided[linenum - 1])
5316    elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line):
5317      # previous_line::\n + current_line
5318      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$',
5319                        clean_lines.elided[linenum - 1])
5320    if previous:
5321      line = previous.group(1) + line.lstrip()
5322    else:
5323      # Check for templated parameter that is split across multiple lines
5324      endpos = line.rfind('>')
5325      if endpos > -1:
5326        (_, startline, startpos) = ReverseCloseExpression(
5327            clean_lines, linenum, endpos)
5328        if startpos > -1 and startline < linenum:
5329          # Found the matching < on an earlier line, collect all
5330          # pieces up to current line.
5331          line = ''
5332          for i in xrange(startline, linenum + 1):
5333            line += clean_lines.elided[i].strip()
5334
5335  # Check for non-const references in function parameters.  A single '&' may
5336  # be found in the following places:
5337  #   inside expression: binary & for bitwise AND
5338  #   inside expression: unary & for taking the address of something
5339  #   inside declarators: reference parameter
5340  # We will exclude the first two cases by checking that we are not inside a
5341  # function body, including one that was just introduced by a trailing '{'.
5342  # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
5343  if (nesting_state.previous_stack_top and
5344      not (isinstance(nesting_state.previous_stack_top, _ClassInfo) or
5345           isinstance(nesting_state.previous_stack_top, _NamespaceInfo))):
5346    # Not at toplevel, not within a class, and not within a namespace
5347    return
5348
5349  # Avoid initializer lists.  We only need to scan back from the
5350  # current line for something that starts with ':'.
5351  #
5352  # We don't need to check the current line, since the '&' would
5353  # appear inside the second set of parentheses on the current line as
5354  # opposed to the first set.
5355  if linenum > 0:
5356    for i in xrange(linenum - 1, max(0, linenum - 10), -1):
5357      previous_line = clean_lines.elided[i]
5358      if not Search(r'[),]\s*$', previous_line):
5359        break
5360      if Match(r'^\s*:\s+\S', previous_line):
5361        return
5362
5363  # Avoid preprocessors
5364  if Search(r'\\\s*$', line):
5365    return
5366
5367  # Avoid constructor initializer lists
5368  if IsInitializerList(clean_lines, linenum):
5369    return
5370
5371  # We allow non-const references in a few standard places, like functions
5372  # called "swap()" or iostream operators like "<<" or ">>".  Do not check
5373  # those function parameters.
5374  #
5375  # We also accept & in static_assert, which looks like a function but
5376  # it's actually a declaration expression.
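  #
  # For example (illustrative): 'void Update(Foo& foo);' is flagged below,
  # while 'void Update(const Foo& foo);' and 'void swap(Foo& a, Foo& b);'
  # are not.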
5377  whitelisted_functions = (r'(?:[sS]wap(?:<\w:+>)?|'
5378                           r'operator\s*[<>][<>]|'
5379                           r'static_assert|COMPILE_ASSERT'
5380                           r')\s*\(')
5381  if Search(whitelisted_functions, line):
5382    return
5383  elif not Search(r'\S+\([^)]*$', line):
5384    # Don't see a whitelisted function on this line.  Actually we
5385    # didn't see any function name on this line, so this is likely a
5386    # multi-line parameter list.  Try a bit harder to catch this case.
5387    for i in xrange(2):
5388      if (linenum > i and
5389          Search(whitelisted_functions, clean_lines.elided[linenum - i - 1])):
5390        return
5391
5392  decls = ReplaceAll(r'{[^}]*}', ' ', line)  # exclude function body
5393  for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
5394    if (not Match(_RE_PATTERN_CONST_REF_PARAM, parameter) and
5395        not Match(_RE_PATTERN_REF_STREAM_PARAM, parameter)):
5396      error(filename, linenum, 'runtime/references', 2,
5397            'Is this a non-const reference? '
5398            'If so, make const or use a pointer: ' +
5399            ReplaceAll(' *<', '<', parameter))
5400
5401
5402def CheckCasts(filename, clean_lines, linenum, error):
5403  """Various cast related checks.
5404
5405  Args:
5406    filename: The name of the current file.
5407    clean_lines: A CleansedLines instance containing the file.
5408    linenum: The number of the line to check.
5409    error: The function to call with any errors found.
5410  """
5411  line = clean_lines.elided[linenum]
5412
5413  # Check to see if they're using a conversion function cast.
5414  # I just try to capture the most common basic types, though there are more.
5415  # Parameterless conversion functions, such as bool(), are allowed as they are
5416  # probably a member operator declaration or default constructor.
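  #
  # Illustrative (hypothetical) examples of how the pattern below behaves:
  #   int(x)      -> flagged: use static_cast<int>(x) instead
  #   bool()      -> not matched (empty parentheses; see comment above)
  #   new int(5)  -> matched, but silenced via the new/template group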
5417  match = Search(
5418      r'(\bnew\s+(?:const\s+)?|\S<\s*(?:const\s+)?)?\b'
5419      r'(int|float|double|bool|char|int32|uint32|int64|uint64)'
5420      r'(\([^)].*)', line)
5421  expecting_function = ExpectingFunctionArgs(clean_lines, linenum)
5422  if match and not expecting_function:
5423    matched_type = match.group(2)
5424
5425    # matched_new_or_template is used to silence two false positives:
5426    # - New operators
5427    # - Template arguments with function types
5428    #
5429    # For template arguments, we match on types immediately following
5430    # an opening bracket without any spaces.  This is a fast way to
5431    # silence the common case where the function type is the first
5432    # template argument.  False negative with less-than comparison is
5433    # avoided because those operators are usually followed by a space.
5434    #
5435    #   function<double(double)>   // bracket + no space = false positive
5436    #   value < double(42)         // bracket + space = true positive
5437    matched_new_or_template = match.group(1)
5438
5439    # Avoid arrays by looking for brackets that come after the closing
5440    # parenthesis.
5441    if Match(r'\([^()]+\)\s*\[', match.group(3)):
5442      return
5443
5444    # Other things to ignore:
5445    # - Function pointers
5446    # - Casts to pointer types
5447    # - Placement new
5448    # - Alias declarations
5449    matched_funcptr = match.group(3)
5450    if (matched_new_or_template is None and
5451        not (matched_funcptr and
5452             (Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(',
5453                    matched_funcptr) or
5454              matched_funcptr.startswith('(*)'))) and
5455        not Match(r'\s*using\s+\S+\s*=\s*' + matched_type, line) and
5456        not Search(r'new\(\S+\)\s*' + matched_type, line)):
5457      error(filename, linenum, 'readability/casting', 4,
5458            'Using deprecated casting style.  '
5459            'Use static_cast<%s>(...) instead' %
5460            matched_type)
5461
5462  if not expecting_function:
5463    CheckCStyleCast(filename, clean_lines, linenum, 'static_cast',
5464                    r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)
5465
5466  # This doesn't catch all cases. Consider (const char * const)"hello".
5467  #
5468  # (char *) "foo" should always be a const_cast (reinterpret_cast won't
5469  # compile).
5470  if CheckCStyleCast(filename, clean_lines, linenum, 'const_cast',
5471                     r'\((char\s?\*+\s?)\)\s*"', error):
5472    pass
5473  else:
5474    # Check pointer casts for other than string constants
5475    CheckCStyleCast(filename, clean_lines, linenum, 'reinterpret_cast',
5476                    r'\((\w+\s?\*+\s?)\)', error)
5477
5478  # In addition, we look for people taking the address of a cast.  This
5479  # is dangerous -- casts can assign to temporaries, so the pointer doesn't
5480  # point where you think.
5481  #
5482  # Some non-identifier character is required before the '&' for the
5483  # expression to be recognized as a cast.  These are casts:
5484  #   expression = &static_cast<int*>(temporary());
5485  #   function(&(int*)(temporary()));
5486  #
5487  # This is not a cast:
5488  #   reference_type&(int* function_param);
5489  match = Search(
5490      r'(?:[^\w]&\(([^)*][^)]*)\)[\w(])|'
5491      r'(?:[^\w]&(static|dynamic|down|reinterpret)_cast\b)', line)
5492  if match:
5493    # Try a better error message when the & is bound to something
5494    # dereferenced by the casted pointer, as opposed to the casted
5495    # pointer itself.
5496    parenthesis_error = False
5497    match = Match(r'^(.*&(?:static|dynamic|down|reinterpret)_cast\b)<', line)
5498    if match:
5499      _, y1, x1 = CloseExpression(clean_lines, linenum, len(match.group(1)))
5500      if x1 >= 0 and clean_lines.elided[y1][x1] == '(':
5501        _, y2, x2 = CloseExpression(clean_lines, y1, x1)
5502        if x2 >= 0:
5503          extended_line = clean_lines.elided[y2][x2:]
5504          if y2 < clean_lines.NumLines() - 1:
5505            extended_line += clean_lines.elided[y2 + 1]
5506          if Match(r'\s*(?:->|\[)', extended_line):
5507            parenthesis_error = True
5508
5509    if parenthesis_error:
5510      error(filename, linenum, 'readability/casting', 4,
5511            ('Are you taking an address of something dereferenced '
5512             'from a cast?  Wrapping the dereferenced expression in '
5513             'parentheses will make the binding more obvious'))
5514    else:
5515      error(filename, linenum, 'runtime/casting', 4,
5516            ('Are you taking an address of a cast?  '
5517             'This is dangerous: could be a temp var.  '
5518             'Take the address before doing the cast, rather than after'))
5519
5520
5521def CheckCStyleCast(filename, clean_lines, linenum, cast_type, pattern, error):
5522  """Checks for a C-style cast by looking for the pattern.
5523
5524  Args:
5525    filename: The name of the current file.
5526    clean_lines: A CleansedLines instance containing the file.
5527    linenum: The number of the line to check.
5528    cast_type: The string for the C++ cast to recommend.  This is either
5529      reinterpret_cast, static_cast, or const_cast, depending.
5530    pattern: The regular expression used to find C-style casts.
5531    error: The function to call with any errors found.
5532
5533  Returns:
5534    True if an error was emitted.
5535    False otherwise.
5536  """
5537  line = clean_lines.elided[linenum]
5538  match = Search(pattern, line)
5539  if not match:
5540    return False
5541
5542  # Exclude lines with keywords that tend to look like casts
5543  context = line[0:match.start(1) - 1]
5544  if Match(r'.*\b(?:sizeof|alignof|alignas|[_A-Z][_A-Z0-9]*)\s*$', context):
5545    return False
5546
  # Try expanding the current context to see if we are one level of
  # parentheses inside a macro.
5549  if linenum > 0:
5550    for i in xrange(linenum - 1, max(0, linenum - 5), -1):
5551      context = clean_lines.elided[i] + context
5552  if Match(r'.*\b[_A-Z][_A-Z0-9]*\s*\((?:\([^()]*\)|[^()])*$', context):
5553    return False
5554
5555  # operator++(int) and operator--(int)
5556  if context.endswith(' operator++') or context.endswith(' operator--'):
5557    return False
5558
  # A single unnamed function argument tends to look like an old-style cast.
5560  # If we see those, don't issue warnings for deprecated casts.
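  # e.g. 'void F(bool);' contains '(bool)', but the trailing ';' shows that
  # this is a declaration rather than a cast, so no warning is issued.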
5561  remainder = line[match.end(0):]
5562  if Match(r'^\s*(?:;|const\b|throw\b|final\b|override\b|[=>{),]|->)',
5563           remainder):
5564    return False
5565
5566  # At this point, all that should be left is actual casts.
5567  error(filename, linenum, 'readability/casting', 4,
5568        'Using C-style cast.  Use %s<%s>(...) instead' %
5569        (cast_type, match.group(1)))
5570
5571  return True
5572
5573
5574def ExpectingFunctionArgs(clean_lines, linenum):
5575  """Checks whether where function type arguments are expected.
5576
5577  Args:
5578    clean_lines: A CleansedLines instance containing the file.
5579    linenum: The number of the line to check.
5580
5581  Returns:
5582    True if the line at 'linenum' is inside something that expects arguments
5583    of function types.
5584  """
5585  line = clean_lines.elided[linenum]
5586  return (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
5587          (linenum >= 2 and
5588           (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$',
5589                  clean_lines.elided[linenum - 1]) or
5590            Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$',
5591                  clean_lines.elided[linenum - 2]) or
5592            Search(r'\bstd::m?function\s*\<\s*$',
5593                   clean_lines.elided[linenum - 1]))))
5594
5595
5596_HEADERS_CONTAINING_TEMPLATES = (
5597    ('<deque>', ('deque',)),
5598    ('<functional>', ('unary_function', 'binary_function',
5599                      'plus', 'minus', 'multiplies', 'divides', 'modulus',
5600                      'negate',
5601                      'equal_to', 'not_equal_to', 'greater', 'less',
5602                      'greater_equal', 'less_equal',
5603                      'logical_and', 'logical_or', 'logical_not',
5604                      'unary_negate', 'not1', 'binary_negate', 'not2',
5605                      'bind1st', 'bind2nd',
5606                      'pointer_to_unary_function',
5607                      'pointer_to_binary_function',
5608                      'ptr_fun',
5609                      'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
5610                      'mem_fun_ref_t',
5611                      'const_mem_fun_t', 'const_mem_fun1_t',
5612                      'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
5613                      'mem_fun_ref',
5614                     )),
5615    ('<limits>', ('numeric_limits',)),
5616    ('<list>', ('list',)),
5617    ('<map>', ('map', 'multimap',)),
5618    ('<memory>', ('allocator', 'make_shared', 'make_unique', 'shared_ptr',
5619                  'unique_ptr', 'weak_ptr')),
5620    ('<queue>', ('queue', 'priority_queue',)),
5621    ('<set>', ('set', 'multiset',)),
5622    ('<stack>', ('stack',)),
5623    ('<string>', ('char_traits', 'basic_string',)),
5624    ('<tuple>', ('tuple',)),
5625    ('<unordered_map>', ('unordered_map', 'unordered_multimap')),
5626    ('<unordered_set>', ('unordered_set', 'unordered_multiset')),
5627    ('<utility>', ('pair',)),
5628    ('<vector>', ('vector',)),
5629
5630    # gcc extensions.
5631    # Note: std::hash is their hash, ::hash is our hash
5632    ('<hash_map>', ('hash_map', 'hash_multimap',)),
5633    ('<hash_set>', ('hash_set', 'hash_multiset',)),
5634    ('<slist>', ('slist',)),
5635    )
5636
5637_HEADERS_MAYBE_TEMPLATES = (
5638    ('<algorithm>', ('copy', 'max', 'min', 'min_element', 'sort',
5639                     'transform',
5640                    )),
5641    ('<utility>', ('forward', 'make_pair', 'move', 'swap')),
5642    )
5643
5644_RE_PATTERN_STRING = re.compile(r'\bstring\b')
5645
5646_re_pattern_headers_maybe_templates = []
5647for _header, _templates in _HEADERS_MAYBE_TEMPLATES:
5648  for _template in _templates:
5649    # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
5650    # 'type::max()'.
5651    _re_pattern_headers_maybe_templates.append(
5652        (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
5653            _template,
5654            _header))
5655
5656# Other scripts may reach in and modify this pattern.
5657_re_pattern_templates = []
5658for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
5659  for _template in _templates:
5660    _re_pattern_templates.append(
5661        (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
5662         _template + '<>',
5663         _header))
5664
5665
5666def FilesBelongToSameModule(filename_cc, filename_h):
5667  """Check if these two filenames belong to the same module.
5668
  The concept of a 'module' here is as follows:
5670  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
5671  same 'module' if they are in the same directory.
5672  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
5673  to belong to the same module here.
5674
5675  If the filename_cc contains a longer path than the filename_h, for example,
5676  '/absolute/path/to/base/sysinfo.cc', and this file would include
5677  'base/sysinfo.h', this function also produces the prefix needed to open the
5678  header. This is used by the caller of this function to more robustly open the
5679  header file. We don't have access to the real include paths in this context,
5680  so we need this guesswork here.
5681
5682  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
5683  according to this implementation. Because of this, this function gives
5684  some false positives. This should be sufficiently rare in practice.
5685
5686  Args:
5687    filename_cc: is the path for the source (e.g. .cc) file
    filename_h: is the path for the header (e.g. .h) file
5689
5690  Returns:
5691    Tuple with a bool and a string:
5692    bool: True if filename_cc and filename_h belong to the same module.
5693    string: the additional prefix needed to open the header file.
5694  """
5695  fileinfo_cc = FileInfo(filename_cc)
5696  if not fileinfo_cc.Extension().lstrip('.') in GetNonHeaderExtensions():
5697    return (False, '')
5698
5699  fileinfo_h = FileInfo(filename_h)
5700  if not IsHeaderExtension(fileinfo_h.Extension().lstrip('.')):
5701    return (False, '')
5702
5703  filename_cc = filename_cc[:-(len(fileinfo_cc.Extension()))]
5704  matched_test_suffix = Search(_TEST_FILE_SUFFIX, fileinfo_cc.BaseName())
5705  if matched_test_suffix:
5706    filename_cc = filename_cc[:-len(matched_test_suffix.group(1))]
5707
5708  filename_cc = filename_cc.replace('/public/', '/')
5709  filename_cc = filename_cc.replace('/internal/', '/')
5710
5711  filename_h = filename_h[:-(len(fileinfo_h.Extension()))]
5712  if filename_h.endswith('-inl'):
5713    filename_h = filename_h[:-len('-inl')]
5714  filename_h = filename_h.replace('/public/', '/')
5715  filename_h = filename_h.replace('/internal/', '/')
5716
5717  files_belong_to_same_module = filename_cc.endswith(filename_h)
5718  common_path = ''
5719  if files_belong_to_same_module:
5720    common_path = filename_cc[:-len(filename_h)]
5721  return files_belong_to_same_module, common_path
5722
5723
5724def UpdateIncludeState(filename, include_dict, io=codecs):
5725  """Fill up the include_dict with new includes found from the file.
5726
5727  Args:
5728    filename: the name of the header to read.
5729    include_dict: a dictionary in which the headers are inserted.
5730    io: The io factory to use to read the file. Provided for testability.
5731
5732  Returns:
5733    True if a header was successfully added. False otherwise.
5734  """
5735  headerfile = None
5736  try:
5737    headerfile = io.open(filename, 'r', 'utf8', 'replace')
5738  except IOError:
5739    return False
5740  linenum = 0
5741  for line in headerfile:
5742    linenum += 1
5743    clean_line = CleanseComments(line)
5744    match = _RE_PATTERN_INCLUDE.search(clean_line)
5745    if match:
5746      include = match.group(2)
5747      include_dict.setdefault(include, linenum)
5748  return True
5749
5750
5751def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
5752                              io=codecs):
5753  """Reports for missing stl includes.
5754
  This function will output warnings to make sure you are including the headers
  necessary for the STL containers and functions that you use. We only give one
  reason to include a header. For example, if you use both equal_to<> and
  less<> in a .h file, only one of them (the one that appears later in the
  file) will be reported as a reason to include <functional>.
5760
5761  Args:
5762    filename: The name of the current file.
5763    clean_lines: A CleansedLines instance containing the file.
5764    include_state: An _IncludeState instance.
5765    error: The function to call with any errors found.
5766    io: The IO factory to use to read the header file. Provided for unittest
5767        injection.
5768  """
5769  required = {}  # A map of header name to linenumber and the template entity.
5770                 # Example of required: { '<functional>': (1219, 'less<>') }
5771
5772  for linenum in xrange(clean_lines.NumLines()):
5773    line = clean_lines.elided[linenum]
5774    if not line or line[0] == '#':
5775      continue
5776
5777    # String is special -- it is a non-templatized type in STL.
5778    matched = _RE_PATTERN_STRING.search(line)
5779    if matched:
5780      # Don't warn about strings in non-STL namespaces:
5781      # (We check only the first match per line; good enough.)
5782      prefix = line[:matched.start()]
5783      if prefix.endswith('std::') or not prefix.endswith('::'):
5784        required['<string>'] = (linenum, 'string')
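      # e.g. 'std::string s;' and 'string s;' both require <string>, while
      # 'mynamespace::string s;' is skipped because of its '::' prefix.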
5785
5786    for pattern, template, header in _re_pattern_headers_maybe_templates:
5787      if pattern.search(line):
5788        required[header] = (linenum, template)
5789
    # The following check is just a speed-up; no semantics are changed.
    if '<' not in line:  # Reduces CPU time by skipping lines without '<'.
5792      continue
5793
5794    for pattern, template, header in _re_pattern_templates:
5795      matched = pattern.search(line)
5796      if matched:
5797        # Don't warn about IWYU in non-STL namespaces:
5798        # (We check only the first match per line; good enough.)
5799        prefix = line[:matched.start()]
5800        if prefix.endswith('std::') or not prefix.endswith('::'):
5801          required[header] = (linenum, template)
5802
5803  # The policy is that if you #include something in foo.h you don't need to
5804  # include it again in foo.cc. Here, we will look at possible includes.
5805  # Let's flatten the include_state include_list and copy it into a dictionary.
5806  include_dict = dict([item for sublist in include_state.include_list
5807                       for item in sublist])
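  # e.g. an include_list of [[('base/foo.h', 12)], [('vector', 13)]] flattens
  # to {'base/foo.h': 12, 'vector': 13}.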
5808
5809  # Did we find the header for this file (if any) and successfully load it?
5810  header_found = False
5811
5812  # Use the absolute path so that matching works properly.
5813  abs_filename = FileInfo(filename).FullName()
5814
5815  # For Emacs's flymake.
5816  # If cpplint is invoked from Emacs's flymake, a temporary file is generated
5817  # by flymake and that file name might end with '_flymake.cc'. In that case,
  # restore the original file name here so that the corresponding header file
  # can be found.
5820  # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
5821  # instead of 'foo_flymake.h'
5822  abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)
5823
5824  # include_dict is modified during iteration, so we iterate over a copy of
5825  # the keys.
5826  header_keys = list(include_dict.keys())
5827  for header in header_keys:
5828    (same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
5829    fullpath = common_path + header
5830    if same_module and UpdateIncludeState(fullpath, include_dict, io):
5831      header_found = True
5832
5833  # If we can't find the header file for a .cc, assume it's because we don't
5834  # know where to look. In that case we'll give up as we're not sure they
5835  # didn't include it in the .h file.
5836  # TODO(unknown): Do a better job of finding .h files so we are confident that
5837  # not having the .h file means there isn't one.
5838  if not header_found:
5839    for extension in GetNonHeaderExtensions():
5840      if filename.endswith('.' + extension):
5841        return
5842
5843  # All the lines have been processed, report the errors found.
5844  for required_header_unstripped in sorted(required, key=required.__getitem__):
5845    template = required[required_header_unstripped][1]
5846    if required_header_unstripped.strip('<>"') not in include_dict:
5847      error(filename, required[required_header_unstripped][0],
5848            'build/include_what_you_use', 4,
5849            'Add #include ' + required_header_unstripped + ' for ' + template)
5850
5851
5852_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')
5853
5854
5855def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
5856  """Check that make_pair's template arguments are deduced.
5857
5858  G++ 4.6 in C++11 mode fails badly if make_pair's template arguments are
5859  specified explicitly, and such use isn't intended in any case.
5860
5861  Args:
5862    filename: The name of the current file.
5863    clean_lines: A CleansedLines instance containing the file.
5864    linenum: The number of the line to check.
5865    error: The function to call with any errors found.
5866  """
5867  line = clean_lines.elided[linenum]
5868  match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line)
5869  if match:
5870    error(filename, linenum, 'build/explicit_make_pair',
5871          4,  # 4 = high confidence
5872          'For C++11-compatibility, omit template arguments from make_pair'
5873          ' OR use pair directly OR if appropriate, construct a pair directly')
5874
5875
5876def CheckRedundantVirtual(filename, clean_lines, linenum, error):
5877  """Check if line contains a redundant "virtual" function-specifier.
5878
5879  Args:
5880    filename: The name of the current file.
5881    clean_lines: A CleansedLines instance containing the file.
5882    linenum: The number of the line to check.
5883    error: The function to call with any errors found.
5884  """
5885  # Look for "virtual" on current line.
5886  line = clean_lines.elided[linenum]
5887  virtual = Match(r'^(.*)(\bvirtual\b)(.*)$', line)
5888  if not virtual: return
5889
5890  # Ignore "virtual" keywords that are near access-specifiers.  These
5891  # are only used in class base-specifier and do not apply to member
5892  # functions.
5893  if (Search(r'\b(public|protected|private)\s+$', virtual.group(1)) or
5894      Match(r'^\s+(public|protected|private)\b', virtual.group(3))):
5895    return
5896
5897  # Ignore the "virtual" keyword from virtual base classes.  Usually
  # there is a colon on the same line in these cases (virtual base
5899  # classes are rare in google3 because multiple inheritance is rare).
5900  if Match(r'^.*[^:]:[^:].*$', line): return
5901
5902  # Look for the next opening parenthesis.  This is the start of the
5903  # parameter list (possibly on the next line shortly after virtual).
5904  # TODO(unknown): doesn't work if there are virtual functions with
5905  # decltype() or other things that use parentheses, but csearch suggests
5906  # that this is rare.
5907  end_col = -1
5908  end_line = -1
5909  start_col = len(virtual.group(2))
5910  for start_line in xrange(linenum, min(linenum + 3, clean_lines.NumLines())):
5911    line = clean_lines.elided[start_line][start_col:]
5912    parameter_list = Match(r'^([^(]*)\(', line)
5913    if parameter_list:
5914      # Match parentheses to find the end of the parameter list
5915      (_, end_line, end_col) = CloseExpression(
5916          clean_lines, start_line, start_col + len(parameter_list.group(1)))
5917      break
5918    start_col = 0
5919
5920  if end_col < 0:
5921    return  # Couldn't find end of parameter list, give up
5922
5923  # Look for "override" or "final" after the parameter list
5924  # (possibly on the next few lines).
5925  for i in xrange(end_line, min(end_line + 3, clean_lines.NumLines())):
5926    line = clean_lines.elided[i][end_col:]
5927    match = Search(r'\b(override|final)\b', line)
5928    if match:
5929      error(filename, linenum, 'readability/inheritance', 4,
5930            ('"virtual" is redundant since function is '
5931             'already declared as "%s"' % match.group(1)))
5932
5933    # Set end_col to check whole lines after we are done with the
5934    # first line.
5935    end_col = 0
5936    if Search(r'[^\w]\s*$', line):
5937      break
5938
5939
5940def CheckRedundantOverrideOrFinal(filename, clean_lines, linenum, error):
5941  """Check if line contains a redundant "override" or "final" virt-specifier.
5942
5943  Args:
5944    filename: The name of the current file.
5945    clean_lines: A CleansedLines instance containing the file.
5946    linenum: The number of the line to check.
5947    error: The function to call with any errors found.
5948  """
5949  # Look for closing parenthesis nearby.  We need one to confirm where
5950  # the declarator ends and where the virt-specifier starts to avoid
5951  # false positives.
5952  line = clean_lines.elided[linenum]
5953  declarator_end = line.rfind(')')
5954  if declarator_end >= 0:
5955    fragment = line[declarator_end:]
5956  else:
5957    if linenum > 1 and clean_lines.elided[linenum - 1].rfind(')') >= 0:
5958      fragment = line
5959    else:
5960      return
5961
5962  # Check that at most one of "override" or "final" is present, not both
5963  if Search(r'\boverride\b', fragment) and Search(r'\bfinal\b', fragment):
5964    error(filename, linenum, 'readability/inheritance', 4,
5965          ('"override" is redundant since function is '
5966           'already declared as "final"'))
5967
5968
5969
5970
5971# Returns true if we are at a new block, and it is directly
5972# inside of a namespace.
5973def IsBlockInNameSpace(nesting_state, is_forward_declaration):
5974  """Checks that the new block is directly in a namespace.
5975
5976  Args:
5977    nesting_state: The _NestingState object that contains info about our state.
5978    is_forward_declaration: If the class is a forward declared class.
5979  Returns:
5980    Whether or not the new block is directly in a namespace.
5981  """
5982  if is_forward_declaration:
5983    return len(nesting_state.stack) >= 1 and (
5984      isinstance(nesting_state.stack[-1], _NamespaceInfo))
5985
5986
5987  return (len(nesting_state.stack) > 1 and
5988          nesting_state.stack[-1].check_namespace_indentation and
5989          isinstance(nesting_state.stack[-2], _NamespaceInfo))
5990
5991
5992def ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item,
5993                                    raw_lines_no_comments, linenum):
5994  """This method determines if we should apply our namespace indentation check.
5995
5996  Args:
5997    nesting_state: The current nesting state.
5998    is_namespace_indent_item: If we just put a new class on the stack, True.
5999      If the top of the stack is not a class, or we did not recently
6000      add the class, False.
6001    raw_lines_no_comments: The lines without the comments.
6002    linenum: The current line number we are processing.
6003
6004  Returns:
6005    True if we should apply our namespace indentation check. Currently, it
6006    only works for classes and namespaces inside of a namespace.
6007  """
6008
6009  is_forward_declaration = IsForwardClassDeclaration(raw_lines_no_comments,
6010                                                     linenum)
6011
6012  if not (is_namespace_indent_item or is_forward_declaration):
6013    return False
6014
6015  # If we are in a macro, we do not want to check the namespace indentation.
6016  if IsMacroDefinition(raw_lines_no_comments, linenum):
6017    return False
6018
6019  return IsBlockInNameSpace(nesting_state, is_forward_declaration)
6020
6021
6022# Call this method if the line is directly inside of a namespace.
6023# If the line above is blank (excluding comments) or the start of
6024# an inner namespace, it cannot be indented.
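# e.g. directly inside 'namespace foo {', an indented forward declaration
# such as '  class Bar;' is reported as runtime/indentation_namespace.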
6025def CheckItemIndentationInNamespace(filename, raw_lines_no_comments, linenum,
6026                                    error):
6027  line = raw_lines_no_comments[linenum]
6028  if Match(r'^\s+', line):
6029    error(filename, linenum, 'runtime/indentation_namespace', 4,
6030          'Do not indent within a namespace')
6031
6032
6033def ProcessLine(filename, file_extension, clean_lines, line,
6034                include_state, function_state, nesting_state, error,
6035                extra_check_functions=None):
6036  """Processes a single line in the file.
6037
6038  Args:
6039    filename: Filename of the file that is being processed.
6040    file_extension: The extension (dot not included) of the file.
6041    clean_lines: An array of strings, each representing a line of the file,
6042                 with comments stripped.
6043    line: Number of line being processed.
6044    include_state: An _IncludeState instance in which the headers are inserted.
6045    function_state: A _FunctionState instance which counts function lines, etc.
6046    nesting_state: A NestingState instance which maintains information about
6047                   the current stack of nested blocks being parsed.
6048    error: A callable to which errors are reported, which takes 4 arguments:
6049           filename, line number, error level, and message
6050    extra_check_functions: An array of additional check functions that will be
6051                           run on each source line. Each function takes 4
6052                           arguments: filename, clean_lines, line, error
6053  """
6054  raw_lines = clean_lines.raw_lines
6055  ParseNolintSuppressions(filename, raw_lines[line], line, error)
6056  nesting_state.Update(filename, clean_lines, line, error)
6057  CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line,
6058                               error)
6059  if nesting_state.InAsmBlock(): return
6060  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
6061  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
6062  CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error)
6063  CheckLanguage(filename, clean_lines, line, file_extension, include_state,
6064                nesting_state, error)
6065  CheckForNonConstReference(filename, clean_lines, line, nesting_state, error)
6066  CheckForNonStandardConstructs(filename, clean_lines, line,
6067                                nesting_state, error)
6068  CheckVlogArguments(filename, clean_lines, line, error)
6069  CheckPosixThreading(filename, clean_lines, line, error)
6070  CheckInvalidIncrement(filename, clean_lines, line, error)
6071  CheckMakePairUsesDeduction(filename, clean_lines, line, error)
6072  CheckRedundantVirtual(filename, clean_lines, line, error)
6073  CheckRedundantOverrideOrFinal(filename, clean_lines, line, error)
6074  if extra_check_functions:
6075    for check_fn in extra_check_functions:
6076      check_fn(filename, clean_lines, line, error)
6077
6078def FlagCxx11Features(filename, clean_lines, linenum, error):
6079  """Flag those c++11 features that we only allow in certain places.
6080
6081  Args:
6082    filename: The name of the current file.
6083    clean_lines: A CleansedLines instance containing the file.
6084    linenum: The number of the line to check.
6085    error: The function to call with any errors found.
6086  """
6087  line = clean_lines.elided[linenum]
6088
6089  include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line)
6090
6091  # Flag unapproved C++ TR1 headers.
6092  if include and include.group(1).startswith('tr1/'):
6093    error(filename, linenum, 'build/c++tr1', 5,
6094          ('C++ TR1 headers such as <%s> are unapproved.') % include.group(1))
6095
6096  # Flag unapproved C++11 headers.
6097  if include and include.group(1) in ('cfenv',
6098                                      'condition_variable',
6099                                      'fenv.h',
6100                                      'future',
6101                                      'mutex',
6102                                      'thread',
6103                                      'chrono',
6104                                      'ratio',
6105                                      'regex',
6106                                      'system_error',
6107                                     ):
6108    error(filename, linenum, 'build/c++11', 5,
6109          ('<%s> is an unapproved C++11 header.') % include.group(1))
6110
6111  # The only place where we need to worry about C++11 keywords and library
6112  # features in preprocessor directives is in macro definitions.
6113  if Match(r'\s*#', line) and not Match(r'\s*#\s*define\b', line): return
6114
6115  # These are classes and free functions.  The classes are always
6116  # mentioned as std::*, but we only catch the free functions if
6117  # they're not found by ADL.  They're alphabetical by header.
6118  for top_name in (
6119      # type_traits
6120      'alignment_of',
6121      'aligned_union',
6122      ):
6123    if Search(r'\bstd::%s\b' % top_name, line):
6124      error(filename, linenum, 'build/c++11', 5,
6125            ('std::%s is an unapproved C++11 class or function.  Send c-style '
6126             'an example of where it would make your code more readable, and '
6127             'they may let you use it.') % top_name)
6128
6129
6130def FlagCxx14Features(filename, clean_lines, linenum, error):
6131  """Flag those C++14 features that we restrict.
6132
6133  Args:
6134    filename: The name of the current file.
6135    clean_lines: A CleansedLines instance containing the file.
6136    linenum: The number of the line to check.
6137    error: The function to call with any errors found.
6138  """
6139  line = clean_lines.elided[linenum]
6140
6141  include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line)
6142
6143  # Flag unapproved C++14 headers.
6144  if include and include.group(1) in ('scoped_allocator', 'shared_mutex'):
6145    error(filename, linenum, 'build/c++14', 5,
6146          ('<%s> is an unapproved C++14 header.') % include.group(1))
6147
6148
6149def ProcessFileData(filename, file_extension, lines, error,
6150                    extra_check_functions=None):
6151  """Performs lint checks and reports any errors to the given error function.
6152
6153  Args:
6154    filename: Filename of the file that is being processed.
6155    file_extension: The extension (dot not included) of the file.
6156    lines: An array of strings, each representing a line of the file, with the
6157           last element being empty if the file is terminated with a newline.
6158    error: A callable to which errors are reported, which takes 4 arguments:
6159           filename, line number, error level, and message
6160    extra_check_functions: An array of additional check functions that will be
6161                           run on each source line. Each function takes 4
6162                           arguments: filename, clean_lines, line, error
6163  """
6164  lines = (['// marker so line numbers and indices both start at 1'] + lines +
6165           ['// marker so line numbers end in a known way'])
6166
6167  include_state = _IncludeState()
6168  function_state = _FunctionState()
6169  nesting_state = NestingState()
6170
6171  ResetNolintSuppressions()
6172
6173  CheckForCopyright(filename, lines, error)
6174  ProcessGlobalSuppresions(lines)
6175  RemoveMultiLineComments(filename, lines, error)
6176  clean_lines = CleansedLines(lines)
6177
6178  if IsHeaderExtension(file_extension):
6179    CheckForHeaderGuard(filename, clean_lines, error)
6180
6181  for line in xrange(clean_lines.NumLines()):
6182    ProcessLine(filename, file_extension, clean_lines, line,
6183                include_state, function_state, nesting_state, error,
6184                extra_check_functions)
6185    FlagCxx11Features(filename, clean_lines, line, error)
6186  nesting_state.CheckCompletedBlocks(filename, error)
6187
6188  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)
6189
6190  # Check that the .cc file has included its header if it exists.
6191  if _IsSourceExtension(file_extension):
6192    CheckHeaderFileIncluded(filename, include_state, error)
6193
6194  # We check here rather than inside ProcessLine so that we see raw
6195  # lines rather than "cleaned" lines.
6196  CheckForBadCharacters(filename, lines, error)
6197
6198  CheckForNewlineAtEOF(filename, lines, error)
6199
6200def ProcessConfigOverrides(filename):
6201  """ Loads the configuration files and processes the config overrides.
6202
6203  Args:
6204    filename: The name of the file being processed by the linter.
6205
6206  Returns:
6207    False if the current |filename| should not be processed further.
6208  """
6209
6210  abs_filename = os.path.abspath(filename)
6211  cfg_filters = []
6212  keep_looking = True
6213  while keep_looking:
6214    abs_path, base_name = os.path.split(abs_filename)
6215    if not base_name:
6216      break  # Reached the root directory.
6217
6218    cfg_file = os.path.join(abs_path, "CPPLINT.cfg")
6219    abs_filename = abs_path
6220    if not os.path.isfile(cfg_file):
6221      continue
6222
6223    try:
6224      with open(cfg_file) as file_handle:
6225        for line in file_handle:
6226          line, _, _ = line.partition('#')  # Remove comments.
6227          if not line.strip():
6228            continue
6229
6230          name, _, val = line.partition('=')
6231          name = name.strip()
6232          val = val.strip()
6233          if name == 'set noparent':
6234            keep_looking = False
6235          elif name == 'filter':
6236            cfg_filters.append(val)
6237          elif name == 'exclude_files':
6238            # When matching exclude_files pattern, use the base_name of
6239            # the current file name or the directory name we are processing.
6240            # For example, if we are checking for lint errors in /foo/bar/baz.cc
6241            # and we found the .cfg file at /foo/CPPLINT.cfg, then the config
6242            # file's "exclude_files" filter is meant to be checked against "bar"
6243            # and not "baz" nor "bar/baz.cc".
6244            if base_name:
6245              pattern = re.compile(val)
6246              if pattern.match(base_name):
6247                if _cpplint_state.quiet:
6248                  # Suppress "Ignoring file" warning when using --quiet.
6249                  return False
6250                _cpplint_state.PrintInfo('Ignoring "%s": file excluded by "%s". '
6251                                 'File path component "%s" matches '
6252                                 'pattern "%s"\n' %
6253                                 (filename, cfg_file, base_name, val))
6254                return False
6255          elif name == 'linelength':
6256            global _line_length
6257            try:
6258              _line_length = int(val)
6259            except ValueError:
6260              _cpplint_state.PrintError('Line length must be numeric.')
6261          elif name == 'extensions':
6262            global _valid_extensions
6263            try:
6264              extensions = [ext.strip() for ext in val.split(',')]
6265              _valid_extensions = set(extensions)
6266            except ValueError:
              sys.stderr.write('Extensions should be a comma-separated list of values; '
6268                               'for example: extensions=hpp,cpp\n'
6269                               'This could not be parsed: "%s"' % (val,))
6270          elif name == 'root':
6271            global _root
6272            # root directories are specified relative to CPPLINT.cfg dir.
6273            _root = os.path.join(os.path.dirname(cfg_file), val)
6274          elif name == 'headers':
6275            ProcessHppHeadersOption(val)
6276          else:
6277            _cpplint_state.PrintError(
6278                'Invalid configuration option (%s) in file %s\n' %
6279                (name, cfg_file))
6280
6281    except IOError:
6282      _cpplint_state.PrintError(
6283          "Skipping config file '%s': Can't open for reading\n" % cfg_file)
6284      keep_looking = False
6285
6286  # Apply all the accumulated filters in reverse order (top-level directory
6287  # config options having the least priority).
6288  for cfg_filter in reversed(cfg_filters):
6289    _AddFilters(cfg_filter)
6290
6291  return True
6292
6293
6294def ProcessFile(filename, vlevel, extra_check_functions=None):
6295  """Does google-lint on a single file.
6296
6297  Args:
6298    filename: The name of the file to parse.
6299
6300    vlevel: The level of errors to report.  Every error of confidence
6301    >= verbose_level will be reported.  0 is a good default.
6302
6303    extra_check_functions: An array of additional check functions that will be
6304                           run on each source line. Each function takes 4
6305                           arguments: filename, clean_lines, line, error
6306  """
6307
6308  _SetVerboseLevel(vlevel)
6309  _BackupFilters()
6310  old_errors = _cpplint_state.error_count
6311
6312  if not ProcessConfigOverrides(filename):
6313    _RestoreFilters()
6314    return
6315
6316  lf_lines = []
6317  crlf_lines = []
6318  try:
6319    # Support the UNIX convention of using "-" for stdin.  Note that
6320    # we are not opening the file with universal newline support
6321    # (which codecs doesn't support anyway), so the resulting lines do
6322    # contain trailing '\r' characters if we are reading a file that
6323    # has CRLF endings.
6324    # If after the split a trailing '\r' is present, it is removed
6325    # below.
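    # e.g. a file containing 'a\r\nb\n' splits into ['a\r', 'b', '']; the
    # '\r' is stripped from the first line, which is then recorded in
    # crlf_lines, while the second line is recorded in lf_lines.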
6326    if filename == '-':
6327      lines = codecs.StreamReaderWriter(sys.stdin,
6328                                        codecs.getreader('utf8'),
6329                                        codecs.getwriter('utf8'),
6330                                        'replace').read().split('\n')
6331    else:
6332      lines = codecs.open(filename, 'r', 'utf8', 'replace').read().split('\n')
6333
6334    # Remove trailing '\r'.
6335    # The -1 accounts for the extra trailing blank line we get from split()
6336    for linenum in range(len(lines) - 1):
6337      if lines[linenum].endswith('\r'):
6338        lines[linenum] = lines[linenum].rstrip('\r')
6339        crlf_lines.append(linenum + 1)
6340      else:
6341        lf_lines.append(linenum + 1)
6342
6343  except IOError:
6344    _cpplint_state.PrintError(
6345        "Skipping input '%s': Can't open for reading\n" % filename)
6346    _RestoreFilters()
6347    return
6348
6349  # Note, if no dot is found, this will give the entire filename as the ext.
6350  file_extension = filename[filename.rfind('.') + 1:]
6351
6352  # When reading from stdin, the extension is unknown, so no cpplint tests
6353  # should rely on the extension.
6354  if filename != '-' and file_extension not in GetAllExtensions():
6355    _cpplint_state.PrintError('Ignoring %s; not a valid file name '
6356                     '(%s)\n' % (filename, ', '.join(GetAllExtensions())))
6357  else:
6358    ProcessFileData(filename, file_extension, lines, Error,
6359                    extra_check_functions)
6360
6361    # If end-of-line sequences are a mix of LF and CR-LF, issue
6362    # warnings on the lines with CR.
6363    #
6364    # Don't issue any warnings if all lines are uniformly LF or CR-LF,
6365    # since critique can handle these just fine, and the style guide
6366    # doesn't dictate a particular end of line sequence.
6367    #
6368    # We can't depend on os.linesep to determine what the desired
6369    # end-of-line sequence should be, since that will return the
6370    # server-side end-of-line sequence.
6371    if lf_lines and crlf_lines:
6372      # Warn on every line with CR.  An alternative approach might be to
6373      # check whether the file is mostly CRLF or just LF, and warn on the
      # minority; we bias toward LF here since most tools prefer LF.
6375      for linenum in crlf_lines:
6376        Error(filename, linenum, 'whitespace/newline', 1,
6377              'Unexpected \\r (^M) found; better to use only \\n')
6378
6379  # Suppress printing anything if --quiet was passed unless the error
6380  # count has increased after processing this file.
6381  if not _cpplint_state.quiet or old_errors != _cpplint_state.error_count:
6382    _cpplint_state.PrintInfo('Done processing %s\n' % filename)
6383  _RestoreFilters()
6384
6385
6386def PrintUsage(message):
6387  """Prints a brief usage string and exits, optionally with an error message.
6388
6389  Args:
6390    message: The optional error message.
6391  """
6392  sys.stderr.write(_USAGE  % (list(GetAllExtensions()),
6393       ','.join(list(GetAllExtensions())),
6394       GetHeaderExtensions(),
6395       ','.join(GetHeaderExtensions())))
6396
6397  if message:
6398    sys.exit('\nFATAL ERROR: ' + message)
6399  else:
6400    sys.exit(0)
6401
6402def PrintVersion():
6403  sys.stdout.write('Cpplint fork (https://github.com/cpplint/cpplint)\n')
6404  sys.stdout.write('cpplint ' + __VERSION__ + '\n')
6405  sys.stdout.write('Python ' + sys.version + '\n')
6406  sys.exit(0)
6407
6408def PrintCategories():
6409  """Prints a list of all the error-categories used by error messages.
6410
6411  These are the categories used to filter messages via --filter.
6412  """
6413  sys.stderr.write(''.join('  %s\n' % cat for cat in _ERROR_CATEGORIES))
6414  sys.exit(0)
6415
6416
6417def ParseArguments(args):
6418  """Parses the command line arguments.
6419
6420  This may set the output format and verbosity level as side-effects.
6421
6422  Args:
6423    args: The command line arguments:
6424
6425  Returns:
6426    The list of filenames to lint.
6427  """
6428  try:
6429    (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
6430                                                 'v=',
6431                                                 'version',
6432                                                 'counting=',
6433                                                 'filter=',
6434                                                 'root=',
6435                                                 'repository=',
6436                                                 'linelength=',
6437                                                 'extensions=',
6438                                                 'exclude=',
6439                                                 'recursive',
6440                                                 'headers=',
6441                                                 'quiet'])
6442  except getopt.GetoptError:
6443    PrintUsage('Invalid arguments.')
6444
6445  verbosity = _VerboseLevel()
6446  output_format = _OutputFormat()
6447  filters = ''
6448  quiet = _Quiet()
6449  counting_style = ''
6450  recursive = False
6451
6452  for (opt, val) in opts:
6453    if opt == '--help':
6454      PrintUsage(None)
6455    if opt == '--version':
6456      PrintVersion()
6457    elif opt == '--output':
6458      if val not in ('emacs', 'vs7', 'eclipse', 'junit'):
6459        PrintUsage('The only allowed output formats are emacs, vs7, eclipse '
6460                   'and junit.')
6461      output_format = val
6462    elif opt == '--quiet':
6463      quiet = True
6464    elif opt == '--verbose' or opt == '--v':
6465      verbosity = int(val)
6466    elif opt == '--filter':
6467      filters = val
6468      if not filters:
6469        PrintCategories()
6470    elif opt == '--counting':
6471      if val not in ('total', 'toplevel', 'detailed'):
6472        PrintUsage('Valid counting options are total, toplevel, and detailed')
6473      counting_style = val
6474    elif opt == '--root':
6475      global _root
6476      _root = val
6477    elif opt == '--repository':
6478      global _repository
6479      _repository = val
6480    elif opt == '--linelength':
6481      global _line_length
6482      try:
6483        _line_length = int(val)
6484      except ValueError:
        PrintUsage('Line length must be numeric.')
6486    elif opt == '--exclude':
6487      global _excludes
6488      if not _excludes:
6489        _excludes = set()
6490      _excludes.update(glob.glob(val))
6491    elif opt == '--extensions':
6492      global _valid_extensions
6493      try:
6494        _valid_extensions = set(val.split(','))
6495      except ValueError:
        PrintUsage('Extensions must be a comma-separated list.')
6497    elif opt == '--headers':
6498      ProcessHppHeadersOption(val)
6499    elif opt == '--recursive':
6500      recursive = True
6501
6502  if not filenames:
6503    PrintUsage('No files were specified.')
6504
6505  if recursive:
6506    filenames = _ExpandDirectories(filenames)
6507
6508  if _excludes:
6509    filenames = _FilterExcludedFiles(filenames)
6510
6511  _SetOutputFormat(output_format)
6512  _SetQuiet(quiet)
6513  _SetVerboseLevel(verbosity)
6514  _SetFilters(filters)
6515  _SetCountingStyle(counting_style)
6516
6517  return filenames
6518
6519def _ExpandDirectories(filenames):
6520  """Searches a list of filenames and replaces directories in the list with
6521  all files descending from those directories. Files with extensions not in
6522  the valid extensions list are excluded.
6523
6524  Args:
6525    filenames: A list of files or directories
6526
6527  Returns:
6528    A list of all files that are members of filenames or descended from a
6529    directory in filenames
6530  """
6531  expanded = set()
6532  for filename in filenames:
6533    if not os.path.isdir(filename):
6534      expanded.add(filename)
6535      continue
6536
6537    for root, _, files in os.walk(filename):
6538      for loopfile in files:
6539        fullname = os.path.join(root, loopfile)
6540        if fullname.startswith('.' + os.path.sep):
6541          fullname = fullname[len('.' + os.path.sep):]
6542        expanded.add(fullname)
6543
6544  filtered = []
6545  for filename in expanded:
6546    if os.path.splitext(filename)[1][1:] in GetAllExtensions():
6547      filtered.append(filename)
6548
6549  return filtered
6550
6551def _FilterExcludedFiles(filenames):
6552  """Filters out files listed in the --exclude command line switch. File paths
  in the switch are evaluated relative to the current working directory.
6554  """
6555  exclude_paths = [os.path.abspath(f) for f in _excludes]
6556  return [f for f in filenames if os.path.abspath(f) not in exclude_paths]
6557
6558def main():
6559  filenames = ParseArguments(sys.argv[1:])
6560  backup_err = sys.stderr
6561  try:
6562    # Change stderr to write with replacement characters so we don't die
6563    # if we try to print something containing non-ASCII characters.
6564    sys.stderr = codecs.StreamReader(sys.stderr, 'replace')
6565
6566    _cpplint_state.ResetErrorCounts()
6567    for filename in filenames:
6568      ProcessFile(filename, _cpplint_state.verbose_level)
6569    # If --quiet is passed, suppress printing error count unless there are errors.
6570    if not _cpplint_state.quiet or _cpplint_state.error_count > 0:
6571      _cpplint_state.PrintErrorCounts()
6572
6573    if _cpplint_state.output_format == 'junit':
6574      sys.stderr.write(_cpplint_state.FormatJUnitXML())
6575
6576  finally:
6577    sys.stderr = backup_err
6578
6579  sys.exit(_cpplint_state.error_count > 0)
6580
6581
6582if __name__ == '__main__':
6583  main()
6584