1#!/usr/bin/env python
2#
3# Copyright (c) 2009 Google Inc. All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9#    * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11#    * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15#    * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31"""Does google-lint on c++ files.
32
33The goal of this script is to identify places in the code that *may*
34be in non-compliance with google style.  It does not attempt to fix
up these problems -- the point is to educate.  It also does not
attempt to find all problems, or to ensure that everything it does
37find is legitimately a problem.
38
39In particular, we can get very confused by /* and // inside strings!
40We do a small hack, which is to ignore //'s with "'s after them on the
41same line, but it is far from perfect (in either direction).
42"""
43
44import codecs
45import copy
46import getopt
47import math  # for log
48import os
49import re
50import sre_compile
51import string
52import sys
53import unicodedata
54
55
# Help text describing the command-line syntax, the supported flags and the
# CPPLINT.cfg options.  Backslashes are written escaped so the literal does
# not contain invalid escape sequences.
_USAGE = """
Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
                   [--counting=total|toplevel|detailed] [--root=subdir]
                   [--linelength=digits]
        <file> [file] ...

  The style guidelines this tries to follow are those in
    http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml

  Every problem is given a confidence score from 1-5, with 5 meaning we are
  certain of the problem, and 1 meaning it could be a legitimate construct.
  This will miss some errors, and is not a substitute for a code review.

  To suppress false-positive errors of a certain category, add a
  'NOLINT(category)' comment to the line.  NOLINT or NOLINT(*)
  suppresses errors of all categories on that line.

  The files passed in will be linted; at least one file must be provided.
  Default linted extensions are .cc, .cpp, .cu, .cuh and .h.  Change the
  extensions with the --extensions flag.

  Flags:

    output=vs7
      By default, the output is formatted to ease emacs parsing.  Visual Studio
      compatible output (vs7) may also be used.  Other formats are unsupported.

    verbose=#
      Specify a number 0-5 to restrict errors to certain verbosity levels.

    filter=-x,+y,...
      Specify a comma-separated list of category-filters to apply: only
      error messages whose category names pass the filters will be printed.
      (Category names are printed with the message and look like
      "[whitespace/indent]".)  Filters are evaluated left to right.
      "-FOO" and "FOO" means "do not print categories that start with FOO".
      "+FOO" means "do print categories that start with FOO".

      Examples: --filter=-whitespace,+whitespace/braces
                --filter=whitespace,runtime/printf,+runtime/printf_format
                --filter=-,+build/include_what_you_use

      To see a list of all the categories used in cpplint, pass no arg:
         --filter=

    counting=total|toplevel|detailed
      The total number of errors found is always printed. If
      'toplevel' is provided, then the count of errors in each of
      the top-level categories like 'build' and 'whitespace' will
      also be printed. If 'detailed' is provided, then a count
      is provided for each category like 'build/class'.

    root=subdir
      The root directory used for deriving header guard CPP variable.
      By default, the header guard CPP variable is calculated as the relative
      path to the directory that contains .git, .hg, or .svn.  When this flag
      is specified, the relative path is calculated from the specified
      directory. If the specified directory does not exist, this flag is
      ignored.

      Examples:
        Assuming that src/.git exists, the header guard CPP variables for
        src/chrome/browser/ui/browser.h are:

        No flag => CHROME_BROWSER_UI_BROWSER_H_
        --root=chrome => BROWSER_UI_BROWSER_H_
        --root=chrome/browser => UI_BROWSER_H_

    linelength=digits
      This is the allowed line length for the project. The default value is
      80 characters.

      Examples:
        --linelength=120

    extensions=extension,extension,...
      The allowed file extensions that cpplint will check

      Examples:
        --extensions=hpp,cpp

    cpplint.py supports per-directory configurations specified in CPPLINT.cfg
    files. CPPLINT.cfg file can contain a number of key=value pairs.
    Currently the following options are supported:

      set noparent
      filter=+filter1,-filter2,...
      exclude_files=regex
      linelength=80

    "set noparent" option prevents cpplint from traversing directory tree
    upwards looking for more .cfg files in parent directories. This option
    is usually placed in the top-level project directory.

    The "filter" option is similar in function to --filter flag. It specifies
    message filters in addition to the |_DEFAULT_FILTERS| and those specified
    through --filter command-line flag.

    "exclude_files" allows to specify a regular expression to be matched against
    a file name. If the expression matches, the file is skipped and not run
    through linter.

    "linelength" allows to specify the allowed line length for the project.

    CPPLINT.cfg has an effect on files in the same directory and all
    sub-directories, unless overridden by a nested configuration file.

      Example file:
        filter=-build/include_order,+build/include_alpha
        exclude_files=.*\\.cc

    The above example disables build/include_order warning and enables
    build/include_alpha as well as excludes all .cc from being
    processed by linter, in the current directory (where the .cfg
    file is located) and all sub-directories.
"""
172
173# We categorize each error message we print.  Here are the categories.
174# We want an explicit list so we can list them all in cpplint --filter=.
175# If you add a new error message with a new category, add it to the list
176# here!  cpplint_unittest.py should tell you if you forget to do this.
# Every category an error message can be filed under.
_ERROR_CATEGORIES = [
    'build/class',
    'build/c++11',
    'build/deprecated',
    'build/endif_comment',
    'build/explicit_make_pair',
    'build/forward_decl',
    'build/header_guard',
    'build/include',
    'build/include_alpha',
    'build/include_order',
    'build/include_what_you_use',
    'build/namespaces',
    'build/printf_format',
    'build/storage_class',
    'legal/copyright',
    'legal/license',
    'mongo/polyfill',
    'readability/alt_tokens',
    'readability/braces',
    'readability/casting',
    'readability/check',
    'readability/constructors',
    'readability/fn_size',
    'readability/function',
    'readability/inheritance',
    'readability/multiline_comment',
    'readability/multiline_string',
    'readability/namespace',
    'readability/nolint',
    'readability/nul',
    'readability/streams',
    'readability/todo',
    'readability/utf8',
    'runtime/arrays',
    'runtime/casting',
    'runtime/explicit',
    'runtime/int',
    'runtime/init',
    'runtime/invalid_increment',
    'runtime/member_string_references',
    'runtime/memset',
    'runtime/indentation_namespace',
    'runtime/operator',
    'runtime/printf',
    'runtime/printf_format',
    'runtime/references',
    'runtime/string',
    'runtime/threadsafe_fn',
    'runtime/vlog',
    'whitespace/blank_line',
    'whitespace/braces',
    'whitespace/comma',
    'whitespace/comments',
    'whitespace/empty_conditional_body',
    'whitespace/empty_loop_body',
    'whitespace/end_of_line',
    'whitespace/ending_newline',
    'whitespace/forcolon',
    'whitespace/indent',
    'whitespace/line_length',
    'whitespace/newline',
    'whitespace/operators',
    'whitespace/parens',
    'whitespace/semicolon',
    'whitespace/tab',
    'whitespace/todo',
]

# Initial contents of the category filter list; the --filter= flag replaces
# it.  Every error is reported unless filtered, so list here only the
# categories that must be switched on explicitly.  Each entry starts with
# '-' or '+', exactly like the values given to --filter=.
_DEFAULT_FILTERS = ['-build/include_alpha']
251
252# We used to check for high-bit characters, but after much discussion we
253# decided those were OK, as long as they were in UTF-8 and didn't represent
254# hard-coded international strings, which belong in a separate i18n file.
255
256# C++ headers
# Names treated as C++ headers, grouped by origin.
_CPP_HEADERS = frozenset([
    # Legacy
    'algobase.h', 'algo.h', 'alloc.h', 'builtinbuf.h', 'bvector.h',
    'complex.h', 'defalloc.h', 'deque.h', 'editbuf.h', 'fstream.h',
    'function.h', 'hash_map', 'hash_map.h', 'hash_set', 'hash_set.h',
    'hashtable.h', 'heap.h', 'indstream.h', 'iomanip.h', 'iostream.h',
    'istream.h', 'iterator.h', 'list.h', 'map.h', 'multimap.h',
    'multiset.h', 'ostream.h', 'pair.h', 'parsestream.h', 'pfstream.h',
    'procbuf.h', 'pthread_alloc', 'pthread_alloc.h', 'rope', 'rope.h',
    'ropeimpl.h', 'set.h', 'slist', 'slist.h', 'stack.h',
    'stdiostream.h', 'stl_alloc.h', 'stl_relops.h', 'streambuf.h', 'stream.h',
    'strfile.h', 'strstream.h', 'tempbuf.h', 'tree.h', 'type_traits.h',
    'vector.h',
    # 17.6.1.2 C++ library headers
    'algorithm', 'array', 'atomic', 'bitset', 'chrono',
    'codecvt', 'complex', 'condition_variable', 'deque', 'exception',
    'forward_list', 'fstream', 'functional', 'future', 'initializer_list',
    'iomanip', 'ios', 'iosfwd', 'iostream', 'istream',
    'iterator', 'limits', 'list', 'locale', 'map',
    'memory', 'mutex', 'new', 'numeric', 'ostream',
    'queue', 'random', 'ratio', 'regex', 'set',
    'sstream', 'stack', 'stdexcept', 'streambuf', 'string',
    'strstream', 'system_error', 'thread', 'tuple', 'typeindex',
    'typeinfo', 'type_traits', 'unordered_map', 'unordered_set', 'utility',
    'valarray', 'vector',
    # 17.6.1.2 C++ headers for C library facilities
    'cassert', 'ccomplex', 'cctype', 'cerrno', 'cfenv',
    'cfloat', 'cinttypes', 'ciso646', 'climits', 'clocale',
    'cmath', 'csetjmp', 'csignal', 'cstdalign', 'cstdarg',
    'cstdbool', 'cstddef', 'cstdint', 'cstdio', 'cstdlib',
    'cstring', 'ctgmath', 'ctime', 'cuchar', 'cwchar',
    'cwctype',
])
391
392
393# These headers are excluded from [build/include] and [build/include_order]
394# checks:
395# - Anything not following google file name conventions (containing an
396#   uppercase character, such as Python.h or nsStringAPI.h, for example).
397# - Lua headers.
_THIRD_PARTY_HEADERS_PATTERN = re.compile(
    r'^(?:'
    r'[^/]*[A-Z][^/]*\.h'            # slash-free name with an uppercase letter
    r'|lua\.h|lauxlib\.h|lualib\.h'  # Lua headers
    r')$')
400
401
402# Assertion macros.  These are defined in base/logging.h and
403# testing/base/gunit.h.  Note that the _M versions need to come first
404# for substring matching to work.
_CHECK_MACROS = [
    'DCHECK',
    'CHECK',
    'EXPECT_TRUE_M',
    'EXPECT_TRUE',
    'ASSERT_TRUE_M',
    'ASSERT_TRUE',
    'EXPECT_FALSE_M',
    'EXPECT_FALSE',
    'ASSERT_FALSE_M',
    'ASSERT_FALSE',
]

# Maps every check macro to a table of {comparison operator: replacement
# macro name}.  The tables start empty and are filled by the loops below.
_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])

# Macros asserting that the condition holds keep the operator's own suffix,
# e.g. 'CHECK' with '==' maps to 'CHECK_EQ'.
for op, replacement in (('==', 'EQ'), ('!=', 'NE'),
                        ('>=', 'GE'), ('>', 'GT'),
                        ('<=', 'LE'), ('<', 'LT')):
  _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_' + replacement
  _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_' + replacement
  _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_' + replacement
  _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_' + replacement
  _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_' + replacement + '_M'
  _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_' + replacement + '_M'

# The *_FALSE macros get the suffix of the inverse comparison,
# e.g. 'EXPECT_FALSE' with '==' maps to 'EXPECT_NE'.
for op, inv_replacement in (('==', 'NE'), ('!=', 'EQ'),
                            ('>=', 'LT'), ('>', 'LE'),
                            ('<=', 'GT'), ('<', 'GE')):
  _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_' + inv_replacement
  _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_' + inv_replacement
  _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_' + inv_replacement + '_M'
  _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_' + inv_replacement + '_M'
433
434# Alternative tokens and their replacements.  For full list, see section 2.5
435# Alternative tokens [lex.digraph] in the C++ standard.
436#
437# Digraphs (such as '%:') are not included here since it's a mess to
438# match those on a word boundary.
_ALT_TOKEN_REPLACEMENT = {
    'and': '&&',
    'bitor': '|',
    'or': '||',
    'xor': '^',
    'compl': '~',
    'bitand': '&',
    'and_eq': '&=',
    'or_eq': '|=',
    'xor_eq': '^=',
    'not': '!',
    'not_eq': '!=',
}

# One regular expression matching any of the keywords above.  The leading
# "[ =()]" character class is meant to keep the keywords from matching
# outside of boolean expressions.
#
# C-style multi-line comments and multi-line strings can still produce false
# positives, but those have always been troublesome for cpplint.
_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
    r'[ =()](%s)(?=[ (]|$)' % '|'.join(_ALT_TOKEN_REPLACEMENT.keys()))
460
461
462# These constants define types of headers for use with
463# _IncludeState.CheckNextIncludeOrder().
# Header type constants, numbered 1 through 5 in this order.
(_C_SYS_HEADER,
 _CPP_SYS_HEADER,
 _LIKELY_MY_HEADER,
 _POSSIBLE_MY_HEADER,
 _OTHER_HEADER) = range(1, 6)

# Inline assembly states, numbered 0 through 3 in this order:
#   _NO_ASM      outside of an inline assembly block
#   _INSIDE_ASM  inside an inline assembly block
#   _END_ASM     last line of an inline assembly block
#   _BLOCK_ASM   the whole block is an inline assembly block
(_NO_ASM,
 _INSIDE_ASM,
 _END_ASM,
 _BLOCK_ASM) = range(4)

# Matches the start of an assembly block: an asm keyword, an optional
# volatile qualifier, then an opening brace or parenthesis.
_MATCH_ASM = re.compile(
    r'^\s*(?:asm|_asm|__asm|__asm__)'
    r'(?:\s+(volatile|__volatile__))?'
    r'\s*[{(]')
480
481
# Compiled regular expressions, keyed by pattern text.  Filled lazily by
# Match, ReplaceAll and Search.
_regexp_compile_cache = {}

# {str, set(int)}: for each error category, the line numbers on which errors
# of that category are expected and therefore suppressed.
_error_suppressions = {}

# Root directory used when deriving the header guard CPP variable.
# Set by the --root flag.
_root = None

# Maximum allowed line length.
# Set by the --linelength flag.
_line_length = 80

# File name extensions that are accepted.
# Set by the --extensions flag.
_valid_extensions = set(('cc', 'h', 'cpp', 'cu', 'cuh'))
499
def ParseNolintSuppressions(filename, raw_line, linenum, error):
  """Records the NOLINT suppressions requested on one line of input.

  Looks for a NOLINT or NOLINTNEXTLINE marker, optionally followed by a
  parenthesized category, and adds the affected line number to the global
  _error_suppressions map.  A category that is not listed in
  _ERROR_CATEGORIES is reported through the error handler instead.

  Args:
    filename: str, the name of the input file.
    raw_line: str, the line of input text, with comments.
    linenum: int, the number of the current line.
    error: function, an error handler.
  """
  nolint = Search(r'\bNOLINT(NEXTLINE)?\b(\([^)]+\))?', raw_line)
  if not nolint:
    return

  # NOLINTNEXTLINE targets the following line, plain NOLINT the current one.
  target_line = linenum + 1 if nolint.group(1) else linenum

  spec = nolint.group(2)
  if spec in (None, '(*)'):  # => "suppress all"
    _error_suppressions.setdefault(None, set()).add(target_line)
    return

  if not (spec.startswith('(') and spec.endswith(')')):
    return

  # Drop the surrounding parentheses to get the bare category name.
  name = spec[1:-1]
  if name in _ERROR_CATEGORIES:
    _error_suppressions.setdefault(name, set()).add(target_line)
  else:
    error(filename, linenum, 'readability/nolint', 5,
          'Unknown NOLINT error category: %s' % name)
530
531
def ResetNolintSuppressions():
  """Resets the set of NOLINT suppressions to empty.

  Clears the global _error_suppressions map in place, so existing references
  to the map observe the reset as well.
  """
  _error_suppressions.clear()
535
536
def IsErrorSuppressedByNolint(category, linenum):
  """Tells whether a NOLINT comment suppresses this category on this line.

  Looks the line up in the global _error_suppressions map, first under the
  given category and then under the None key, which holds the lines where
  every category is suppressed.

  Args:
    category: str, the category of the error.
    linenum: int, the current line number.
  Returns:
    bool, True iff the error should be suppressed due to a NOLINT comment.
  """
  for key in (category, None):
    if linenum in _error_suppressions.get(key, set()):
      return True
  return False
551
552
def Match(pattern, s):
  """Matches the string with the pattern, caching the compiled regexp.

  Args:
    pattern: str, the regular expression to apply.
    s: str, the string to match against.

  Returns:
    The match object if the pattern matches at the start of s, else None.
  """
  # The regexp compilation caching is inlined in both Match and Search for
  # performance reasons; factoring it out into a separate function turns out
  # to be noticeably expensive.
  #
  # Compile through the public re.compile entry point: sre_compile is a
  # private implementation module and is deprecated in recent Python 3.
  if pattern not in _regexp_compile_cache:
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].match(s)
561
562
def ReplaceAll(pattern, rep, s):
  """Replaces instances of pattern in a string with a replacement.

  The compiled regex is kept in a cache shared by Match and Search.

  Args:
    pattern: regex pattern
    rep: replacement text
    s: search string

  Returns:
    string with replacements made (or original string if no replacements)
  """
  # Compile through the public re.compile entry point: sre_compile is a
  # private implementation module and is deprecated in recent Python 3.
  if pattern not in _regexp_compile_cache:
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].sub(rep, s)
579
580
def Search(pattern, s):
  """Searches the string for the pattern, caching the compiled regexp.

  Args:
    pattern: str, the regular expression to apply.
    s: str, the string to search.

  Returns:
    The match object for the first location where the pattern matches in s,
    or None if it matches nowhere.
  """
  # Compile through the public re.compile entry point: sre_compile is a
  # private implementation module and is deprecated in recent Python 3.
  if pattern not in _regexp_compile_cache:
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].search(s)
586
587
class _IncludeState(object):
  """Records the headers a file includes and validates their ordering.

  include_list is a list of lists of (header, line number) pairs.  A nested
  list is used instead of a flat one so the entries are easier to update
  across preprocessor boundaries (see ResetSection).

  Call CheckNextIncludeOrder() once for each header in the file, passing one
  of the header type constants.  It returns an error message when the header
  appears in an illegal position.
  """
  # Within CheckNextIncludeOrder, self._section only ever advances through
  # these values; a header that would need it to move backwards yields an
  # error message instead.
  _INITIAL_SECTION = 0
  _MY_H_SECTION = 1
  _C_SECTION = 2
  _CPP_SECTION = 3
  _OTHER_H_SECTION = 4

  # Human-readable names used when building error messages.
  _TYPE_NAMES = {
      _C_SYS_HEADER: 'C system header',
      _CPP_SYS_HEADER: 'C++ system header',
      _LIKELY_MY_HEADER: 'header this file implements',
      _POSSIBLE_MY_HEADER: 'header this file may implement',
      _OTHER_HEADER: 'other header',
      }
  _SECTION_NAMES = {
      _INITIAL_SECTION: "... nothing. (This can't be an error.)",
      _MY_H_SECTION: 'a header this file implements',
      _C_SECTION: 'C system header',
      _CPP_SECTION: 'C++ system header',
      _OTHER_H_SECTION: 'other header',
      }

  def __init__(self):
    self.include_list = [[]]
    self.ResetSection('')

  def FindHeader(self, header):
    """Looks up an earlier include of the given header.

    Args:
      header: header to check.
    Returns:
      Line number of previous occurrence, or -1 if the header has not
      been seen before.
    """
    for group in self.include_list:
      for entry in group:
        if entry[0] == header:
          return entry[1]
    return -1

  def ResetSection(self, directive):
    """Restarts section tracking at a preprocessor directive.

    Args:
      directive: preprocessor directive (e.g. "if", "else").
    """
    # Path of the last header found; cleared along with the section.
    self._last_header = ''
    # The current section starts over from the beginning.
    self._section = self._INITIAL_SECTION

    # Entries are never popped from include_list: a conditional opens a new
    # group, while an alternative branch empties the most recent group.
    if directive in ('if', 'ifdef', 'ifndef'):
      self.include_list.append([])
    elif directive in ('else', 'elif'):
      self.include_list[-1] = []

  def SetLastHeader(self, header_path):
    """Remembers header_path as the most recently seen header."""
    self._last_header = header_path

  def CanonicalizeAlphabeticalOrder(self, header_path):
    """Returns a path canonicalized for alphabetical comparison.

    - turns a '-inl.h' suffix into '.h', since such headers are not required
      to come after the main header.
    - replaces "-" with "_" so they both compare the same.
    - lowercases everything, just in case.

    Args:
      header_path: Path to be canonicalized.

    Returns:
      Canonicalized path.
    """
    without_inl = header_path.replace('-inl.h', '.h')
    unified = without_inl.replace('-', '_')
    return unified.lower()

  def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
    """Tells whether a header sorts at or after the previous header.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      header_path: Canonicalized header to be checked.

    Returns:
      Returns true if the header is in alphabetical order.
    """
    # _last_header is reset to the empty string whenever the section changes,
    # so the first header of a section always compares as ordered.
    #
    # A blank line right before the include is taken to mean the headers are
    # intentionally sorted the way they are.
    return not (self._last_header > header_path and
                not Match(r'^\s*$', clean_lines.elided[linenum - 1]))

  def CheckNextIncludeOrder(self, header_type):
    """Returns a non-empty error message if the next header is out of order.

    The internal section state is updated as a side effect so that the next
    include can be checked.

    Args:
      header_type: One of the _XXX_HEADER constants defined above.

    Returns:
      The empty string if the header is in the right order, or an
      error message describing what's wrong.

    """
    previous_section = self._section
    # Built up front from the section as it was before this call.
    message = ('Found %s after %s' %
               (self._TYPE_NAMES[header_type],
                self._SECTION_NAMES[previous_section]))

    if header_type in (_C_SYS_HEADER, _CPP_SYS_HEADER):
      if header_type == _C_SYS_HEADER:
        target = self._C_SECTION
      else:
        target = self._CPP_SECTION
      if previous_section > target:
        # A system header after a later section is out of order.
        self._last_header = ''
        return message
      self._section = target
    elif header_type in (_LIKELY_MY_HEADER, _POSSIBLE_MY_HEADER):
      # Past the "my header" section such a header is treated as any other
      # header rather than reported.
      if previous_section <= self._MY_H_SECTION:
        self._section = self._MY_H_SECTION
      else:
        self._section = self._OTHER_H_SECTION
    else:
      assert header_type == _OTHER_HEADER
      self._section = self._OTHER_H_SECTION

    # Alphabetical ordering restarts whenever a new section begins.
    if self._section != previous_section:
      self._last_header = ''

    return ''
751
752
class _CppLintState(object):
  """Maintains module-wide state."""

  def __init__(self):
    self.verbose_level = 1  # global setting.
    self.error_count = 0    # global count of reported errors
    # filters to apply when emitting error messages
    self.filters = _DEFAULT_FILTERS[:]
    # backup of filter list. Used to restore the state after each file.
    self._filters_backup = self.filters[:]
    self.counting = 'total'  # In what way are we counting errors?
    self.errors_by_category = {}  # string to int dict storing error counts

    # output format:
    # "emacs" - format that emacs can parse (default)
    # "vs7" - format that Microsoft Visual Studio 7 can parse
    self.output_format = 'emacs'

  def SetOutputFormat(self, output_format):
    """Sets the output format for errors."""
    self.output_format = output_format

  def SetVerboseLevel(self, level):
    """Sets the module's verbosity, and returns the previous setting."""
    last_verbose_level = self.verbose_level
    self.verbose_level = level
    return last_verbose_level

  def SetCountingStyle(self, counting_style):
    """Sets the module's counting options."""
    self.counting = counting_style

  def SetFilters(self, filters):
    """Sets the error-message filters.

    These filters are applied when deciding whether to emit a given
    error message.

    Args:
      filters: A string of comma-separated filters (eg "+whitespace/indent").
               Each filter should start with + or -; else we die.

    Raises:
      ValueError: The comma-separated filters did not all start with '+' or '-'.
                  E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
    """
    # Default filters always have less priority than the flag ones.
    self.filters = _DEFAULT_FILTERS[:]
    self.AddFilters(filters)

  def AddFilters(self, filters):
    """Adds more filters to the existing list of error-message filters.

    The filter list is only modified when every filter is well-formed, so a
    rejected call leaves the current filters untouched.

    Args:
      filters: A string of comma-separated filters (eg "+whitespace/indent").
               Surrounding whitespace is stripped and empty items are skipped.

    Raises:
      ValueError: One of the current or newly given filters does not start
                  with '+' or '-'.
    """
    stripped = [filt.strip() for filt in filters.split(',')]
    new_filters = [filt for filt in stripped if filt]
    # Validate the existing filters followed by the new ones before changing
    # anything, so that a bad filter cannot leave the list half-updated.
    for filt in self.filters + new_filters:
      if not filt.startswith(('+', '-')):
        raise ValueError('Every filter in --filters must start with + or -'
                         ' (%s does not)' % filt)
    # Extend in place so that references to the list stay valid.
    self.filters.extend(new_filters)

  def BackupFilters(self):
    """Saves the current filter list to backup storage."""
    self._filters_backup = self.filters[:]

  def RestoreFilters(self):
    """Restores filters previously backed up."""
    self.filters = self._filters_backup[:]

  def ResetErrorCounts(self):
    """Sets the module's error statistic back to zero."""
    self.error_count = 0
    self.errors_by_category = {}

  def IncrementErrorCount(self, category):
    """Bumps the module's error statistic.

    Args:
      category: str, the full category of the error (eg "build/class").  With
                'toplevel' counting only the part before the first '/' is
                tallied; with 'total' counting no per-category tally is kept.
    """
    self.error_count += 1
    if self.counting in ('toplevel', 'detailed'):
      if self.counting != 'detailed':
        category = category.split('/')[0]
      if category not in self.errors_by_category:
        self.errors_by_category[category] = 0
      self.errors_by_category[category] += 1

  def PrintErrorCounts(self):
    """Print a summary of errors by category, and the total.

    The summary is written to sys.stderr, one line per category in sorted
    order followed by the overall total.
    """
    # items() exists on both Python 2 and Python 3 dicts (iteritems() does
    # not); sorting makes the output order deterministic.
    for category, count in sorted(self.errors_by_category.items()):
      sys.stderr.write('Category \'%s\' errors found: %d\n' %
                       (category, count))
    sys.stderr.write('Total errors found: %d\n' % self.error_count)
843
# Module-wide shared state object; the module-level helper functions below
# read and update it.
_cpplint_state = _CppLintState()
845
846
def _OutputFormat():
  """Gets the module's output format.

  Returns:
    The output_format value currently stored on the shared _cpplint_state.
  """
  return _cpplint_state.output_format
850
851
def _SetOutputFormat(output_format):
  """Sets the module's output format.

  Args:
    output_format: The new output format; forwarded unchanged to
                   _cpplint_state.SetOutputFormat.
  """
  _cpplint_state.SetOutputFormat(output_format)
855
856
def _VerboseLevel():
  """Returns the module's verbosity setting.

  Returns:
    The verbose_level value currently stored on the shared _cpplint_state.
  """
  return _cpplint_state.verbose_level
860
861
def _SetVerboseLevel(level):
  """Sets the module's verbosity, and returns the previous setting.

  Args:
    level: The new verbosity level.

  Returns:
    Whatever _cpplint_state.SetVerboseLevel returns for this level.
  """
  return _cpplint_state.SetVerboseLevel(level)
865
866
def _SetCountingStyle(level):
  """Sets the module's counting options.

  Args:
    level: The counting style (despite the parameter's name); it is forwarded
           unchanged to _cpplint_state.SetCountingStyle.
  """
  _cpplint_state.SetCountingStyle(level)
870
871
def _Filters():
  """Returns the module's list of output filters, as a list.

  The list object held by _cpplint_state is returned itself, not a copy.
  """
  return _cpplint_state.filters
875
876
def _SetFilters(filters):
  """Sets the module's error-message filters.

  These filters are applied when deciding whether to emit a given
  error message.

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
             Each filter should start with + or -; else we die.
  """
  _cpplint_state.SetFilters(filters)
888
def _AddFilters(filters):
  """Adds more filter overrides.

  Unlike _SetFilters, this function does not reset the current list of filters
  available.

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
             Each filter should start with + or -; else we die.
  """
  _cpplint_state.AddFilters(filters)
900
def _BackupFilters():
  """Saves the current filter list to backup storage.

  Delegates to _cpplint_state.BackupFilters.
  """
  _cpplint_state.BackupFilters()
904
def _RestoreFilters():
  """Restores filters previously backed up.

  Delegates to _cpplint_state.RestoreFilters.
  """
  _cpplint_state.RestoreFilters()
908
class _FunctionState(object):
  """Remembers the function being scanned and how many body lines it has."""

  # Base line limits; Check doubles them for every verbosity level.
  _NORMAL_TRIGGER = 250  # limit at --v=0, 500 at --v=1, etc.
  _TEST_TRIGGER = 400    # larger limit used for test functions.

  def __init__(self):
    self.current_function = ''
    self.lines_in_function = 0
    self.in_a_function = False

  def Begin(self, function_name):
    """Starts tracking a new function body.

    Args:
      function_name: The name of the function being tracked.
    """
    self.current_function = function_name
    self.lines_in_function = 0
    self.in_a_function = True

  def Count(self):
    """Adds one line to the current function body, if inside a function."""
    if not self.in_a_function:
      return
    self.lines_in_function += 1

  def Check(self, error, filename, linenum):
    """Reports an error when the function body has too many lines.

    Args:
      error: The function to call with any errors found.
      filename: The name of the current file.
      linenum: The number of the line to check.
    """
    is_test = Match(r'T(EST|est)', self.current_function)
    base_trigger = self._TEST_TRIGGER if is_test else self._NORMAL_TRIGGER
    trigger = base_trigger * 2**_VerboseLevel()

    if self.lines_in_function <= trigger:
      return

    # The confidence is the base-2 logarithm of how many times the base limit
    # was exceeded, truncated to an int and capped at 5.
    error_level = min(
        int(math.log(self.lines_in_function / base_trigger, 2)), 5)
    error(filename, linenum, 'readability/fn_size', error_level,
          'Small and focused functions are preferred:'
          ' %s has %d non-comment lines'
          ' (error triggered by exceeding %d lines).'  % (
              self.current_function, self.lines_in_function, trigger))

  def End(self):
    """Stops tracking the current function body."""
    self.in_a_function = False
963
964
class _IncludeError(Exception):
  """Signals that something is wrong with the include order in a file."""
968
969
class FileInfo(object):
  """Provides utility functions for filenames.

  FileInfo provides easy access to the components of a file's path
  relative to the project root.
  """

  def __init__(self, filename):
    # Stored exactly as given; FullName() makes it absolute on demand.
    self._filename = filename

  def FullName(self):
    """Returns the absolute path with backslashes replaced by slashes."""
    return os.path.abspath(self._filename).replace('\\', '/')

  def RepositoryName(self):
    r"""FullName after removing the local path to the repository.

    If we have a real absolute path name here we can try to do something smart:
    detecting the root of the checkout and truncating /path/to/checkout from
    the name so that we get header guards that don't include things like
    "C:\Documents and Settings\..." or "/home/username/..." in them and thus
    people on different computers who have checked the source out to different
    locations won't see bogus errors.

    Returns:
      The path relative to the detected checkout root, or FullName()
      unchanged when the file does not exist or no .svn/.git/.hg marker is
      found in any parent directory.
    """
    # The docstring above is a raw string on purpose: it contains literal
    # backslashes that would otherwise be invalid escape sequences.
    fullname = self.FullName()

    if os.path.exists(fullname):
      project_dir = os.path.dirname(fullname)

      if os.path.exists(os.path.join(project_dir, ".svn")):
        # If there's a .svn file in the current directory, we recursively look
        # up the directory tree for the top of the SVN checkout
        root_dir = project_dir
        one_up_dir = os.path.dirname(root_dir)
        while os.path.exists(os.path.join(one_up_dir, ".svn")):
          root_dir = os.path.dirname(root_dir)
          one_up_dir = os.path.dirname(one_up_dir)

        # The "+ 1" also drops the slash that follows the checkout root.
        prefix = os.path.commonprefix([root_dir, project_dir])
        return fullname[len(prefix) + 1:]

      # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
      # searching up from the current path.  The walk stops at the
      # filesystem root, where dirname() returns its own argument.
      root_dir = os.path.dirname(fullname)
      while (root_dir != os.path.dirname(root_dir) and
             not os.path.exists(os.path.join(root_dir, ".git")) and
             not os.path.exists(os.path.join(root_dir, ".hg")) and
             not os.path.exists(os.path.join(root_dir, ".svn"))):
        root_dir = os.path.dirname(root_dir)

      # Re-check the markers: the loop may also have ended because the
      # filesystem root was reached without finding any of them.
      if (os.path.exists(os.path.join(root_dir, ".git")) or
          os.path.exists(os.path.join(root_dir, ".hg")) or
          os.path.exists(os.path.join(root_dir, ".svn"))):
        prefix = os.path.commonprefix([root_dir, project_dir])
        return fullname[len(prefix) + 1:]

    # Don't know what to do; header guard warnings may be wrong...
    return fullname

  def Split(self):
    """Splits the file into the directory, basename, and extension.

    For 'chrome/browser/browser.cc', Split() would
    return ('chrome/browser', 'browser', '.cc')

    Returns:
      A tuple of (directory, basename, extension).
    """

    googlename = self.RepositoryName()
    project, rest = os.path.split(googlename)
    return (project,) + os.path.splitext(rest)

  def BaseName(self):
    """File base name - text after the final slash, before the final period."""
    return self.Split()[1]

  def Extension(self):
    """File extension - the final period and the text following it."""
    return self.Split()[2]

  def NoExtension(self):
    """Repository path of the file with its extension removed."""
    return '/'.join(self.Split()[0:2])

  def IsSource(self):
    """File has a source file extension."""
    return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
1058
1059
def _ShouldPrintError(category, confidence, linenum):
  """Decides whether an error of this category/confidence gets reported.

  An error is dropped for any of three reasons: a NOLINT comment suppresses
  it on that line, its confidence is below the verbosity level, or the
  filters exclude its category.

  Args:
    category: The error category string, e.g. "whitespace/indent".
    confidence: The confidence score attached to the error.
    linenum: The number of the line the error was found on.

  Returns:
    True if the error should be printed, False otherwise.
  """
  if IsErrorSuppressedByNolint(category, linenum):
    return False

  if confidence < _cpplint_state.verbose_level:
    return False

  # Filters are applied in order, so the last one whose prefix matches the
  # category decides: '-' hides the error, '+' shows it again.
  show = True
  for rule in _Filters():
    sign = rule[:1]
    if sign == '-' or sign == '+':
      if category.startswith(rule[1:]):
        show = (sign == '+')
    else:
      assert False  # should have been checked for in SetFilter.

  return show
1086
1087
def Error(filename, linenum, category, confidence, message):
  """Reports a lint error on stderr and counts it.

  Each report carries the location of the problem and a confidence value
  describing how sure we are that this is a genuine style regression and
  not a misidentification or a use that is sometimes justified.

  Nothing is counted or written when _ShouldPrintError() rejects the error,
  for example because a "NOLINT(category)" comment on the offending line
  suppresses it.

  Args:
    filename: The name of the file containing the error.
    linenum: The number of the line containing the error.
    category: A string used to describe the "category" this bug
      falls under: "whitespace", say, or "runtime".  Categories
      may have a hierarchy separated by slashes: "whitespace/indent".
    confidence: A number from 1-5 representing a confidence score for
      the error, with 5 meaning that we are certain of the problem,
      and 1 meaning that it could be a legitimate construct.
    message: The error message.
  """
  if not _ShouldPrintError(category, confidence, linenum):
    return

  _cpplint_state.IncrementErrorCount(category)

  # Only the location prefix differs between the output formats.
  output_format = _cpplint_state.output_format
  if output_format == 'vs7':
    template = '%s(%s):  %s  [%s] [%d]\n'
  elif output_format == 'eclipse':
    template = '%s:%s: warning: %s  [%s] [%d]\n'
  else:
    template = '%s:%s:  %s  [%s] [%d]\n'
  sys.stderr.write(template % (
      filename, linenum, message, category, confidence))
1121
1122
1123# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
1124_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
1125    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
1126# Match a single C style comment on the same line.
1127_RE_PATTERN_C_COMMENTS = r'/\*(?:[^*]|\*(?!/))*\*/'
1128# Matches multi-line C style comments.
1129# This RE is a little bit more complicated than one might expect, because we
1130# have to take care of space removals tools so we can handle comments inside
1131# statements better.
1132# The current rule is: We only clear spaces from both sides when we're at the
1133# end of the line. Otherwise, we try to remove spaces from the right side,
1134# if this doesn't work we try on left side but only if there's a non-character
1135# on the right.
1136_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
1137    r'(\s*' + _RE_PATTERN_C_COMMENTS + r'\s*$|' +
1138    _RE_PATTERN_C_COMMENTS + r'\s+|' +
1139    r'\s+' + _RE_PATTERN_C_COMMENTS + r'(?=\W)|' +
1140    _RE_PATTERN_C_COMMENTS + r')')
1141
1142
def IsCppString(line):
  """Tells whether the text right after 'line' would lie inside a string.

  Neither single-line nor multi-line comments are taken into account.

  Args:
    line: is a partial line of code starting from the 0..n.

  Returns:
    True, if next character appended to 'line' is inside a
    string constant.
  """
  # Neutralise escaped backslashes first, so that \\" is not mistaken for
  # an escaped quote \".
  text = line.replace(r'\\', 'XX')
  all_quotes = text.count('"')
  escaped_quotes = text.count(r'\"')
  char_literal_quotes = text.count("'\"'")
  # An odd number of remaining quotes means a string is still open.
  open_quotes = all_quotes - escaped_quotes - char_literal_quotes
  return open_quotes % 2 == 1
1158
1159
def CleanseRawStrings(raw_lines):
  """Replaces C++11 raw string literals with empty strings.

    Before:
      static const char kData[] = R"(
          multi-line string
          )";

    After:
      static const char kData[] = ""
          (replaced by "")
          "";

  Args:
    raw_lines: list of raw lines.

  Returns:
    A new list, one entry per input line, in which every raw string has
    been replaced by "".
  """

  # Closing delimiter of the raw string we are inside of, or None when we
  # are not inside one.
  closer = None
  cleaned = []
  for line in raw_lines:
    if closer:
      # We are inside a raw string that began on an earlier line.
      close_at = line.find(closer)
      if close_at < 0:
        # Still no end in sight: the whole line is string content.
        line = '""'
      else:
        # The string ends here.  Keep this line's leading whitespace, put
        # a "" in place of the string's tail and resume with the rest.
        indent = Match(r'^(\s*)\S', line).group(1)
        line = indent + '""' + line[close_at + len(closer):]
        closer = None

    # Replace raw strings that begin on this line.  Looping handles
    # several raw strings on one line.
    while closer is None:
      # See 2.14.15 [lex.string] for the raw string syntax.
      opener = Match(r'^(.*)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line)
      if not opener:
        break
      before, tag, after = opener.groups()
      closer = ')' + tag + '"'

      close_at = after.find(closer)
      if close_at < 0:
        # Start of a multi-line raw string.
        line = before + '""'
      else:
        # The raw string also ends on this line.
        line = before + '""' + after[close_at + len(closer):]
        closer = None

    cleaned.append(line)

  # TODO(unknown): if closer is not None here, we might want to
  # emit a warning for unterminated string.
  return cleaned
1224
1225
def FindNextMultiLineCommentStart(lines, lineix):
  """Returns the index of the next line that opens a multi-line comment.

  A line qualifies when, ignoring surrounding whitespace, it begins with
  '/*' and contains no '*/' after that marker.  len(lines) is returned when
  there is no such line at or after lineix.
  """
  for index in range(lineix, len(lines)):
    stripped = lines[index].strip()
    # A comment that is closed again on the same line is not multi-line.
    if stripped.startswith('/*') and '*/' not in stripped[2:]:
      return index
  return len(lines)
1235
1236
def FindNextMultiLineCommentEnd(lines, lineix):
  """Returns the index of the next line that closes a multi-line comment.

  Starting at lineix, the first line that ends with '*/' (ignoring
  surrounding whitespace) is reported; len(lines) means no line does.
  """
  for index in range(lineix, len(lines)):
    if lines[index].strip().endswith('*/'):
      return index
  return len(lines)
1244
1245
def RemoveMultiLineCommentsFromRange(lines, begin, end):
  """Overwrites lines[begin:end] in place with '// dummy' placeholders."""
  # A '// dummy' comment keeps each line non-empty, which avoids
  # unnecessary blank line warnings later in the code.
  index = begin
  while index < end:
    lines[index] = '// dummy'
    index += 1
1252
1253
def RemoveMultiLineComments(filename, lines, error):
  """Blanks out every multi-line (C-style) comment in lines, in place.

  Args:
    filename: The name of the current file, used when reporting errors.
    lines: The list of lines to modify.
    error: The function to call when a comment is never closed.
  """
  total = len(lines)
  cursor = 0
  while cursor < total:
    first = FindNextMultiLineCommentStart(lines, cursor)
    if first >= total:
      # No further multi-line comment in the file.
      return
    last = FindNextMultiLineCommentEnd(lines, first)
    if last >= total:
      error(filename, first + 1, 'readability/multiline_comment', 5,
            'Could not find end of multi-line comment')
      return
    RemoveMultiLineCommentsFromRange(lines, first, last + 1)
    # Resume the search right after the comment that was just removed.
    cursor = last + 1
1268
1269
def CleanseComments(line):
  """Strips a //-comment and any same-line /* */ comments from a line.

  Args:
    line: A line of C++ source.

  Returns:
    The line with single-line comments removed.
  """
  slashes_at = line.find('//')
  if slashes_at >= 0:
    code_part = line[:slashes_at]
    # A '//' that sits inside a string literal does not start a comment.
    if not IsCppString(code_part):
      line = code_part.rstrip()
  # Finally drop every /* ... */ that opens and closes on this line.
  return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
1284
1285
class CleansedLines(object):
  """Keeps several parallel views of a file's lines.

  The constructor fills four lists of equal length:
    raw_lines: the lines exactly as they were passed in,
    lines_without_raw_strings: raw_lines after CleanseRawStrings(),
    lines: lines_without_raw_strings with comments removed, and
    elided: lines_without_raw_strings with strings collapsed and
      comments removed.
  """

  def __init__(self, lines):
    self.elided = []
    self.lines = []
    self.raw_lines = lines
    self.num_lines = len(lines)
    self.lines_without_raw_strings = CleanseRawStrings(lines)
    for text in self.lines_without_raw_strings:
      self.lines.append(CleanseComments(text))
      self.elided.append(CleanseComments(self._CollapseStrings(text)))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Shrinks string and char literals on a line to bare "" or ''.

    Strings are collapsed before comments are stripped, so that text such
    as '"http://"' does not get mistaken for a comment.

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings.
    """
    # #include lines are returned untouched.
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided

    # Dropping escape sequences up front keeps the quote matching below
    # simple.  Things that look like escapes shouldn't occur outside of
    # strings and chars.
    remaining = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)

    # Single and double quotes are handled in one left-to-right pass,
    # otherwise nested quotes wouldn't work.
    pieces = []
    while True:
      # Split at the first quote character of either kind.
      found = Match(r'^([^\'"]*)([\'"])(.*)$', remaining)
      if not found:
        pieces.append(remaining)
        break
      head, quote, tail = found.groups()

      # A single quote that directly follows a numeric literal is a digit
      # separator, not the start of a char literal.  Floating point needs
      # no special handling, because the integer/fractional/exponent
      # parts all parse correctly as long as there are digits on both
      # sides of the separator; only something like "0.'3" would confuse
      # us (gcc 4.9.0 will not allow this literal).
      if (quote == "'" and
          Search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head)):
        literal = Match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail)
        pieces.append(head + literal.group(1).replace("'", ''))
        remaining = literal.group(2)
        continue

      closing = tail.find(quote)
      if closing < 0:
        # Unmatched quote: don't bother processing the rest of the line,
        # this is probably a multiline string.
        pieces.append(remaining)
        break

      # Keep only the pair of quotes and continue after the closing one.
      pieces.append(head + quote + quote)
      remaining = tail[closing + 1:]

    return ''.join(pieces)
1377
1378
def FindEndOfExpressionInLine(line, startpos, stack):
  """Find the position just after the end of current parenthesized expression.

  The line is scanned left-to-right from startpos.  Opening tokens are
  pushed onto stack and popped when their closing counterpart is met; '<'
  is only tentatively treated as a template bracket, because it may turn
  out to be an operator.  The stack argument is modified in place.

  Args:
    line: a CleansedLines line.
    startpos: start searching at this position.
    stack: nesting stack at startpos.

  Returns:
    On finding matching end: (index just after matching end, None)
    On finding an unclosed expression: (-1, None)
    Otherwise: (-1, new stack at end of this line)
  """
  # range() rather than xrange(): xrange does not exist on Python 3.
  for i in range(startpos, len(line)):
    char = line[i]
    if char in '([{':
      # Found start of parenthesized expression, push to expression stack
      stack.append(char)
    elif char == '<':
      # Found potential start of template argument list
      if i > 0 and line[i - 1] == '<':
        # Left shift operator
        if stack and stack[-1] == '<':
          stack.pop()
          if not stack:
            return (-1, None)
      elif i > 0 and Search(r'\boperator\s*$', line[0:i]):
        # operator<, don't add to stack
        continue
      else:
        # Tentative start of template argument list
        stack.append('<')
    elif char in ')]}':
      # Found end of parenthesized expression.
      #
      # If we are currently expecting a matching '>', the pending '<'
      # must have been an operator.  Remove them from expression stack.
      while stack and stack[-1] == '<':
        stack.pop()
      if not stack:
        return (-1, None)
      if ((stack[-1] == '(' and char == ')') or
          (stack[-1] == '[' and char == ']') or
          (stack[-1] == '{' and char == '}')):
        stack.pop()
        if not stack:
          return (i + 1, None)
      else:
        # Mismatched parentheses
        return (-1, None)
    elif char == '>':
      # Found potential end of template argument list.

      # Ignore "->" and operator functions
      # NOTE(review): the '<' branch above and FindStartOfExpressionInLine
      # test line[0:i] for a trailing "operator", whereas this slice stops
      # one character earlier, so "operator>" written without a space does
      # not appear to be caught here -- confirm whether that is intended.
      if (i > 0 and
          (line[i - 1] == '-' or Search(r'\boperator\s*$', line[0:i - 1]))):
        continue

      # Pop the stack if there is a matching '<'.  Otherwise, ignore
      # this '>' since it must be an operator.
      if stack:
        if stack[-1] == '<':
          stack.pop()
          if not stack:
            return (i + 1, None)
    elif char == ';':
      # Found something that looks like end of statements.  If we are
      # currently expecting a '>', the matching '<' must have been an
      # operator, since template argument list should not contain statements.
      while stack and stack[-1] == '<':
        stack.pop()
      if not stack:
        return (-1, None)

  # Did not find end of expression or unbalanced parentheses on this line
  return (-1, stack)
1455
1456
def CloseExpression(clean_lines, linenum, pos):
  """Finds where the bracket opened at the given position is closed.

  When clean_lines.elided[linenum][pos] is one of '(', '{', '[' or '<',
  the elided lines are scanned forward for the position that closes it.

  TODO(unknown): cpplint spends a fair bit of time matching parentheses.
  Ideally we would want to index all opening and closing parentheses once
  and have CloseExpression be just a simple lookup, but due to preprocessor
  tricks, this is not so easy.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *past* the closing brace, or
    (line, len(lines), -1) if we never find a close.  Note we ignore
    strings and comments when matching; and the line we return is the
    'cleansed' line at linenum.
  """

  current = clean_lines.elided[linenum]
  # Give up at once unless pos is on an opening bracket; '<<' and '<=' are
  # operators rather than brackets.
  if current[pos] not in '({[<' or Match(r'<[<=]', current[pos:]):
    return (current, clean_lines.NumLines(), -1)

  # The opener's own line is scanned from pos with an empty nesting stack.
  close_pos, nesting = FindEndOfExpressionInLine(current, pos, [])
  if close_pos > -1:
    return (current, linenum, close_pos)

  # Following lines are scanned from column 0, carrying the stack along,
  # for as long as something is still open.
  while nesting and linenum < clean_lines.NumLines() - 1:
    linenum += 1
    current = clean_lines.elided[linenum]
    close_pos, nesting = FindEndOfExpressionInLine(current, 0, nesting)
    if close_pos > -1:
      return (current, linenum, close_pos)

  # Did not find end of expression before end of file, give up
  return (current, clean_lines.NumLines(), -1)
1499
1500
def FindStartOfExpressionInLine(line, endpos, stack):
  """Find position at the matching start of current expression.

  This is almost the reverse of FindEndOfExpressionInLine, but note
  that the input position and returned position differs by 1.

  The line is scanned right-to-left from endpos.  Closing tokens are
  pushed onto stack and popped when their opening counterpart is met; '>'
  is only tentatively treated as a template bracket, because it may turn
  out to be an operator.  The stack argument is modified in place.

  Args:
    line: a CleansedLines line.
    endpos: start searching at this position.
    stack: nesting stack at endpos.

  Returns:
    On finding matching start: (index at matching start, None)
    On finding an unclosed expression: (-1, None)
    Otherwise: (-1, new stack at beginning of this line)
  """
  i = endpos
  while i >= 0:
    char = line[i]
    if char in ')]}':
      # Found end of expression, push to expression stack
      stack.append(char)
    elif char == '>':
      # Found potential end of template argument list.
      #
      # Ignore it if it's a "->" or ">=" or "operator>"
      if (i > 0 and
          (line[i - 1] == '-' or
           Match(r'\s>=\s', line[i - 1:]) or
           Search(r'\boperator\s*$', line[0:i]))):
        # Step over the preceding character as well; the decrement at the
        # bottom of the loop follows.
        i -= 1
      else:
        stack.append('>')
    elif char == '<':
      # Found potential start of template argument list
      if i > 0 and line[i - 1] == '<':
        # Left shift operator: skip its first '<' too.
        i -= 1
      else:
        # If there is a matching '>', we can pop the expression stack.
        # Otherwise, ignore this '<' since it must be an operator.
        if stack and stack[-1] == '>':
          stack.pop()
          if not stack:
            return (i, None)
    elif char in '([{':
      # Found start of expression.
      #
      # If there are any unmatched '>' on the stack, they must be
      # operators.  Remove those.
      while stack and stack[-1] == '>':
        stack.pop()
      if not stack:
        # An opener with nothing left to close: unbalanced expression.
        return (-1, None)
      if ((char == '(' and stack[-1] == ')') or
          (char == '[' and stack[-1] == ']') or
          (char == '{' and stack[-1] == '}')):
        stack.pop()
        if not stack:
          return (i, None)
      else:
        # Mismatched parentheses
        return (-1, None)
    elif char == ';':
      # Found something that looks like end of statements.  If we are
      # currently expecting a '<', the matching '>' must have been an
      # operator, since template argument list should not contain statements.
      while stack and stack[-1] == '>':
        stack.pop()
      if not stack:
        return (-1, None)

    i -= 1

  # Reached the beginning of the line with the expression still open.
  return (-1, stack)
1576
1577
def ReverseCloseExpression(clean_lines, linenum, pos):
  """Finds where the bracket closed at the given position was opened.

  When clean_lines.elided[linenum][pos] is one of ')', '}', ']' or '>',
  the elided lines are scanned backward for the position that opens it.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *at* the opening brace, or
    (line, 0, -1) if we never find the matching opening brace.  Note
    we ignore strings and comments when matching; and the line we
    return is the 'cleansed' line at linenum.
  """
  current = clean_lines.elided[linenum]
  if current[pos] not in ')}]>':
    return (current, 0, -1)

  # The closer's own line is scanned leftwards from pos with an empty
  # nesting stack.
  open_pos, nesting = FindStartOfExpressionInLine(current, pos, [])
  if open_pos > -1:
    return (current, linenum, open_pos)

  # Earlier lines are scanned from their last character, carrying the stack
  # along, for as long as something is still unmatched.
  while nesting and linenum > 0:
    linenum -= 1
    current = clean_lines.elided[linenum]
    open_pos, nesting = FindStartOfExpressionInLine(
        current, len(current) - 1, nesting)
    if open_pos > -1:
      return (current, linenum, open_pos)

  # Did not find start of expression before beginning of file, give up
  return (current, 0, -1)
1614
def make_polyfill_regex():
  """Builds the regex that detects banned std:: and boost:: names.

  Every entry of the name list is a regex fragment.  Each one is prefixed
  with 'boost::' and with 'std::' and followed by a word boundary, and all
  resulting alternatives are joined with '|'.

  Returns:
    The compiled regular expression object.
  """
  polyfill_required_names = [
    '_',
    'adopt_lock',
    'async',
    'chrono',
    'condition_variable',
    'condition_variable_any',
    'cv_status',
    'defer_lock',
    'future',
    'future_status',
    'launch',
    'lock_guard',
    'mutex',
    'notify_all_at_thread_exit',
    'packaged_task',
    'promise',
    'recursive_mutex',
    # This entry used to be 'shared_lock,' (comma inside the string), which
    # only matched the name when a literal comma followed it.
    'shared_lock',
    'shared_mutex',
    'shared_timed_mutex',
    # The lookahead keeps this_thread::at_thread_exit from being flagged.
    'this_thread(?!::at_thread_exit)',
    'thread',
    'timed_mutex',
    'try_to_lock',
    'unique_lock',
    'unordered_map',
    'unordered_multimap',
    'unordered_multiset',
    'unordered_set',
  ]

  # All boost:: alternatives come first, followed by all std:: ones.
  qualified_names = [namespace + name + "\\b"
                     for namespace in ('boost::', 'std::')
                     for name in polyfill_required_names]
  return re.compile('|'.join(qualified_names))
_RE_PATTERN_MONGO_POLYFILL = make_polyfill_regex()
1653
def CheckForMongoPolyfill(filename, clean_lines, linenum, error):
  """Reports std::/boost:: names matched by _RE_PATTERN_MONGO_POLYFILL.

  Args:
    filename: The name of the current file.
    clean_lines: An object whose 'elided' list holds the cleansed lines.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  elided_line = clean_lines.elided[linenum]
  if _RE_PATTERN_MONGO_POLYFILL.search(elided_line) is None:
    return
  error(filename, linenum, 'mongodb/polyfill', 5,
        'Illegal use of banned name from std::/boost::, use mongo::stdx:: variant instead')
1659
def CheckForMongoAtomic(filename, clean_lines, linenum, error):
  """Reports any line whose elided text contains 'std::atomic'.

  Args:
    filename: The name of the current file.
    clean_lines: An object whose 'elided' list holds the cleansed lines.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  elided_line = clean_lines.elided[linenum]
  if 'std::atomic' not in elided_line:
    return
  error(filename, linenum, 'mongodb/stdatomic', 5,
        'Illegal use of prohibited std::atomic<T>, use AtomicWord<T> or other types '
        'from "mongo/platform/atomic_word.h"')
1666
def CheckForMongoVolatile(filename, clean_lines, linenum, error):
  """Reports use of 'volatile' outside of __asm__ statements.

  A 'volatile' preceded by an underscore (as in __volatile__) is not
  reported, and neither is any line that contains "__asm__".

  Args:
    filename: The name of the current file.
    clean_lines: An object whose 'elided' list holds the cleansed lines.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  # '^' is an alternative to '[^_]' so that a 'volatile' at the very start
  # of the line, with no character in front of it, is caught as well.
  if re.search(r'(?:^|[^_])volatile', line) and "__asm__" not in line:
    error(filename, linenum, 'mongodb/volatile', 5,
          'Illegal use of the volatile storage keyword, use AtomicWord instead '
          'from "mongo/platform/atomic_word.h"')
1673
def CheckForNonMongoAssert(filename, clean_lines, linenum, error):
  """Reports calls to the bare assert() function.

  Args:
    filename: The name of the current file.
    clean_lines: An object whose 'elided' list holds the cleansed lines.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  elided_line = clean_lines.elided[linenum]
  # The word boundary keeps names that merely end in "assert", such as
  # static_assert, from matching.
  bare_assert = re.search(r'\bassert\s*\(', elided_line)
  if not bare_assert:
    return
  error(filename, linenum, 'mongodb/assert', 5,
        'Illegal use of the bare assert function, use a function from assert_utils.h instead.')
1679
def CheckForCopyright(filename, lines, error):
  """Logs an error if no Copyright message appears at the top of the file.

  When a copyright line is found, the license header that should follow it
  is checked by CheckForServerSidePublicLicense().

  Args:
    filename: The name of the current file.
    lines: The lines of the file; index 0 is a dummy line.
    error: The function to call with any errors found.
  """

  # We'll say it should occur by line 10. Don't forget there's a
  # dummy line at the front.
  # range() rather than xrange(): xrange does not exist on Python 3.
  for index in range(1, min(len(lines), 11)):
    if re.search(r'Copyright', lines[index], re.I):
      CheckForServerSidePublicLicense(index, filename, lines, error)
      break
  else:                       # means no copyright line was found
    error(filename, 0, 'legal/copyright', 5,
          'No copyright message found.  '
          'You should have a line: "Copyright [year] <Copyright Owner>"')
1693
def CheckForServerSidePublicLicense(copyright_offset, filename, lines, error):
  """Logs an error if the SSPL license header is missing or altered.

  The first line of the header is searched for in the three lines starting
  at copyright_offset.  Once found, every following line must equal the
  expected header text exactly; only the first mismatch is reported.

  Args:
    copyright_offset: Index in lines of the line holding the copyright.
    filename: The name of the current file.
    lines: The lines of the file.
    error: The function to call with any errors found.
  """
  license_header = '''\
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the Server Side Public License, version 1,
 *    as published by MongoDB, Inc.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    Server Side Public License for more details.
 *
 *    You should have received a copy of the Server Side Public License
 *    along with this program. If not, see
 *    <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the Server Side Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */'''.splitlines()

  # The following files are in the src/mongo/ directory but technically belong
  # in src/third_party/ because their copyright does not belong to MongoDB. Note
  # that we do not need to use os.path.normpath() to match these pathnames on
  # Windows because FileInfo.RepositoryName() normalizes the path separator for
  # us already.
  files_to_ignore = set([
    'src/mongo/shell/linenoise.cpp',
    'src/mongo/shell/linenoise.h',
    'src/mongo/shell/mk_wcwidth.cpp',
    'src/mongo/shell/mk_wcwidth.h',
    'src/mongo/util/md5.cpp',
    'src/mongo/util/md5.h',
    'src/mongo/util/md5main.cpp',
    'src/mongo/util/net/ssl_stream.cpp',
    'src/mongo/util/scopeguard.h',
  ])

  if FileInfo(filename).RepositoryName() in files_to_ignore:
    return

  # We expect the first line of the license header to follow shortly after the
  # "Copyright" message.
  # range() rather than xrange(): xrange does not exist on Python 3.
  for candidate in range(copyright_offset,
                         min(len(lines), copyright_offset + 3)):
    if re.search(r'This program is free software', lines[candidate]):
      # Compare the header line by line, starting at the candidate line.
      for i, expected in enumerate(license_header):
        current = candidate + i
        if current >= len(lines) or lines[current] != expected:
          error(filename, 0, 'legal/license', 5,
                'Incorrect license header found.  '
                'Expected "' + expected + '".  '
                'See https://github.com/mongodb/mongo/wiki/Server-Code-Style')
          # We break here to stop reporting legal/license errors for this file.
          break

      # We break here to indicate that we found some license header.
      break
  else:
    error(filename, 0, 'legal/license', 5,
          'No license header found.  '
          'See https://github.com/mongodb/mongo/wiki/Server-Code-Style')
1763
def GetIndentLevel(line):
  """Counts the spaces that precede the first non-whitespace character.

  Args:
    line: A string to check.

  Returns:
    The number of leading spaces as an integer.  Zero when the line does
    not consist of spaces followed by a non-whitespace character.
  """
  leading = Match(r'^( *)\S', line)
  return len(leading.group(1)) if leading else 0
1778
1779
def GetHeaderGuardCPPVariable(filename):
  """Returns the CPP variable that should be used as a header guard.

  The variable is derived from the file's repository-relative path: an
  optional leading "<_root>/" is removed, every '-', '.', '/' and whitespace
  character becomes '_', the result is upper-cased and a trailing '_' is
  appended.

  Args:
    filename: The name of a C++ header file.

  Returns:
    The CPP variable that should be used as a header guard in the
    named file.

  """

  # Restores original filename in case that cpplint is invoked from Emacs's
  # flymake.
  filename = re.sub(r'_flymake\.h$', '.h', filename)
  filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)

  file_path_from_root = FileInfo(filename).RepositoryName()
  if _root:
    # Strip the root as a literal prefix rather than through a regular
    # expression.  RepositoryName() yields '/'-separated paths on every
    # platform, so appending os.sep was wrong on Windows (the pattern ended
    # in a dangling backslash), and an unescaped _root let characters such
    # as '.' or '+' be interpreted as regex syntax.
    root_prefix = _root + '/'
    if file_path_from_root.startswith(root_prefix):
      file_path_from_root = file_path_from_root[len(root_prefix):]
  return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_'
1802
1803
def CheckForHeaderGuard(filename, lines, error):
  """Verifies the #ifndef / #define / #endif guard of a header file.

  Reports a missing #ifndef or #define, a guard whose name differs from the
  one derived from the file's path, an #ifndef/#define pair that disagree,
  and a final #endif that lacks the canonical trailing comment.

  Args:
    filename: The name of the C++ header file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  # A suppression comment anywhere in the file disables the whole check.
  # Since the warning may refer to a line that does not exist, only the
  # exact NOLINT(build/header_guard) form is honoured here, not the
  # general NOLINT or NOLINT(*) forms.
  if any(Search(r'//\s*NOLINT\(build/header_guard\)', text)
         for text in lines):
    return

  expected_guard = GetHeaderGuardCPPVariable(filename)

  guard = None
  guard_linenum = 0
  defined = None
  last_endif = None
  last_endif_linenum = 0
  for linenum, text in enumerate(lines):
    tokens = text.split()
    if len(tokens) >= 2:
      directive, argument = tokens[0], tokens[1]
      # Only the first #ifndef and the first #define are of interest.
      if directive == '#ifndef' and not guard:
        guard = argument
        guard_linenum = linenum
      elif directive == '#define' and not defined:
        defined = argument
    # Whereas for #endif it is the last one that counts; keep the full line.
    if text.startswith('#endif'):
      last_endif = text
      last_endif_linenum = linenum

  if not guard:
    error(filename, 0, 'build/header_guard', 5,
          'No #ifndef header guard found, suggested CPP variable is: %s' %
          expected_guard)
    return

  if not defined:
    error(filename, 0, 'build/header_guard', 5,
          'No #define header guard found, suggested CPP variable is: %s' %
          expected_guard)
    return

  # PATH_FILE_H_ is the expected guard; PATH_FILE_H__ is tolerated for
  # backward compatibility and therefore reported at level 0 only.
  if guard != expected_guard:
    severity = 0 if guard == expected_guard + '_' else 5
    ParseNolintSuppressions(filename, lines[guard_linenum], guard_linenum,
                            error)
    error(filename, guard_linenum, 'build/header_guard', severity,
          '#ifndef header guard has wrong style, please use: %s' %
          expected_guard)

  if defined != guard:
    error(filename, 0, 'build/header_guard', 5,
          "#ifndef and #define don't match, suggested CPP variable is: %s" %
          expected_guard)
    return

  canonical_endif = '#endif  // %s' % expected_guard
  if last_endif != canonical_endif:
    severity = 0 if last_endif == canonical_endif + '_' else 5
    ParseNolintSuppressions(filename, lines[last_endif_linenum],
                            last_endif_linenum, error)
    error(filename, last_endif_linenum, 'build/header_guard', severity,
          '#endif line should be "#endif  // %s"' % expected_guard)
1887
1888
def CheckForBadCharacters(filename, lines, error):
  """Reports every line that contains a character we consider bad.

  There are two kinds of bad characters:

  1. The Unicode replacement character.  Its presence means that the file
  either held invalid UTF-8 (likely) or actual replacement characters (which
  it should not).  Line numbering may be thrown off when the invalid UTF-8
  was adjacent to a newline.

  2. The NUL byte, which some tools cannot cope with.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # (offending character, error category, message), in reporting order.
  forbidden = (
      (u'\ufffd', 'readability/utf8',
       'Line contains invalid UTF-8 (or Unicode replacement character).'),
      ('\0', 'readability/nul', 'Line contains NUL byte.'),
  )
  for linenum, text in enumerate(lines):
    for needle, category, message in forbidden:
      if needle in text:
        error(filename, linenum, category, 5, message)
1912
1913
def CheckForNewlineAtEOF(filename, lines, error):
  """Reports a file whose last line is not terminated by a newline.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  # lines was produced by appending two newlines to the original file
  # contents and then splitting on \n.  The file therefore ended in \n
  # exactly when the second-to-last element exists and is empty.
  ends_with_newline = len(lines) >= 3 and not lines[-2]
  if ends_with_newline:
    return
  error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
        'Could not find a newline character at the end of the file.')
1930
1931
def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
  """Reports /* ... */ comments and "..." strings that run past one line.

  Within macros a one-line /* ... */ comment is legitimate.  Anywhere else
  // comments are preferred, so warning about the rest is acceptable.
  Strings may likewise span several lines provided each line ends in a
  line continuation character (backslash).  The C++ style guide does not
  currently forbid that, but it is ugly and unnecessary.  This lint
  program handles neither construct well, hence both are reported.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Escaped backslashes (\\) are harmless, but their second slash could be
  # mistaken for the start of \" by the quote counting below, so drop them.
  text = clean_lines.elided[linenum].replace('\\\\', '')

  comment_opens = text.count('/*')
  comment_closes = text.count('*/')
  if comment_opens > comment_closes:
    error(filename, linenum, 'readability/multiline_comment', 5,
          'Complex multi-line /*...*/-style comment found. '
          'Lint may give bogus warnings.  '
          'Consider replacing these with //-style comments, '
          'with #if 0...#endif, '
          'or with more clearly structured multi-line comments.')

  # An odd number of unescaped double quotes means a string is left open.
  unescaped_quotes = text.count('"') - text.count('\\"')
  if unescaped_quotes % 2:
    error(filename, linenum, 'readability/multiline_string', 5,
          'Multi-line string ("...") found.  This lint script doesn\'t '
          'do well with such strings, and may give bogus warnings.  '
          'Use C++11 raw strings or concatenation instead.')
1968
1969
# Table of thread-unsafe POSIX functions.  Each entry is a tuple of
# (non-threadsafe name, thread-safe alternative, validation pattern).
#
# The validation pattern exists to weed out false positives like these:
#  _rand();               // false positive due to substring match.
#  ->rand();              // some member function rand().
#  ACMRandom rand(seed);  // some variable named rand.
#  ISAACRandom rand();    // another variable named rand.
#
# In essence the function's return value must be used in an expression on
# the same line, which is detected by requiring some operator in front of
# the function name.  Constructors and member function calls are thereby
# excluded.
_UNSAFE_FUNC_PREFIX = r'(?:[-+*/=%^&|(<]\s*|>\s+)'
_THREADING_LIST = tuple(
    (func + '(', func + '_r(', _UNSAFE_FUNC_PREFIX + func + call_args)
    for func, call_args in (
        # (function name, pattern for its parenthesized argument list)
        ('asctime', r'\([^)]+\)'),
        ('ctime', r'\([^)]+\)'),
        ('getgrgid', r'\([^)]+\)'),
        ('getgrnam', r'\([^)]+\)'),
        ('getlogin', r'\(\)'),
        ('getpwnam', r'\([^)]+\)'),
        ('getpwuid', r'\([^)]+\)'),
        ('gmtime', r'\([^)]+\)'),
        ('localtime', r'\([^)]+\)'),
        ('rand', r'\(\)'),
        ('strtok', r'\([^)]+\)'),
        ('ttyname', r'\([^)]+\)'),
    ))
1998
1999
def CheckPosixThreading(filename, clean_lines, linenum, error):
  """Reports calls to POSIX functions that are not thread-safe.

  A lot of code was written with no thought given to multi-threading, and
  engineers tend to fall back on what they learned about posix before its
  threading extensions existed.  This check points them at the thread-safe
  variants (when posix is used directly).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  code = clean_lines.elided[linenum]
  for unsafe_name, safe_name, call_pattern in _THREADING_LIST:
    # The pattern confirms that this really is a call to the function in
    # question and not merely a substring match.
    if not Search(call_pattern, code):
      continue
    error(filename, linenum, 'runtime/threadsafe_fn', 2,
          'Consider using ' + safe_name +
          '...) instead of ' + unsafe_name +
          '...) for improved thread safety.')
2024
2025
def CheckVlogArguments(filename, clean_lines, linenum, error):
  """Reports VLOG() calls that are given a symbolic severity.

  VLOG(2), for instance, is fine, whereas VLOG(INFO), VLOG(WARNING),
  VLOG(ERROR), VLOG(DFATAL) and VLOG(FATAL) are flagged.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  code = clean_lines.elided[linenum]
  if not Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', code):
    return
  error(filename, linenum, 'runtime/vlog', 5,
        'VLOG() should be used with numeric verbosity level.  '
        'Use LOG() if you want symbolic severity levels.')
2043
# Recognizes a statement such as "*count++;" or "*count--;", which moves
# the pointer rather than changing the value it points at.
_RE_PATTERN_INVALID_INCREMENT = re.compile(r'^\s*\*\w+(\+\+|--);')


def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Reports statements of the form *count++.

  Take this function:
  void increment_counter(int* count) {
    *count++;
  }
  It is wrong because what it really performs is count++, i.e. it moves
  the pointer.  It should read ++*count, (*count)++ or *count += 1.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  statement = clean_lines.elided[linenum]
  if _RE_PATTERN_INVALID_INCREMENT.match(statement) is None:
    return
  error(filename, linenum, 'runtime/invalid_increment', 5,
        'Changing pointer instead of value (or unused value of operator*).')
2070
2071
def IsMacroDefinition(clean_lines, linenum):
  """Tells whether the given line is part of a macro definition.

  Args:
    clean_lines: An indexable collection of line strings.
    linenum: The number of the line to check.

  Returns:
    True if the line starts with #define, or if the line before it ends
    with a backslash (line continuation).  False otherwise.
  """
  return bool(
      Search(r'^#define', clean_lines[linenum]) or
      (linenum > 0 and Search(r'\\$', clean_lines[linenum - 1])))
2080
2081
def IsForwardClassDeclaration(clean_lines, linenum):
  """Tells whether the given line is a forward declaration of a class.

  Args:
    clean_lines: An indexable collection of line strings.
    linenum: The number of the line to check.

  Returns:
    The (truthy) result of Match() if the line ends in "class <name>;",
    optionally preceded by other text such as "template"; a falsy value
    otherwise.
  """
  forward_declaration = r'^\s*(\btemplate\b)*.*class\s+\w+;\s*$'
  return Match(forward_declaration, clean_lines[linenum])
2084
2085
class _BlockInfo(object):
  """Stores information about a generic block of code."""

  def __init__(self, seen_open_brace):
    """Initializes the block.

    Args:
      seen_open_brace: Whether the opening brace of this block has been
        seen already.
    """
    self.seen_open_brace = seen_open_brace
    self.open_parentheses = 0
    self.inline_asm = _NO_ASM
    self.check_namespace_indentation = False

  def CheckBegin(self, filename, clean_lines, linenum, error):
    """Hook for checks on the text that precedes the opening brace.

    Its main use is to inspect what lies between a class identifier and
    the "{", which is usually where the base class is named.  Other kinds
    of blocks have little to check there, so this default does nothing.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Hook for checks on the text that follows the closing brace.

    Its main use is to verify end of namespace comments; this default
    does nothing.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """

  def IsBlockInfo(self):
    """Tells whether this object is exactly a _BlockInfo.

    Handy for verifying that an object is an instance of _BlockInfo
    itself and not of one of the classes derived from it.

    Returns:
      True for this class, False for derived classes.
    """
    return type(self) is _BlockInfo
2133
2134
class _ExternCInfo(_BlockInfo):
  """Stores information about an 'extern "C"' block."""

  def __init__(self):
    # The block is created with its opening brace marked as already seen.
    super(_ExternCInfo, self).__init__(True)
2140
2141
class _ClassInfo(_BlockInfo):
  """Stores information about a class."""

  def __init__(self, name, class_or_struct, clean_lines, linenum):
    """Records where a class or struct starts and estimates where it ends.

    Args:
      name: The name of the class or struct.
      class_or_struct: The keyword used in the declaration.  'struct'
        yields public default access, anything else private.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line on which the declaration starts.
    """
    super(_ClassInfo, self).__init__(False)
    self.name = name
    self.starting_linenum = linenum
    self.is_derived = False
    self.check_namespace_indentation = True
    self.is_struct = (class_or_struct == 'struct')
    self.access = 'public' if self.is_struct else 'private'

    # Initial indentation level of this class.  It is taken from raw_lines
    # rather than elided so that leading comments are accounted for.
    self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum])

    # Look for the end of the class by balancing braces.  Code like the
    # following confuses the search:
    #   class A {
    #   } *x = { ...
    #
    # The result is nevertheless good enough for CheckSectionSpacing.
    self.last_line = 0
    brace_depth = 0
    for candidate in range(linenum, clean_lines.NumLines()):
      text = clean_lines.elided[candidate]
      brace_depth += text.count('{') - text.count('}')
      if brace_depth == 0:
        self.last_line = candidate
        break

  def CheckBegin(self, filename, clean_lines, linenum, error):
    # A lone ':' (one that is not part of '::') marks a derived class.
    if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]):
      self.is_derived = True

  def CheckEnd(self, filename, clean_lines, linenum, error):
    # The closing brace must line up with the start of the class.  The
    # check applies only when nothing but spaces precedes the brace, which
    # means single-line class definitions are never checked.
    closing = Match(r'^( *)\}', clean_lines.elided[linenum])
    if not closing or len(closing.group(1)) == self.class_indent:
      return
    keyword = 'struct ' if self.is_struct else 'class '
    error(filename, linenum, 'whitespace/indent', 3,
          'Closing brace should be aligned with beginning of %s' %
          (keyword + self.name))
2193
2194
class _NamespaceInfo(_BlockInfo):
  """Stores information about a namespace."""

  def __init__(self, name, linenum):
    """Records the name and starting line of a namespace.

    Args:
      name: The name of the namespace.  A falsy value (anonymous
        namespace) is stored as the empty string.
      linenum: The number of the line on which the namespace starts.
    """
    super(_NamespaceInfo, self).__init__(False)
    self.name = name or ''
    self.starting_linenum = linenum
    self.check_namespace_indentation = True

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Check end of namespace comments."""
    closing_line = clean_lines.raw_lines[linenum]

    # A namespace spanning only a few lines is not required to have a
    # terminating comment.  If it does have one, though, the comment is
    # still verified below.
    #
    # TODO(unknown): We always want to check end of namespace comments
    # if a namespace is large, but sometimes we also want to apply the
    # check if a short namespace contained nontrivial things (something
    # other than forward declarations).  There is currently no logic on
    # deciding what these nontrivial things are, so this check is
    # triggered by namespace size only, which works most of the time.
    is_short = linenum - self.starting_linenum < 10
    if is_short and not Match(r'};*\s*(//|/\*).*\bnamespace\b',
                              closing_line):
      return

    # Accepted terminators:
    #  - C style "/* */" comments, so that code closing namespaces inside
    #    preprocessor macros can be cpplint clean;
    #  - forms like "// end of namespace <name>." with a trailing period.
    #
    # Nothing else is accepted, as otherwise a comment that is merely a
    # substring of the expected namespace would produce false negatives.
    if self.name:
      # Named namespace
      named_terminator = (r'};*\s*(//|/\*).*\bnamespace\s+' +
                          re.escape(self.name) + r'[\*/\.\\\s]*$')
      if not Match(named_terminator, closing_line):
        error(filename, linenum, 'readability/namespace', 5,
              'Namespace should be terminated with "// namespace %s"' %
              self.name)
      return

    # Anonymous namespace
    if Match(r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', closing_line):
      return

    # For "// namespace anonymous" or "// anonymous namespace (more text)",
    # also suggest "// anonymous namespace" as an acceptable form.
    if Match(r'}.*\b(namespace anonymous|anonymous namespace)\b',
             closing_line):
      error(filename, linenum, 'readability/namespace', 5,
            'Anonymous namespace should be terminated with "// namespace"'
            ' or "// anonymous namespace"')
    else:
      error(filename, linenum, 'readability/namespace', 5,
            'Anonymous namespace should be terminated with "// namespace"')
2255
2256
class _PreprocessorInfo(object):
  """Stores checkpoints of nesting stacks when #if/#else is seen.

  Attributes:
    stack_before_if: The entire nesting stack before #if.
    stack_before_else: The entire nesting stack up to #else.
    seen_else: Whether #else or #elif has already been seen.
  """

  def __init__(self, stack_before_if):
    # Nothing has been recorded for an #else/#elif branch yet.
    self.seen_else = False
    self.stack_before_else = []

    # Snapshot handed in by the caller for the opening #if.
    self.stack_before_if = stack_before_if
2269
2270
2271class NestingState(object):
2272  """Holds states related to parsing braces."""
2273
2274  def __init__(self):
2275    # Stack for tracking all braces.  An object is pushed whenever we
2276    # see a "{", and popped when we see a "}".  Only 3 types of
2277    # objects are possible:
2278    # - _ClassInfo: a class or struct.
2279    # - _NamespaceInfo: a namespace.
2280    # - _BlockInfo: some other type of block.
2281    self.stack = []
2282
2283    # Top of the previous stack before each Update().
2284    #
2285    # Because the nesting_stack is updated at the end of each line, we
2286    # had to do some convoluted checks to find out what is the current
2287    # scope at the beginning of the line.  This check is simplified by
2288    # saving the previous top of nesting stack.
2289    #
2290    # We could save the full stack, but we only need the top.  Copying
2291    # the full nesting stack would slow down cpplint by ~10%.
2292    self.previous_stack_top = []
2293
2294    # Stack of _PreprocessorInfo objects.
2295    self.pp_stack = []
2296
2297  def SeenOpenBrace(self):
2298    """Check if we have seen the opening brace for the innermost block.
2299
2300    Returns:
2301      True if we have seen the opening brace, False if the innermost
2302      block is still expecting an opening brace.
2303    """
2304    return (not self.stack) or self.stack[-1].seen_open_brace
2305
2306  def InNamespaceBody(self):
2307    """Check if we are currently one level inside a namespace body.
2308
2309    Returns:
2310      True if top of the stack is a namespace block, False otherwise.
2311    """
2312    return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
2313
2314  def InExternC(self):
2315    """Check if we are currently one level inside an 'extern "C"' block.
2316
2317    Returns:
2318      True if top of the stack is an extern block, False otherwise.
2319    """
2320    return self.stack and isinstance(self.stack[-1], _ExternCInfo)
2321
2322  def InClassDeclaration(self):
2323    """Check if we are currently one level inside a class or struct declaration.
2324
2325    Returns:
2326      True if top of the stack is a class/struct, False otherwise.
2327    """
2328    return self.stack and isinstance(self.stack[-1], _ClassInfo)
2329
2330  def InAsmBlock(self):
2331    """Check if we are currently one level inside an inline ASM block.
2332
2333    Returns:
2334      True if the top of the stack is a block containing inline ASM.
2335    """
2336    return self.stack and self.stack[-1].inline_asm != _NO_ASM
2337
  def InTemplateArgumentList(self, clean_lines, linenum, pos):
    """Check if current position is inside template argument list.

    Scans forward from (linenum, pos), across lines if necessary, until a
    character is found that decides the question one way or the other.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      pos: position just after the suspected template argument.
    Returns:
      True if (linenum, pos) is inside template arguments.  False if it is
      not, if a '<' has no matching close, or if the end of the file is
      reached without a decision.
    """
    while linenum < clean_lines.NumLines():
      # Find the earliest character that might indicate a template argument
      line = clean_lines.elided[linenum]
      # The captured character is the first of {};=[].<> in the rest of the
      # line or, when none of them occurs, the last character of the line.
      match = Match(r'^[^{};=\[\]\.<>]*(.)', line[pos:])
      if not match:
        # Nothing left on this line; continue at the start of the next one.
        linenum += 1
        pos = 0
        continue
      token = match.group(1)
      # pos now points just past the captured token.
      pos += len(match.group(0))

      # These things do not look like template argument list:
      #   class Suspect {
      #   class Suspect x; }
      if token in ('{', '}', ';'): return False

      # These things look like template argument list:
      #   template <class Suspect>
      #   template <class Suspect = default_value>
      #   template <class Suspect[]>
      #   template <class Suspect...>
      if token in ('>', '=', '[', ']', '.'): return True

      # Check if token is an unmatched '<'.
      # If not, move on to the next character.
      if token != '<':
        pos += 1
        if pos >= len(line):
          linenum += 1
          pos = 0
        continue

      # We can't be sure if we just find a single '<', and need to
      # find the matching '>'.
      # pos - 1 is the index of the '<' itself.  NOTE(review): presumably
      # CloseExpression returns (line, linenum, pos) for the position after
      # the matching close, with a negative pos when there is none; confirm
      # against its definition.
      (_, end_line, end_pos) = CloseExpression(clean_lines, linenum, pos - 1)
      if end_pos < 0:
        # Not sure if template argument list or syntax error in file
        return False
      # Resume scanning from the position CloseExpression reported.
      linenum = end_line
      pos = end_pos
    return False
2389
2390  def UpdatePreprocessor(self, line):
2391    """Update preprocessor stack.
2392
2393    We need to handle preprocessors due to classes like this:
2394      #ifdef SWIG
2395      struct ResultDetailsPageElementExtensionPoint {
2396      #else
2397      struct ResultDetailsPageElementExtensionPoint : public Extension {
2398      #endif
2399
2400    We make the following assumptions (good enough for most files):
2401    - Preprocessor condition evaluates to true from #if up to first
2402      #else/#elif/#endif.
2403
2404    - Preprocessor condition evaluates to false from #else/#elif up
2405      to #endif.  We still perform lint checks on these lines, but
2406      these do not affect nesting stack.
2407
2408    Args:
2409      line: current line to check.
2410    """
2411    if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
2412      # Beginning of #if block, save the nesting stack here.  The saved
2413      # stack will allow us to restore the parsing state in the #else case.
2414      self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
2415    elif Match(r'^\s*#\s*(else|elif)\b', line):
2416      # Beginning of #else block
2417      if self.pp_stack:
2418        if not self.pp_stack[-1].seen_else:
2419          # This is the first #else or #elif block.  Remember the
2420          # whole nesting stack up to this point.  This is what we
2421          # keep after the #endif.
2422          self.pp_stack[-1].seen_else = True
2423          self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)
2424
2425        # Restore the stack to how it was before the #if
2426        self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
2427      else:
2428        # TODO(unknown): unexpected #else, issue warning?
2429        pass
2430    elif Match(r'^\s*#\s*endif\b', line):
2431      # End of #if or #else blocks.
2432      if self.pp_stack:
2433        # If we saw an #else, we will need to restore the nesting
2434        # stack to its former state before the #else, otherwise we
2435        # will just continue from where we left off.
2436        if self.pp_stack[-1].seen_else:
2437          # Here we can just use a shallow copy since we are the last
2438          # reference to it.
2439          self.stack = self.pp_stack[-1].stack_before_else
2440        # Drop the corresponding #if
2441        self.pp_stack.pop()
2442      else:
2443        # TODO(unknown): unexpected #endif, issue warning?
2444        pass
2445
2446  # TODO(unknown): Update() is too long, but we will refactor later.
2447  def Update(self, filename, clean_lines, linenum, error):
2448    """Update nesting state with current line.
2449
2450    Args:
2451      filename: The name of the current file.
2452      clean_lines: A CleansedLines instance containing the file.
2453      linenum: The number of the line to check.
2454      error: The function to call with any errors found.
2455    """
2456    line = clean_lines.elided[linenum]
2457
2458    # Remember top of the previous nesting stack.
2459    #
2460    # The stack is always pushed/popped and not modified in place, so
2461    # we can just do a shallow copy instead of copy.deepcopy.  Using
2462    # deepcopy would slow down cpplint by ~28%.
2463    if self.stack:
2464      self.previous_stack_top = self.stack[-1]
2465    else:
2466      self.previous_stack_top = None
2467
2468    # Update pp_stack
2469    self.UpdatePreprocessor(line)
2470
2471    # Count parentheses.  This is to avoid adding struct arguments to
2472    # the nesting stack.
2473    if self.stack:
2474      inner_block = self.stack[-1]
2475      depth_change = line.count('(') - line.count(')')
2476      inner_block.open_parentheses += depth_change
2477
2478      # Also check if we are starting or ending an inline assembly block.
2479      if inner_block.inline_asm in (_NO_ASM, _END_ASM):
2480        if (depth_change != 0 and
2481            inner_block.open_parentheses == 1 and
2482            _MATCH_ASM.match(line)):
2483          # Enter assembly block
2484          inner_block.inline_asm = _INSIDE_ASM
2485        else:
2486          # Not entering assembly block.  If previous line was _END_ASM,
2487          # we will now shift to _NO_ASM state.
2488          inner_block.inline_asm = _NO_ASM
2489      elif (inner_block.inline_asm == _INSIDE_ASM and
2490            inner_block.open_parentheses == 0):
2491        # Exit assembly block
2492        inner_block.inline_asm = _END_ASM
2493
2494    # Consume namespace declaration at the beginning of the line.  Do
2495    # this in a loop so that we catch same line declarations like this:
2496    #   namespace proto2 { namespace bridge { class MessageSet; } }
2497    while True:
2498      # Match start of namespace.  The "\b\s*" below catches namespace
2499      # declarations even if it weren't followed by a whitespace, this
2500      # is so that we don't confuse our namespace checker.  The
2501      # missing spaces will be flagged by CheckSpacing.
2502      namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
2503      if not namespace_decl_match:
2504        break
2505
2506      new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
2507      self.stack.append(new_namespace)
2508
2509      line = namespace_decl_match.group(2)
2510      if line.find('{') != -1:
2511        new_namespace.seen_open_brace = True
2512        line = line[line.find('{') + 1:]
2513
2514    # Look for a class declaration in whatever is left of the line
2515    # after parsing namespaces.  The regexp accounts for decorated classes
2516    # such as in:
2517    #   class LOCKABLE API Object {
2518    #   };
2519    class_decl_match = Match(
2520        r'^(\s*(?:template\s*<[\w\s<>,:]*>\s*)?'
2521        r'(class|struct)\s+(?:[A-Z_]+\s+)*(\w+(?:::\w+)*))'
2522        r'(.*)$', line)
2523    if (class_decl_match and
2524        (not self.stack or self.stack[-1].open_parentheses == 0)):
2525      # We do not want to accept classes that are actually template arguments:
2526      #   template <class Ignore1,
2527      #             class Ignore2 = Default<Args>,
2528      #             template <Args> class Ignore3>
2529      #   void Function() {};
2530      #
2531      # To avoid template argument cases, we scan forward and look for
2532      # an unmatched '>'.  If we see one, assume we are inside a
2533      # template argument list.
2534      end_declaration = len(class_decl_match.group(1))
2535      if not self.InTemplateArgumentList(clean_lines, linenum, end_declaration):
2536        self.stack.append(_ClassInfo(
2537            class_decl_match.group(3), class_decl_match.group(2),
2538            clean_lines, linenum))
2539        line = class_decl_match.group(4)
2540
2541    # If we have not yet seen the opening brace for the innermost block,
2542    # run checks here.
2543    if not self.SeenOpenBrace():
2544      self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
2545
2546    # Update access control if we are inside a class/struct
2547    if self.stack and isinstance(self.stack[-1], _ClassInfo):
2548      classinfo = self.stack[-1]
2549      access_match = Match(
2550          r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?'
2551          r':(?:[^:]|$)',
2552          line)
2553      if access_match:
2554        classinfo.access = access_match.group(2)
2555
2556        # Check that access keywords are indented +1 space.  Skip this
2557        # check if the keywords are not preceded by whitespaces.
2558        indent = access_match.group(1)
2559        if (len(indent) != classinfo.class_indent + 1 and
2560            Match(r'^\s*$', indent)):
2561          if classinfo.is_struct:
2562            parent = 'struct ' + classinfo.name
2563          else:
2564            parent = 'class ' + classinfo.name
2565          slots = ''
2566          if access_match.group(3):
2567            slots = access_match.group(3)
2568          error(filename, linenum, 'whitespace/indent', 3,
2569                '%s%s: should be indented +1 space inside %s' % (
2570                    access_match.group(2), slots, parent))
2571
2572    # Consume braces or semicolons from what's left of the line
2573    while True:
2574      # Match first brace, semicolon, or closed parenthesis.
2575      matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line)
2576      if not matched:
2577        break
2578
2579      token = matched.group(1)
2580      if token == '{':
2581        # If namespace or class hasn't seen a opening brace yet, mark
2582        # namespace/class head as complete.  Push a new block onto the
2583        # stack otherwise.
2584        if not self.SeenOpenBrace():
2585          self.stack[-1].seen_open_brace = True
2586        elif Match(r'^extern\s*"[^"]*"\s*\{', line):
2587          self.stack.append(_ExternCInfo())
2588        else:
2589          self.stack.append(_BlockInfo(True))
2590          if _MATCH_ASM.match(line):
2591            self.stack[-1].inline_asm = _BLOCK_ASM
2592
2593      elif token == ';' or token == ')':
2594        # If we haven't seen an opening brace yet, but we already saw
2595        # a semicolon, this is probably a forward declaration.  Pop
2596        # the stack for these.
2597        #
2598        # Similarly, if we haven't seen an opening brace yet, but we
2599        # already saw a closing parenthesis, then these are probably
2600        # function arguments with extra "class" or "struct" keywords.
2601        # Also pop these stack for these.
2602        if not self.SeenOpenBrace():
2603          self.stack.pop()
2604      else:  # token == '}'
2605        # Perform end of block checks and pop the stack.
2606        if self.stack:
2607          self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
2608          self.stack.pop()
2609      line = matched.group(2)
2610
2611  def InnermostClass(self):
2612    """Get class info on the top of the stack.
2613
2614    Returns:
2615      A _ClassInfo object if we are inside a class, or None otherwise.
2616    """
2617    for i in range(len(self.stack), 0, -1):
2618      classinfo = self.stack[i - 1]
2619      if isinstance(classinfo, _ClassInfo):
2620        return classinfo
2621    return None
2622
2623  def CheckCompletedBlocks(self, filename, error):
2624    """Checks that all classes and namespaces have been completely parsed.
2625
2626    Call this when all lines in a file have been processed.
2627    Args:
2628      filename: The name of the current file.
2629      error: The function to call with any errors found.
2630    """
2631    # Note: This test can result in false positives if #ifdef constructs
2632    # get in the way of brace matching. See the testBuildClass test in
2633    # cpplint_unittest.py for an example of this.
2634    for obj in self.stack:
2635      if isinstance(obj, _ClassInfo):
2636        error(filename, obj.starting_linenum, 'build/class', 5,
2637              'Failed to find complete declaration of class %s' %
2638              obj.name)
2639      elif isinstance(obj, _NamespaceInfo):
2640        error(filename, obj.starting_linenum, 'build/namespaces', 5,
2641              'Failed to find complete declaration of namespace %s' %
2642              obj.name)
2643
2644
def CheckForNonStandardConstructs(filename, clean_lines, linenum,
                                  nesting_state, error):
  r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.

  Complain about several constructs which gcc-2 accepts, but which are
  not standard C++.  Warning about these in lint is one way to ease the
  transition to new compilers.
  - put storage class first (e.g. "static const" instead of "const static").
  - "%lld" instead of "%qd" in printf-type functions.
  - "%1$d" is non-standard in printf-type functions.
  - "\%" is an undefined character escape sequence.
  - text after #endif is not allowed.
  - invalid inner-style forward declaration.
  - >? and <? operators, and their >?= and <?= cousins.

  Additionally, check for constructor/destructor style violations and reference
  members, as it is very convenient to do so while checking for
  gcc-2 compliance.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, error level, and message
  """

  # Remove comments from the line, but leave in strings for now.
  line = clean_lines.lines[linenum]

  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
    error(filename, linenum, 'runtime/printf_format', 3,
          '%q in format strings is deprecated.  Use %ll instead.')

  if Search(r'printf\s*\(.*".*%\d+\$', line):
    error(filename, linenum, 'runtime/printf_format', 2,
          '%N$ formats are unconventional.  Try rewriting to avoid them.')

  # Remove escaped backslashes before looking for undefined escapes.
  line = line.replace('\\\\', '')

  if Search(r'("|\').*\\(%|\[|\(|{)', line):
    error(filename, linenum, 'build/printf_format', 3,
          '%, [, (, and { are undefined character escapes.  Unescape them.')

  # For the rest, work with both comments and strings removed.
  line = clean_lines.elided[linenum]

  if Search(r'\b(const|volatile|void|char|short|int|long'
            r'|float|double|signed|unsigned'
            r'|schar|u?int8|u?int16|u?int32|u?int64)'
            r'\s+(register|static|extern|typedef)\b',
            line):
    error(filename, linenum, 'build/storage_class', 5,
          'Storage class (static, extern, typedef, etc) should be first.')

  if Match(r'\s*#\s*endif\s*[^/\s]+', line):
    error(filename, linenum, 'build/endif_comment', 5,
          'Uncommented text after #endif is non-standard.  Use a comment.')

  if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
    error(filename, linenum, 'build/forward_decl', 5,
          'Inner-style forward declarations are invalid.  Remove this line.')

  if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
            line):
    error(filename, linenum, 'build/deprecated', 3,
          '>? and <? (max and min) operators are non-standard and deprecated.')

  if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', line):
    # TODO(unknown): Could it be expanded safely to arbitrary references,
    # without triggering too many false positives? The first
    # attempt triggered 5 warnings for mostly benign code in the regtest, hence
    # the restriction.
    # Here's the original regexp, for the reference:
    # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
    # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
    error(filename, linenum, 'runtime/member_string_references', 2,
          'const string& members are dangerous. It is much better to use '
          'alternatives, such as pointers or simple constants.')

  # Everything else in this function operates on class declarations.
  # Return early if the top of the nesting stack is not a class, or if
  # the class head is not completed yet.
  classinfo = nesting_state.InnermostClass()
  if not classinfo or not classinfo.seen_open_brace:
    return

  # The class may have been declared with namespace or classname qualifiers.
  # The constructor and destructor will not have those qualifiers.
  base_classname = classinfo.name.split('::')[-1]

  # Look for single-argument constructors that aren't marked explicit.
  # Technically a valid construct, but against style. Also look for
  # non-single-argument constructors which are also technically valid, but
  # strongly suggest something is wrong.
  explicit_constructor_match = Match(
      r'\s+(?:inline\s+)?(explicit\s+)?(?:inline\s+)?%s\s*'
      r'\(((?:[^()]|\([^()]*\))*)\)'
      % re.escape(base_classname),
      line)

  if explicit_constructor_match:
    is_marked_explicit = explicit_constructor_match.group(1)

    if not explicit_constructor_match.group(2):
      constructor_args = []
    else:
      constructor_args = explicit_constructor_match.group(2).split(',')

    # collapse arguments so that commas in template parameter lists and function
    # argument parameter lists don't split arguments in two
    i = 0
    while i < len(constructor_args):
      constructor_arg = constructor_args[i]
      # Only merge while a following piece exists.  The regexp above keeps
      # parentheses balanced, but '<' may legitimately be unbalanced (e.g. a
      # default value such as "1 << 2" or "a < b"); without the bounds check
      # such an argument would index past the end of the list.
      while (i + 1 < len(constructor_args) and
             (constructor_arg.count('<') > constructor_arg.count('>') or
              constructor_arg.count('(') > constructor_arg.count(')'))):
        constructor_arg += ',' + constructor_args[i + 1]
        del constructor_args[i + 1]
      constructor_args[i] = constructor_arg
      i += 1

    defaulted_args = [arg for arg in constructor_args if '=' in arg]
    noarg_constructor = (not constructor_args or  # empty arg list
                         # 'void' arg specifier
                         (len(constructor_args) == 1 and
                          constructor_args[0].strip() == 'void'))
    onearg_constructor = ((len(constructor_args) == 1 and  # exactly one arg
                           not noarg_constructor) or
                          # all but at most one arg defaulted
                          (len(constructor_args) >= 1 and
                           not noarg_constructor and
                           len(defaulted_args) >= len(constructor_args) - 1))
    initializer_list_constructor = bool(
        onearg_constructor and
        Search(r'\bstd\s*::\s*initializer_list\b', constructor_args[0]))
    copy_constructor = bool(
        onearg_constructor and
        Match(r'(const\s+)?%s(\s*<[^>]*>)?(\s+const)?\s*(?:<\w+>\s*)?&'
              % re.escape(base_classname), constructor_args[0].strip()))

    if (not is_marked_explicit and
        onearg_constructor and
        not initializer_list_constructor and
        not copy_constructor):
      if defaulted_args:
        error(filename, linenum, 'runtime/explicit', 5,
              'Constructors callable with one argument '
              'should be marked explicit.')
      else:
        error(filename, linenum, 'runtime/explicit', 5,
              'Single-parameter constructors should be marked explicit.')
    elif is_marked_explicit and not onearg_constructor:
      if noarg_constructor:
        error(filename, linenum, 'runtime/explicit', 5,
              'Zero-parameter constructors should not be marked explicit.')
      else:
        error(filename, linenum, 'runtime/explicit', 0,
              'Constructors that require multiple arguments '
              'should not be marked explicit.')
2807
2808
def CheckSpacingForFunctionCall(filename, clean_lines, linenum, error):
  """Checks spacing around the parentheses of function calls.

  Reports extra whitespace right after '(', right before '(' and right
  before ')' in what looks like a function call on the given line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Function calls often sit inside if/for/while/switch conditions, which
  # follow their own, more liberal conventions.  When the line holds such a
  # construct, only the text between its parentheses is examined; otherwise
  # the whole line is.
  control_patterns = (r'\bif\s*\((.*)\)\s*{',
                      r'\bfor\s*\((.*)\)\s*{',
                      r'\bwhile\s*\((.*)\)\s*[{;]',
                      r'\bswitch\s*\((.*)\)\s*{')
  fncall = line
  for control_pattern in control_patterns:
    control = Search(control_pattern, line)
    if control:
      fncall = control.group(1)
      break

  # Nothing below applies to control structures (and a few keywords that
  # are legitimately followed by a parenthesis).
  if Search(r'\b(if|for|while|switch|return|new|delete|catch|sizeof)\b',
            fncall):
    return
  # Pointers/references to functions are too tricky; they are recognized
  # very simply as " (something)(maybe-something)" or
  # " (something)(maybe-something,".
  if Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall):
    return
  # Likewise for pointers/references to arrays: " (something)[something]".
  # The contents of [] are assumed short enough never to wrap.
  if Search(r' \([^)]+\)\[[^\]]+\]', fncall):
    return

  # There should never be a space immediately inside parens (eg
  # "f( 3, 4 )"), with an exception for nested parens ( (a+b) + c ).
  if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):      # a ( used for a fn call
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space after ( in function call')
  elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space after (')

  # There should never be a space before a ( when it's a function argument.
  # It is taken to be one when the char before the whitespace is legal in a
  # function name (alnum + _) and we're not starting a macro.
  space_before_paren = (
      Search(r'\w\s+\(', fncall) and
      not Search(r'#\s*define|typedef|using\s+\w+\s*=', fncall) and
      not Search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall))
  if space_before_paren:
    # TODO(unknown): Space after an operator function seem to be a common
    # error, silence those for now by restricting them to highest verbosity.
    if Search(r'\boperator_*\b', line):
      level = 0
    else:
      level = 4
    error(filename, linenum, 'whitespace/parens', level,
          'Extra space before ( in function call')

  # If the ) is followed only by a newline or a { + newline, assume it's
  # part of a control statement (if/while/etc), and don't complain
  if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
    # If the closing parenthesis is preceded by only whitespaces,
    # try to give a more descriptive error message.
    if Search(r'^\s+\)', fncall):
      message = 'Closing ) should be moved to the previous line'
    else:
      message = 'Extra space before )'
    error(filename, linenum, 'whitespace/parens', 2, message)
2882
2883
def IsBlankLine(line):
  """Tells whether a line has no visible content.

  A line counts as blank when it is empty (or otherwise falsy) or when it
  is made up of whitespace characters only.

  Args:
    line: A line of a string.

  Returns:
    True, if the given line is blank.
  """
  if not line:
    return True
  return line.isspace()
2897
2898
def CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line,
                                 error):
  """Runs the namespace item indentation check when it applies to a line.

  Works out whether the block on top of the nesting stack sits directly
  inside the namespace that was on top of the stack before the current
  line, asks ShouldCheckNamespaceIndentation whether the line should be
  checked, and if so delegates to CheckItemIndentationInNamespace.

  Args:
    filename: The name of the current file.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    clean_lines: A CleansedLines instance containing the file.
    line: The number of the line to check.
    error: The function to call with any errors found.
  """
  stack = nesting_state.stack
  is_namespace_indent_item = (
      len(stack) > 1 and
      stack[-1].check_namespace_indentation and
      isinstance(nesting_state.previous_stack_top, _NamespaceInfo) and
      nesting_state.previous_stack_top == stack[-2])

  elided_lines = clean_lines.elided
  if not ShouldCheckNamespaceIndentation(nesting_state,
                                         is_namespace_indent_item,
                                         elided_lines, line):
    return
  CheckItemIndentationInNamespace(filename, elided_lines, line, error)
2911
2912
def CheckForFunctionLengths(filename, clean_lines, linenum,
                            function_state, error):
  """Tracks function bodies and reports the ones that are too long.

  For an overview why this is done, see:
  http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

  The algorithm is simplistic and assumes other style guidelines
  (especially spacing) are followed.
  Only unindented functions are checked, so class members are unchecked.
  Trivial bodies are unchecked, so constructors with huge initializer lists
  may be missed.
  Blank/comment lines are not counted so as to avoid encouraging the removal
  of vertical space and comments just to get through a lint check.
  NOLINT *on the last line of a function* disables this check.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    function_state: Current function name and lines in body so far.
    error: The function to call with any errors found.
  """
  lines = clean_lines.lines
  line = lines[linenum]

  # Does the line open something that looks like a function?
  # decls * & space::name( ...
  starting_func = False
  header = Match(r'(\w(\w|::|\*|\&|\s)*)\(', line)
  if header:
    # If the name is all caps and underscores, figure it's a macro and
    # ignore it, unless it's TEST or TEST_F.
    function_name = header.group(1).split()[-1]
    starting_func = bool(function_name in ('TEST', 'TEST_F') or
                         not Match(r'[A-Z_]+$', function_name))

  if not starting_func:
    if Match(r'^\}\s*$', line):  # function end
      function_state.Check(error, filename, linenum)
      function_state.End()
    elif not Match(r'^\s*$', line):
      function_state.Count()  # Count non-blank/non-comment lines.
    return

  # Scan forward from the header until something settles whether this is a
  # declaration / trivial function or the start of a real body.
  joined_line = ''
  for start_linenum in xrange(linenum, clean_lines.NumLines()):
    start_line = lines[start_linenum]
    joined_line += ' ' + start_line.lstrip()
    if Search(r'(;|})', start_line):
      # Declarations and trivial functions ... ignore
      return
    if Search(r'{', start_line):
      function = Search(r'((\w|:)*)\(', line).group(1)
      if Match(r'TEST', function):    # Handle TEST... macros
        parameter_regexp = Search(r'(\(.*\))', joined_line)
        if parameter_regexp:             # Ignore bad syntax
          function += parameter_regexp.group(1)
      else:
        function += '()'
      function_state.Begin(function)
      return

  # No body for the function (or evidence of a non-function) was found.
  error(filename, linenum, 'readability/fn_size', 5,
        'Lint failed to find start of function body.')
2979
2980
# Matches a comment that begins with "//" followed by a TODO marker.
# Capture groups:
#   1: the whitespace (possibly empty) between "//" and "TODO".
#   2: the optional parenthesized part right after "TODO", e.g. "(username)".
#   3: a single whitespace character, or the empty string at the end of the
#      comment, following the optional ':'; None when neither is present.
_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')
2982
2983
def CheckComment(line, filename, linenum, next_line_start, error):
  """Checks a line's // comment for common formatting mistakes.

  Looks at the spacing between code and the comment, at the formatting of
  TODO comments, and at the space between // and the comment text.

  Args:
    line: The line in question.
    filename: The name of the current file.
    linenum: The number of the line to check.
    next_line_start: The first non-whitespace column of the next line.
    error: The function to call with any errors found.
  """
  commentpos = line.find('//')
  if commentpos == -1:
    return

  # An odd number of unescaped double quotes before the // suggests that it
  # sits inside a string literal; leave such lines alone.
  quotes_before = (line.count('"', 0, commentpos) -
                   line.count('\\"', 0, commentpos))
  if quotes_before % 2 != 0:
    return

  # Allow one space for new scopes, two spaces otherwise:
  too_close_to_code = (
      (commentpos >= 1 and
       line[commentpos - 1] not in string.whitespace) or
      (commentpos >= 2 and
       line[commentpos - 2] not in string.whitespace))
  is_scope_comment = (Match(r'^.*{ *//', line) and
                      next_line_start == commentpos)
  if too_close_to_code and not is_scope_comment:
    error(filename, linenum, 'whitespace/comments', 2,
          'At least two spaces is best between code and comments')

  # Checks for common mistakes in TODO comments.
  comment = line[commentpos:]
  todo = _RE_PATTERN_TODO.match(comment)
  if todo:
    leading_whitespace, username, middle_whitespace = todo.groups()

    # One whitespace is correct; zero whitespace is handled elsewhere.
    if len(leading_whitespace) > 1:
      error(filename, linenum, 'whitespace/todo', 2,
            'Too many spaces before TODO')

    if not username:
      error(filename, linenum, 'readability/todo', 2,
            'Missing username in TODO; it should look like '
            '"// TODO(my_username): Stuff."')

    # Only a single space or the end of the comment may follow; note that
    # the group is None when the TODO is followed by something else.
    if middle_whitespace not in (' ', ''):
      error(filename, linenum, 'whitespace/todo', 2,
            'TODO(my_username) should be followed by a space')

  # If the comment contains an alphanumeric character, there
  # should be a space somewhere between it and the //.
  if Match(r'//[^ ]*\w', comment):
    error(filename, linenum, 'whitespace/comments', 4,
          'Should have a space between // and comment')
3036
def CheckAccess(filename, clean_lines, linenum, nesting_state, error):
  """Checks that DISALLOW* macros appear in a private: section.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]  # get rid of comments and strings

  macro = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|'
                 r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), line)
  if not macro:
    return

  stack = nesting_state.stack
  if not (stack and isinstance(stack[-1], _ClassInfo)):
    # Found DISALLOW* macro outside a class declaration, or perhaps it
    # was used inside a function when it should have been part of the
    # class declaration.  We could issue a warning here, but it
    # probably resulted in a compiler error already.
    return

  if stack[-1].access != 'private':
    error(filename, linenum, 'readability/constructors', 3,
          '%s must be in the private: section' % macro.group(1))
3065
3066
def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
  """Checks a line for several blank-line, comment and spacing problems.

  Things checked here: redundant blank lines at the start or end of a code
  block, blank lines right after public/protected/private, comment
  formatting (delegated to CheckComment), extra space before '[', and
  missing space around the colon of a range-based for loop.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  # "elided" lines can't be used here, or commented lines could not be
  # checked.  "raw" lines are not wanted either, because the inside of C++11
  # raw strings should not be checked.
  raw = clean_lines.lines_without_raw_strings
  line = raw[linenum]

  # Before nixing comments, check if the line is blank for no good
  # reason.  This includes the first line after a block is opened, and
  # blank lines at the end of a function (ie, right before a line like '}'
  #
  # All the blank line checks are skipped immediately inside a namespace
  # body.  In other words, no blank line warnings for this block:
  #   namespace {
  #
  #   }
  #
  # A warning about missing end of namespace comments will be issued instead.
  #
  # Blank line checks are skipped as well for 'extern "C"' blocks, which are
  # formatted like namespaces.
  check_blank_line = (IsBlankLine(line) and
                      not nesting_state.InNamespaceBody() and
                      not nesting_state.InExternC())
  if check_blank_line:
    elided = clean_lines.elided
    prev_line = elided[linenum - 1]
    brace_pos = prev_line.rfind('{')
    # TODO(unknown): Don't complain if line before blank line, and line after,
    #                both start with alnums and are indented the same amount.
    #                This ignores whitespace at the start of a namespace block
    #                because those are not usually indented.
    if brace_pos != -1 and '}' not in prev_line[brace_pos:]:
      # A blank line at the start of a code block.  Before complaining,
      # see whether it is an exception to the rule: The previous non-empty
      # line has the parameters of a function header that are indented 4
      # spaces (because they did not fit in a 80 column line when placed on
      # the same line as the function name).  The case where the previous
      # line is indented 6 spaces is checked too; it may happen when the
      # initializers of a constructor do not fit into a 80 column line.
      if Match(r' {6}\w', prev_line):  # Initializer list?
        # Walk back to the opening line of the initializer list, which
        # should be indented 4 spaces to cause 6 space indentation afterwards.
        cursor = linenum - 2
        while cursor >= 0 and Match(r' {6}\w', elided[cursor]):
          cursor -= 1
        exception = cursor >= 0 and elided[cursor][:5] == '    :'
      else:
        # Search for the function arguments or an initializer list.  Simple
        # heuristic: If the line is indented 4 spaces; and there is a
        # closing paren, without the opening paren, followed by an opening
        # brace or colon (for initializer lists) it is assumed to be the last
        # line of a function header.  A colon indented 4 spaces is an
        # initializer list.
        exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
                           prev_line)
                     or Match(r' {4}:', prev_line))

      if not exception:
        error(filename, linenum, 'whitespace/blank_line', 2,
              'Redundant blank line at the start of a code block '
              'should be deleted.')

    # Ignore blank lines at the end of a block in a long if-else
    # chain, like this:
    #   if (condition1) {
    #     // Something followed by a blank line
    #
    #   } else if (condition2) {
    #     // Something else
    #   }
    if linenum + 1 < clean_lines.NumLines():
      following = raw[linenum + 1]
      if (following
          and Match(r'\s*}', following)
          and '} else ' not in following):
        error(filename, linenum, 'whitespace/blank_line', 3,
              'Redundant blank line at the end of a code block '
              'should be deleted.')

    access_keyword = Match(r'\s*(public|protected|private):', prev_line)
    if access_keyword:
      error(filename, linenum, 'whitespace/blank_line', 3,
            'Do not leave a blank line after "%s:"' % access_keyword.group(1))

  # Next, check comments.  CheckComment wants to know the indentation of
  # the following line (0 when there is none).
  next_line_start = 0
  if linenum + 1 < clean_lines.NumLines():
    following = raw[linenum + 1]
    next_line_start = len(following) - len(following.lstrip())
  CheckComment(line, filename, linenum, next_line_start, error)

  # get rid of comments and strings
  code = clean_lines.elided[linenum]

  # You shouldn't have spaces before your brackets, except maybe after
  # 'delete []' or 'return []() {};'
  if (Search(r'\w\s+\[', code) and
      not Search(r'(?:delete|return)\s+\[', code)):
    error(filename, linenum, 'whitespace/braces', 5,
          'Extra space before [')

  # In range-based for, we wanted spaces before and after the colon, but
  # not around "::" tokens that might appear.
  missing_space_after_colon = Search(r'for *\(.*[^:]:[^: ]', code)
  if missing_space_after_colon or Search(r'for *\(.*[^: ]:[^:]', code):
    error(filename, linenum, 'whitespace/forcolon', 2,
          'Missing space around colon in range-based for loop')
3193
3194
def CheckOperatorSpacing(filename, clean_lines, linenum, error):
  """Reports missing or superfluous whitespace around operators.

  Looks at the elided text of one line and checks '=', the comparison
  operators, '||', '<', '>', '<<', '>>' and the unary operators
  '!', '~', '--' and '++'.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Operator methods would confuse the spacing checks below, so the
  # operator symbol is masked with the same number of underscores,
  # which keeps every other character in its original column.
  #
  # Masking is repeated until nothing matches anymore, so that an
  # operator calling another operator on the same line is covered too.
  operator_pattern = r'^(.*\boperator\b)(\S+)(\s*\(.*)$'
  operator_match = Match(operator_pattern, line)
  while operator_match:
    head, symbol, tail = operator_match.groups()
    line = head + '_' * len(symbol) + tail
    operator_match = Match(operator_pattern, line)

  # "a=b" with no space on either side is flagged, unless the line has
  # an if/while, where "if ( (a=Foo()) == 0 )" is tolerated.  Only the
  # case where *both* sides lack a space is reported, because people
  # sometimes drop the space on one side to align ='s across lines.
  if not Search(r'\b(if|while) ', line) and Search(r'[\w.]=[\w.]', line):
    error(filename, linenum, 'whitespace/operators', 4,
          'Missing spaces around =')

  # Binary operators such as + - * / are deliberately not checked: it
  # is hard to tell when there is too little whitespace.  TODO.

  # Comparison operators and || need whitespace around them.
  #
  # <= and >= are looked at before < and > to avoid false positives,
  # and < / > are only examined on lines that are not #include lines.
  #
  # An operator followed by a comma is assumed to be a macro argument
  # and is not checked.
  #
  # && is absent on purpose; CheckRValueReference deals with it.
  comparison = Search(r'[^<>=!\s](==|!=|<=|>=|\|\|)[^<>=!\s,;\)]', line)
  if comparison:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around %s' % comparison.group(1))
  elif not Match(r'#.*include', line):
    # '<' without a space on either side.  Strictly speaking a single
    # missing space should be flagged as well, but requiring both
    # avoids false positives with shifts.  If the '<' opens something
    # that CloseExpression can close, it is not treated as an operator.
    less_than = Match(r'^(.*[^\s<])<[^\s=<,]', line)
    if less_than:
      _, _, end_pos = CloseExpression(
          clean_lines, linenum, len(less_than.group(1)))
      if end_pos <= -1:
        error(filename, linenum, 'whitespace/operators', 3,
              'Missing spaces around <')

    # Same idea for '>', using ReverseCloseExpression to look for the
    # matching opener.
    greater_than = Match(r'^(.*[^-\s>])>[^\s=>,]', line)
    if greater_than:
      _, _, start_pos = ReverseCloseExpression(
          clean_lines, linenum, len(greater_than.group(1)))
      if start_pos <= -1:
        error(filename, linenum, 'whitespace/operators', 3,
              'Missing spaces around <'.replace('<', '>'))

  # "<<" may go without spaces between two digits (10<<20), but not
  # otherwise, and in particular not for stream output.
  #
  # "<<" right after an opening parenthesis is also accepted, since
  # that is usually a macro dealing with operators, as is the
  # "operator<<;" form.
  shift_left = Search(r'(operator|\S)(?:L|UL|ULL|l|ul|ull)?<<([^\s,=])', line)
  if shift_left:
    left, right = shift_left.group(1), shift_left.group(2)
    after_paren = left == '('
    between_digits = left.isdigit() and right.isdigit()
    bare_operator = left == 'operator' and right == ';'
    if not (after_paren or between_digits or bare_operator):
      error(filename, linenum, 'whitespace/operators', 3,
            'Missing spaces around <<')

  # ">>" is allowed without spaces almost everywhere because C++11
  # lets it close nested templates.
  #
  # It is still flagged when directly followed by a letter or
  # underscore.  For a right shift such as
  #   value >>alpha
  # the space is missing, and when ">>" closes templates as in
  #   type<type<type>>alpha
  # there should still be a space before the identifier.
  if Search(r'>>[a-zA-Z_]', line):
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around >>')

  # Unary operators should sit directly next to their operand.
  unary = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
  if unary:
    error(filename, linenum, 'whitespace/operators', 4,
          'Extra space for operator %s' % unary.group(1))
3304
3305
def CheckParenthesisSpacing(filename, clean_lines, linenum, error):
  """Reports whitespace problems around control-statement parentheses.

  Two things are verified on the elided line: that if/for/while/switch
  are separated from their opening parenthesis by a space, and that the
  padding just inside the parentheses is symmetric and at most one
  space wide.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # "if(", "for(", "while(" and "switch(" need a space before the paren.
  glued_keyword = Search(r' (if\(|for\(|while\(|switch\()', line)
  if glued_keyword:
    error(filename, linenum, 'whitespace/parens', 5,
          'Missing space before ( in %s' % glued_keyword.group(1))

  # For a control statement that ends the line with "{", capture the
  # keyword, the padding after "(", the first character of the
  # condition, and the padding before ")".
  control = Search(r'\b(if|for|while|switch)\s*'
                   r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
                   line)
  if not control:
    return
  keyword, left_pad, first_char, right_pad = control.groups()

  # Padding must match on both sides: neither "if ( foo)" nor
  # "if (foo )".  The two tolerated exceptions are for-loops with an
  # empty clause: "for ( ; foo; bar)" and "for (foo; bar; )".
  if len(left_pad) != len(right_pad):
    empty_first_clause = (first_char == ';' and
                          len(left_pad) == 1 + len(right_pad))
    empty_last_clause = (not left_pad and
                         Search(r'\bfor\s*\(.*; \)', line))
    if not (empty_first_clause or empty_last_clause):
      error(filename, linenum, 'whitespace/parens', 5,
            'Mismatching spaces inside () in %s' % keyword)

  # More than one space of padding is never acceptable: "if (  foo  )".
  if len(left_pad) not in [0, 1]:
    error(filename, linenum, 'whitespace/parens', 5,
          'Should have zero or one spaces inside ( and ) in %s' %
          keyword)
3342
3343
def CheckCommaSpacing(filename, clean_lines, linenum, error):
  """Reports a missing space after a comma or a semicolon.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  raw_lines = clean_lines.lines_without_raw_strings
  line = clean_lines.elided[linenum]

  # A comma (function argument separator or operator) must be followed
  # by whitespace.  A comma directly followed by another comma is
  # exempt, since that only happens for empty macro arguments, and
  # "operator,(" is rewritten to "F(" so the comma operator itself is
  # not reported.
  #
  # The check is done twice: the elided line tells us whether a space
  # is missing at all, and the raw line confirms that the missing
  # space is not merely the result of an elided comment.
  missing_after_comma = r',[^,\s]'
  without_comma_operator = ReplaceAll(r'\boperator\s*,\s*\(', 'F(', line)
  if Search(missing_after_comma, without_comma_operator):
    if Search(missing_after_comma, raw_lines[linenum]):
      error(filename, linenum, 'whitespace/comma', 3,
            'Missing space after ,')

  # A semicolon must be followed by whitespace as well, apart from a
  # few corner cases: another semicolon, '}', ')', '/' or a backslash.
  # TODO(unknown): clarify if 'if (1) { return 1;}' requires one more
  # space after ;
  if Search(r';[^\s};\\)/]', line):
    error(filename, linenum, 'whitespace/semicolon', 3,
          'Missing space after ;')
3378
3379
def CheckBracesSpacing(filename, clean_lines, linenum, error):
  """Checks for horizontal spacing near braces and trailing semicolons.

  Reports a missing space before '{', a missing space in '}else', and
  semicolons that form an empty statement or are preceded by
  whitespace at the end of the line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # An opening brace should be preceded by a space unless it follows an
  # opening parenthesis or another opening brace (e.g. in an
  # initializer list).  Braces are never expected at the start of a
  # line, which keeps this test simple.
  glued_brace = Match(r'^(.*[^ ({]){', line)
  if glued_brace:
    # The brace may belong to a brace initialization, which shows up
    # in forms such as:
    #   Constructor() : initializer_list_{} { ... }
    #   Constructor{}.MemberFunction()
    #   Type variable{};
    #   FunctionCall(type{}, ...);
    #   LastArgument(..., type{});
    #   LOG(INFO) << type{} << " ...";
    #   map_of_type[{...}] = ...;
    #   ternary = expr ? new type{} : nullptr;
    #   OuterTemplate<InnerTemplateConstructor<Type>{}>
    #
    # To recognize these, find the matching closing brace and inspect
    # the first character after it; the warning is silenced when that
    # character is one of "{.;,)<>]:".
    #
    # Nested initializer lists are handled by skipping any number of
    # additional closing braces first.  Silencing on the mere presence
    # of a closing brace would be wrong, as it would hide real
    # problems:
    #   Silence this:         But not this:
    #     Outer{                if (...) {
    #       Inner{...}            if (...){  // Missing space before {
    #     };                    }
    #
    # A spurious semicolon such as "if (cond){};" defeats this check,
    # but the semicolon itself is reported elsewhere.
    (endline, endlinenum, endpos) = CloseExpression(
        clean_lines, linenum, len(glued_brace.group(1)))
    if endpos > -1:
      pieces = [endline[endpos:]]
    else:
      pieces = ['']
    # Also consider up to two of the following lines.
    lookahead_stop = min(endlinenum + 3, clean_lines.NumLines() - 1)
    for following in xrange(endlinenum + 1, lookahead_stop):
      pieces.append(clean_lines.elided[following])
    trailing_text = ''.join(pieces)
    if not Match(r'^[\s}]*[{.;,)<>\]:]', trailing_text):
      error(filename, linenum, 'whitespace/braces', 5,
            'Missing space before {')

  # '} else {' needs a space between the brace and the keyword.
  if Search(r'}else', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before else')

  # Semicolons at the end of the line: flag empty statements and
  # whitespace in front of the semicolon.  Lines containing "for" are
  # exempt from the last check, because the style guide allows a space
  # before the semicolon there.
  if Search(r':\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Semicolon defining empty statement. Use {} instead.')
  elif Search(r'^\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Line contains only semicolon. If this should be an empty statement, '
          'use {} instead.')
  elif Search(r'\s+;\s*$', line):
    if not Search(r'\bfor\b', line):
      error(filename, linenum, 'whitespace/semicolon', 5,
            'Extra space before last semicolon. If this should be an empty '
            'statement, use {} instead.')
3457
3458
def IsDecltype(clean_lines, linenum, column):
  """Tell whether the token ending on (linenum, column) is decltype().

  The expression ending at the given position is traced back to its
  start with ReverseCloseExpression; the token is a decltype()
  expression when the text in front of that start ends in "decltype".

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: the number of the line to check.
    column: end column of the token to check.
  Returns:
    True if this token is decltype() expression, False otherwise.
  """
  (text, _, start_col) = ReverseCloseExpression(clean_lines, linenum, column)
  # A negative start column means no matching opener was found.
  return start_col >= 0 and bool(
      Search(r'\bdecltype\s*$', text[0:start_col]))
3475
3476
def IsTemplateParameterList(clean_lines, linenum, column):
  """Tell whether the token ending on (linenum, column) closes template<>.

  The matching opener is located with ReverseCloseExpression; the
  token ends a template parameter list when the elided text in front
  of that opener ends in "template".

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: the number of the line to check.
    column: end column of the token to check.
  Returns:
    True if this token is end of a template parameter list, False otherwise.
  """
  (_, startline, startpos) = ReverseCloseExpression(
      clean_lines, linenum, column)
  if startpos <= -1:
    # No matching opener was found.
    return False
  text_before_opener = clean_lines.elided[startline][0:startpos]
  return bool(Search(r'\btemplate\s*$', text_before_opener))
3493
3494
def IsRValueType(clean_lines, nesting_state, linenum, column):
  """Check if the token ending on (linenum, column) is a type.

  Assumes that text to the right of the column is "&&" or a function
  name.

  This is a heuristic: the token immediately to the left of the column
  is inspected first, and if that is inconclusive the nearest preceding
  symbol (possibly on an earlier line) is used to guess the context.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.  Only
                   its previous_stack_top attribute is consulted here.
    linenum: the number of the line to check.
    column: end column of the token to check.
  Returns:
    True if this token is a type, False if we are not sure.
  """
  prefix = clean_lines.elided[linenum][0:column]

  # Get one word to the left.  If we failed to do so, this is most
  # likely not a type, since it's unlikely that the type name and "&&"
  # would be split across multiple lines.
  match = Match(r'^(.*)(\b\w+|[>*)&])\s*$', prefix)
  if not match:
    return False

  # Check text following the token.  If it's "&&>" or "&&," or "&&...", it's
  # most likely a rvalue reference used inside a template.
  suffix = clean_lines.elided[linenum][column:]
  if Match(r'&&\s*(?:[>,]|\.\.\.)', suffix):
    return True

  # Check for simple type and end of templates:
  #   int&& variable
  #   vector<int>&& variable
  #
  # Because this function is called recursively, we also need to
  # recognize pointer and reference types:
  #   int* Function()
  #   int& Function()
  if match.group(2) in ['char', 'char16_t', 'char32_t', 'wchar_t', 'bool',
                        'short', 'int', 'long', 'signed', 'unsigned',
                        'float', 'double', 'void', 'auto', '>', '*', '&']:
    return True

  # If we see a close parenthesis, look for decltype on the other side.
  # decltype would unambiguously identify a type, anything else is
  # probably a parenthesized expression and not a type.
  if match.group(2) == ')':
    return IsDecltype(
        clean_lines, linenum, len(match.group(1)) + len(match.group(2)) - 1)

  # Check for casts and cv-qualifiers.
  #   match.group(1)  remainder
  #   --------------  ---------
  #   const_cast<     type&&
  #   const           type&&
  #   type            const&&
  if Search(r'\b(?:const_cast\s*<|static_cast\s*<|dynamic_cast\s*<|'
            r'reinterpret_cast\s*<|\w+\s)\s*$',
            match.group(1)):
    return True

  # Look for a preceding symbol that might help differentiate the context.
  # These are the cases that would be ambiguous:
  #   match.group(1)  remainder
  #   --------------  ---------
  #   Call         (   expression &&
  #   Declaration  (   type&&
  #   sizeof       (   type&&
  #   if           (   expression &&
  #   while        (   expression &&
  #   for          (   type&&
  #   for(         ;   expression &&
  #   statement    ;   type&&
  #   block        {   type&&
  #   constructor  {   expression &&
  start = linenum
  line = match.group(1)
  match_symbol = None
  while start >= 0:
    # We want to skip over identifiers and commas to get to a symbol.
    # Commas are skipped so that we can find the opening parenthesis
    # for function parameter lists.
    match_symbol = Match(r'^(.*)([^\w\s,])[\w\s,]*$', line)
    if match_symbol:
      break
    start -= 1
    # Only fetch the previous line while one exists; index -1 would
    # silently wrap around to the last line of the file.
    if start >= 0:
      line = clean_lines.elided[start]

  if not match_symbol:
    # Probably the first statement in the file is an rvalue reference
    return True

  if match_symbol.group(2) == '}':
    # Found closing brace, probably an indicate of this:
    #   block{} type&&
    return True

  if match_symbol.group(2) == ';':
    # Found semicolon, probably one of these:
    #   for(; expression &&
    #   statement; type&&

    # Look for the previous 'for(' in the previous lines.
    before_text = match_symbol.group(1)
    for i in xrange(start - 1, max(start - 6, 0), -1):
      before_text = clean_lines.elided[i] + before_text
    if Search(r'for\s*\([^{};]*$', before_text):
      # This is the condition inside a for-loop
      return False

    # Did not find a for-init-statement before this semicolon, so this
    # is probably a new statement and not a condition.
    return True

  if match_symbol.group(2) == '{':
    # Found opening brace, probably one of these:
    #   block{ type&& = ... ; }
    #   constructor{ expression && expression }

    # Look for a closing brace or a semicolon.  If we see a semicolon
    # first, this is probably a rvalue reference.
    line = clean_lines.elided[start][0:len(match_symbol.group(1)) + 1]
    end = start
    depth = 1
    while True:
      for ch in line:
        if ch == ';':
          return True
        elif ch == '{':
          depth += 1
        elif ch == '}':
          depth -= 1
          if depth == 0:
            return False
      end += 1
      if end >= clean_lines.NumLines():
        break
      line = clean_lines.elided[end]
    # Incomplete program?
    return False

  if match_symbol.group(2) == '(':
    # Opening parenthesis.  Need to check what's to the left of the
    # parenthesis.  Only the text in front of the parenthesis on the
    # line where it was found is examined; no earlier line is consulted.
    before_text = match_symbol.group(1)

    # Patterns that are likely to be types:
    #   [](type&&
    #   for (type&&
    #   sizeof(type&&
    #   operator=(type&&
    #
    if Search(r'(?:\]|\bfor|\bsizeof|\boperator\s*\S+\s*)\s*$', before_text):
      return True

    # Patterns that are likely to be expressions:
    #   if (expression &&
    #   while (expression &&
    #   : initializer(expression &&
    #   , initializer(expression &&
    #   ( FunctionCall(expression &&
    #   + FunctionCall(expression &&
    #   + (expression &&
    #
    # The last '+' represents operators such as '+' and '-'.
    if Search(r'(?:\bif|\bwhile|[-+=%^(<!?:,&*]\s*)$', before_text):
      return False

    # Something else.  Check that tokens to the left look like
    #   return_type function_name
    match_func = Match(r'^(.*)\s+\w(?:\w|::)*(?:<[^<>]*>)?\s*$',
                       match_symbol.group(1))
    if match_func:
      # Check for constructors, which don't have return types.
      if Search(r'\b(?:explicit|inline)$', match_func.group(1)):
        return True
      implicit_constructor = Match(r'\s*(\w+)\((?:const\s+)?(\w+)', prefix)
      if (implicit_constructor and
          implicit_constructor.group(1) == implicit_constructor.group(2)):
        return True
      # Otherwise decide by whether the would-be return type is a type.
      return IsRValueType(clean_lines, nesting_state, linenum,
                          len(match_func.group(1)))

    # Nothing before the function name.  If this is inside a block scope,
    # this is probably a function call.
    return not (nesting_state.previous_stack_top and
                nesting_state.previous_stack_top.IsBlockInfo())

  if match_symbol.group(2) == '>':
    # Possibly a closing bracket, check that what's on the other side
    # looks like the start of a template.
    return IsTemplateParameterList(
        clean_lines, start, len(match_symbol.group(1)))

  # Some other symbol, usually something like "a=b&&c".  This is most
  # likely not a type.
  return False
3695
3696
def IsDeletedOrDefault(clean_lines, linenum):
  """Tell whether the constructor or operator on this line is deleted/default.

  The parameter list starting at the first '(' on the line is skipped
  with CloseExpression, and the text after it is tested for
  "= delete" or "= default".

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
  Returns:
    A true value if this is a deleted or default constructor, a false
    value otherwise (including when no parenthesized list is found).
  """
  open_paren = clean_lines.elided[linenum].find('(')
  if open_paren >= 0:
    (close_line, _, close_paren) = CloseExpression(
        clean_lines, linenum, open_paren)
    if close_paren >= 0:
      text_after_params = close_line[close_paren:]
      return Match(r'\s*=\s*(?:delete|default)\b', text_after_params)
  return False
3714
3715
def IsRValueAllowed(clean_lines, linenum):
  """Tell whether an rvalue reference is acceptable on the given line.

  Rvalue references are accepted inside a region delimited by the
  GOOGLE_ALLOW_RVALUE_REFERENCES_PUSH/POP macros, and on deleted or
  defaulted assignment operators and constructors.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
  Returns:
    A true value if line is within the region where RValue references
    are allowed.
  """
  # Region marked by PUSH/POP macros: walk backwards to the nearest
  # marker, which has to be a PUSH, then walk forwards from the current
  # line to the next marker, which has to be a POP.
  marker_pattern = r'GOOGLE_ALLOW_RVALUE_REFERENCES_(?:PUSH|POP)'
  for before in xrange(linenum, 0, -1):
    candidate = clean_lines.elided[before]
    if not Match(marker_pattern, candidate):
      continue
    if not candidate.endswith('PUSH'):
      return False
    for after in xrange(linenum, clean_lines.NumLines(), 1):
      following = clean_lines.elided[after]
      if Match(marker_pattern, following):
        return following.endswith('POP')

  line = clean_lines.elided[linenum]

  # operator= is fine when deleted or defaulted.
  if Search(r'\boperator\s*=\s*\(', line):
    return IsDeletedOrDefault(clean_lines, linenum)

  # So are constructors, recognized either as "Name::Name(" or by a
  # leading "explicit"/"inline".
  qualified = Match(r'\s*([\w<>]+)\s*::\s*([\w<>]+)\s*\(', line)
  is_qualified_constructor = (qualified and
                              qualified.group(1) == qualified.group(2))
  if (is_qualified_constructor or
      Search(r'\b(?:explicit|inline)\s+[\w<>]+\s*\(', line)):
    return IsDeletedOrDefault(clean_lines, linenum)

  # A line that starts with "Name(" is treated as a constructor when
  # the previous line is blank or ends in one of "{", "}", ":" or ";".
  # On the very first line a placeholder stands in for the previous line.
  if Match(r'\s*[\w<>]+\s*\(', line):
    if linenum > 0:
      previous_line = clean_lines.elided[linenum - 1]
    else:
      previous_line = 'ReturnType'
    if Match(r'^\s*$', previous_line) or Search(r'[{}:;]\s*$', previous_line):
      return IsDeletedOrDefault(clean_lines, linenum)

  return False
3756
3757
def CheckRValueReference(filename, clean_lines, linenum, nesting_state, error):
  """Reports unapproved rvalue references and badly spaced "&&".

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # Only "&&" that lacks a space on at least one side is examined.
  # TODO(unknown): currently we don't check for rvalue references
  # with spaces surrounding the && to avoid false positives with
  # boolean expressions.
  line = clean_lines.elided[linenum]
  match = Match(r'^(.*\S)&&', line) or Match(r'(.*)&&\S', line)
  if not match:
    return
  # "(&&)" and "operator&&" are left alone.
  if '(&&)' in line:
    return
  if Search(r'\boperator\s*$', match.group(1)):
    return

  # This is either a poorly formatted boolean "&&" or an rvalue
  # reference.  Which message to emit depends on whether the text to
  # the left of "&&" is a type.
  and_pos = len(match.group(1))
  if not IsRValueType(clean_lines, nesting_state, linenum, and_pos):
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around &&')
    return

  if not IsRValueAllowed(clean_lines, linenum):
    error(filename, linenum, 'build/c++11', 3,
          'RValue references are an unapproved C++ feature.')
3791
3792
def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
  """Checks blank lines in front of access specifiers inside a class.

  The only rule enforced is that "public:", "protected:" and
  "private:" are preceded by a blank line, unless they directly follow
  the class head.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    class_info: A _ClassInfo objects.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Small classes are exempt, where small means 25 lines or less: that
  # is the usual terminal height, and a class that does not fit on one
  # screen can hardly be called small.  When the end of the class was
  # not found, last_line is zero and the class is treated as small too.
  #
  # The first line of the class is skipped as well, which covers
  #   class Foo { public: ... };
  class_span = class_info.last_line - class_info.starting_linenum
  if class_span <= 24 or linenum <= class_info.starting_linenum:
    return

  matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum])
  if not matched:
    return

  # No warning is needed when the previous line is blank, or when it
  # contains "class" or "struct".  The latter happens in two ways:
  #  - We are at the beginning of the class.
  #  - We are forward-declaring an inner class that is semantically
  #    private, but needed to be public for implementation reasons.
  # A previous line ending in a backslash is ignored too, as is common
  # when classes are defined inside C macros.
  prev_line = clean_lines.lines[linenum - 1]
  if (IsBlankLine(prev_line) or
      Search(r'\b(class|struct)\b', prev_line) or
      Search(r'\\$', prev_line)):
    return

  # The class head may span several lines because of a multi-line
  # base-specifier list, e.g.:
  #   class Derived
  #       : public Base {
  # so find the first line ending in "{" and treat it as the end of
  # the head.
  end_class_head = class_info.starting_linenum
  for candidate in range(class_info.starting_linenum, linenum):
    if Search(r'\{\s*$', clean_lines.lines[candidate]):
      end_class_head = candidate
      break

  # Warn only if the specifier does not directly follow the class head.
  if end_class_head < linenum - 1:
    error(filename, linenum, 'whitespace/blank_line', 3,
          '"%s:" should be preceded by a blank line' % matched.group(1))
3846
3847
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Find the closest non-blank elided line above the given line.

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to check.

  Returns:
    A tuple with two elements.  The first element is the contents of the last
    non-blank line before the current line, or the empty string if this is the
    first non-blank line.  The second is the line number of that line, or -1
    if this is the first non-blank line.
  """
  index = linenum
  # Step upwards one line at a time until line 0 has been examined.
  while index > 0:
    index -= 1
    candidate = clean_lines.elided[index]
    if not IsBlankLine(candidate):
      return (candidate, index)
  return ('', -1)
3869
3870
3871def CheckBraces(filename, clean_lines, linenum, error):
3872  """Looks for misplaced braces (e.g. at the end of line).
3873
3874  Args:
3875    filename: The name of the current file.
3876    clean_lines: A CleansedLines instance containing the file.
3877    linenum: The number of the line to check.
3878    error: The function to call with any errors found.
3879  """
3880
3881  line = clean_lines.elided[linenum]        # get rid of comments and strings
3882
3883  if Match(r'\s*{\s*$', line):
3884    # We allow an open brace to start a line in the case where someone is using
3885    # braces in a block to explicitly create a new scope, which is commonly used
3886    # to control the lifetime of stack-allocated variables.  Braces are also
3887    # used for brace initializers inside function calls.  We don't detect this
3888    # perfectly: we just don't complain if the last non-whitespace character on
3889    # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the
3890    # previous line starts a preprocessor block.
3891    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
3892    if (not Search(r'[,;:}{(]\s*$', prevline) and
3893        not Match(r'\s*#', prevline)):
3894      error(filename, linenum, 'whitespace/braces', 4,
3895            '{ should almost always be at the end of the previous line')
3896
3897  # An else clause should be on the same line as the preceding closing brace.
3898  if Match(r'\s*else\b\s*(?:if\b|\{|$)', line):
3899    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
3900    if Match(r'\s*}\s*$', prevline):
3901      error(filename, linenum, 'whitespace/newline', 4,
3902            'An else should appear on the same line as the preceding }')
3903
3904  # If braces come on one side of an else, they should be on both.
3905  # However, we have to worry about "else if" that spans multiple lines!
3906  if Search(r'else if\s*\(', line):       # could be multi-line if
3907    brace_on_left = bool(Search(r'}\s*else if\s*\(', line))
3908    # find the ( after the if
3909    pos = line.find('else if')
3910    pos = line.find('(', pos)
3911    if pos > 0:
3912      (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
3913      brace_on_right = endline[endpos:].find('{') != -1
3914      if brace_on_left != brace_on_right:    # must be brace after if
3915        error(filename, linenum, 'readability/braces', 5,
3916              'If an else has a brace on one side, it should have it on both')
3917  elif Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
3918    error(filename, linenum, 'readability/braces', 5,
3919          'If an else has a brace on one side, it should have it on both')
3920
3921  # Likewise, an else should never have the else clause on the same line
3922  if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
3923    error(filename, linenum, 'whitespace/newline', 4,
3924          'Else clause should never be on same line as else (use 2 lines)')
3925
3926  # In the same way, a do/while should never be on one line
3927  if Match(r'\s*do [^\s{]', line):
3928    error(filename, linenum, 'whitespace/newline', 4,
3929          'do/while clauses should not be on a single line')
3930
3931  # Check single-line if/else bodies. The style guide says 'curly braces are not
3932  # required for single-line statements'. We additionally allow multi-line,
3933  # single statements, but we reject anything with more than one semicolon in
3934  # it. This means that the first semicolon after the if should be at the end of
3935  # its line, and the line after that should have an indent level equal to or
3936  # lower than the if. We also check for ambiguous if/else nesting without
3937  # braces.
3938  if_else_match = Search(r'\b(if\s*\(|else\b)', line)
3939  if if_else_match and not Match(r'\s*#', line):
3940    if_indent = GetIndentLevel(line)
3941    endline, endlinenum, endpos = line, linenum, if_else_match.end()
3942    if_match = Search(r'\bif\s*\(', line)
3943    if if_match:
3944      # This could be a multiline if condition, so find the end first.
3945      pos = if_match.end() - 1
3946      (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum, pos)
3947    # Check for an opening brace, either directly after the if or on the next
3948    # line. If found, this isn't a single-statement conditional.
3949    if (not Match(r'\s*{', endline[endpos:])
3950        and not (Match(r'\s*$', endline[endpos:])
3951                 and endlinenum < (len(clean_lines.elided) - 1)
3952                 and Match(r'\s*{', clean_lines.elided[endlinenum + 1]))):
3953      while (endlinenum < len(clean_lines.elided)
3954             and ';' not in clean_lines.elided[endlinenum][endpos:]):
3955        endlinenum += 1
3956        endpos = 0
3957      if endlinenum < len(clean_lines.elided):
3958        endline = clean_lines.elided[endlinenum]
3959        # We allow a mix of whitespace and closing braces (e.g. for one-liner
3960        # methods) and a single \ after the semicolon (for macros)
3961        endpos = endline.find(';')
3962        if not Match(r';[\s}]*(\\?)$', endline[endpos:]):
3963          # Semicolon isn't the last character, there's something trailing.
3964          # Output a warning if the semicolon is not contained inside
3965          # a lambda expression.
3966          if not Match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}]*\}\s*\)*[;,]\s*$',
3967                       endline):
3968            error(filename, linenum, 'readability/braces', 4,
3969                  'If/else bodies with multiple statements require braces')
3970        elif endlinenum < len(clean_lines.elided) - 1:
3971          # Make sure the next line is dedented
3972          next_line = clean_lines.elided[endlinenum + 1]
3973          next_indent = GetIndentLevel(next_line)
3974          # With ambiguous nested if statements, this will error out on the
3975          # if that *doesn't* match the else, regardless of whether it's the
3976          # inner one or outer one.
3977          if (if_match and Match(r'\s*else\b', next_line)
3978              and next_indent != if_indent):
3979            error(filename, linenum, 'readability/braces', 4,
3980                  'Else clause should be indented at the same level as if. '
3981                  'Ambiguous nested if/else chains require braces.')
3982          elif next_indent > if_indent:
3983            error(filename, linenum, 'readability/braces', 4,
3984                  'If/else bodies with multiple statements require braces')
3985
3986
def CheckTrailingSemicolon(filename, clean_lines, linenum, error):
  """Reports a needless ';' placed right after a block's closing brace.

  The line is inspected for an opening '{' that appears in one of a fixed
  set of contexts.  When one is found, the matching '}' is located and a
  warning is emitted if the text following it begins with ';'.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  line = clean_lines.elided[linenum]

  # With C++11 brace initialization a "};" is required more often than it
  # is redundant, so instead of blacklisting we only look at contexts in
  # which "};" should really be "}":
  #   1. Block after a closing parenthesis:
  #        for (;;) {};   while (...) {};   switch (...) {};
  #        Function(...) {};   if (...) {};   if (...) else if (...) {};
  #   2. else block:                       if (...) else {};
  #   3. const member function:            Function(...) const {};
  #   4. Block following a statement:      x = 42;
  #                                        {};
  #   5. Block at the start of a function: Function(...) {
  #                                          {};
  #                                        }
  #      (Looking for a preceding "{" also hits multi-dimensional array
  #      initializers, which is harmless because those contain no
  #      semicolons.)
  #   6. Block following another block:    while (true) {}
  #                                        {};
  #   7. End of namespace:                 namespace {};
  #      This one is common (classes converted into namespaces) and is
  #      deliberately not reported.
  block_open = Match(r'^(.*\)\s*)\{', line)
  if not block_open:
    # Cases 2-3 live on the same line as the brace.
    block_open = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line)
    if not block_open:
      # Cases 4-6 are matched against the previous non-blank line.  The two
      # lines are not simply concatenated for a single match, since that
      # could report the same brace twice when a blank line sits between:
      #   if (cond) {
      #     // blank line
      #   }
      prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
      if prevline and Search(r'[;{}]\s*$', prevline):
        block_open = Match(r'^(\s*)\{', line)
  else:
    # Case 1.  Inspect what precedes the matching opening parenthesis and
    # back off when it looks like a macro, because macros may expand to a
    # base class or to the entire class-head, where the semicolon is needed.
    # A short list of macros is known to be safe to warn about.  Being
    # wrong with a whitelist only costs a few leftover semicolons, whereas
    # being wrong with a blacklist would cause compile errors.
    #
    # Compound literals and lambdas are skipped as well.
    safe_macros = (
        'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST',
        'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED',
        'LOCKS_EXCLUDED', 'INTERFACE_DEF')
    rparen_pos = block_open.group(1).rfind(')')
    lparen = ReverseCloseExpression(clean_lines, linenum, rparen_pos)
    lparen_line = lparen[0]
    lparen_linenum = lparen[1]
    lparen_pos = lparen[2]
    if lparen_pos > -1:
      before_paren = lparen_line[0:lparen_pos]
      macro = Search(r'\b([A-Z_]+)\s*$', before_paren)
      func = Match(r'^(.*\])\s*$', before_paren)
      if macro and macro.group(1) not in safe_macros:
        # Unknown all-caps token: treat it as an unsafe macro.
        block_open = None
      elif func and not Search(r'\boperator\s*\[\s*\]', func.group(1)):
        # "...](" that is not operator[]: looks like a lambda.
        block_open = None
      elif Search(r'\s+=\s*$', before_paren):
        # "= (...) {": looks like a compound literal.
        block_open = None
    if (block_open and
        lparen_linenum > 1 and
        Search(r'\]\s*$', clean_lines.elided[lparen_linenum - 1])):
      # Lambda whose capture list ends on the line above the parameters.
      block_open = None

  if not block_open:
    return

  # Locate the brace that closes the one we matched.
  (endline, endlinenum, endpos) = CloseExpression(
      clean_lines, linenum, len(block_open.group(1)))
  if endpos > -1 and Match(r'^\s*;', endline[endpos:]):
    # The block qualifies and is followed by ';'.  Since opening braces are
    # scanned forward while the warning is attached to the closing brace,
    # nested offenders are reported in reverse order.
    error(filename, endlinenum, 'readability/braces', 4,
          "You don't need a ; after a }")
4119
4120
def CheckEmptyBlockBody(filename, clean_lines, linenum, error):
  """Flags a for/while/if whose body is nothing but a lone semicolon.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # The keyword must be the first thing on the line (after whitespace).
  # That also skips most do-while loops, whose "while" line is expected to
  # begin with a closing brace.  "if" is included because an empty
  # conditional body is likely a mistake.
  line = clean_lines.elided[linenum]
  keyword = Match(r'\s*(for|while|if)\s*\(', line)
  if not keyword:
    return

  # The match ends on the opening parenthesis of the condition; walk to the
  # parenthesis that closes it.
  (end_line, end_linenum, end_pos) = CloseExpression(
      clean_lines, linenum, keyword.end() - 1)

  # Only a semicolon glued directly to the closing parenthesis is reported.
  # Whitespace or a newline in between is left to the separate check for
  # semicolons preceded by whitespace.
  if end_pos < 0 or not Match(r';', end_line[end_pos:]):
    return

  if keyword.group(1) == 'if':
    category = 'whitespace/empty_conditional_body'
    message = 'Empty conditional bodies should use {}'
  else:
    category = 'whitespace/empty_loop_body'
    message = 'Empty loop bodies should use {} or continue'
  error(filename, end_linenum, category, 5, message)
4154
4155
def FindCheckMacro(line):
  """Locates the first replaceable CHECK-like macro invoked on a line.

  Args:
    line: line to search on.
  Returns:
    (macro name, start position), or (None, -1) if no replaceable
    macro is found.  The start position is the index of the opening
    parenthesis of the macro invocation.
  """
  for macro in _CHECK_MACROS:
    if macro not in line:
      continue
    # A plain substring hit is not enough: some other macro may merely
    # contain the name.  Require a word boundary before the name and an
    # opening parenthesis after it.
    invocation = Match(r'^(.*\b' + macro + r'\s*)\(', line)
    if invocation:
      return (macro, len(invocation.group(1)))
  return (None, -1)
4177
4178
def CheckCheck(filename, clean_lines, linenum, error):
  """Suggests comparison-specific macros for CHECK/EXPECT style macros.

  When a macro found by FindCheckMacro wraps a single relational comparison
  in which at least one side is a literal constant, a
  'readability/check' message naming the replacement macro is emitted.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  lines = clean_lines.elided
  (check_macro, start_pos) = FindCheckMacro(lines[linenum])
  if not check_macro:
    return

  # Match parentheses to find where the macro argument ends.
  (last_line, end_line, end_pos) = CloseExpression(
      clean_lines, linenum, start_pos)
  if end_pos < 0:
    return

  # Anything other than ';' after the macro suggests the user streams a
  # custom message into it; make no suggestion in that case.
  if not Match(r'\s*;', last_line[end_pos:]):
    return

  # Collect the text between the outer parentheses, possibly over several
  # lines.
  if linenum == end_line:
    expression = lines[linenum][start_pos + 1:end_pos - 1]
  else:
    pieces = [lines[linenum][start_pos + 1:]]
    pieces.extend(lines[i] for i in xrange(linenum + 1, end_line))
    pieces.append(last_line[0:end_pos - 1])
    expression = ''.join(pieces)

  # Tokenize the expression so that parenthesized operands are skipped as a
  # whole.  This keeps inputs such as "CHECK((a < 4) == b)" from being
  # treated as a "<" comparison.
  lhs = ''
  rhs = ''
  operator = None
  while expression:
    matched = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||'
                    r'==|!=|>=|>|<=|<|\()(.*)$', expression)
    if not matched:
      # Plain operand text.  Grab a run of non-operator characters in one
      # go rather than one character per iteration; fall back to a single
      # non-space character when the run is empty.
      operand = Match(r'^([^-=!<>()&|]+)(.*)$', expression)
      if not operand:
        operand = Match(r'^(\s*\S)(.*)$', expression)
        if not operand:
          break
      lhs += operand.group(1)
      expression = operand.group(2)
      continue

    token = matched.group(1)
    remainder = matched.group(2)
    if token == '(':
      # Parenthesized operand: copy it through to its closing parenthesis.
      (end, _) = FindEndOfExpressionInLine(remainder, 0, ['('])
      if end < 0:
        return  # Unmatched parenthesis
      lhs += '(' + remainder[0:end]
      expression = remainder[end:]
    elif token in ('&&', '||'):
      # More than one term, e.g. CHECK(42 < a && a < b); there is no single
      # replacement macro for that.
      return
    elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'):
      # Operators that merely look like comparisons belong to the operand.
      lhs += token
      expression = remainder
    else:
      # Relational operator: everything after it is the right-hand side.
      operator = token
      rhs = remainder
      break

  # All three parts are needed to make a suggestion.
  if not (lhs and operator and rhs):
    return

  # lhs cannot contain && or || (the loop bails out on them), but rhs was
  # taken verbatim and still has to be checked.
  if '&&' in rhs or '||' in rhs:
    return

  # Require a literal constant on at least one side, so that nothing is
  # suggested for unprintable operands such as CHECK(variable != iterator).
  # The pattern accepts, in order: decimal integers, hex integers, strings
  # and characters.
  lhs = lhs.strip()
  rhs = rhs.strip()
  match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$'
  if Match(match_constant, lhs) or Match(match_constant, rhs):
    # Both operands are known here, so the message could spell out the
    # full replacement, e.g. "CHECK_EQ(x, 42) instead of CHECK(x == 42)".
    # The generic "(a op b)" form is kept because long operands would make
    # the message unreadable.
    error(filename, linenum, 'readability/check', 2,
          'Consider using %s instead of %s(a %s b)' % (
              _CHECK_REPLACEMENT[check_macro][operator],
              check_macro, operator))
4295
4296
def CheckAltTokens(filename, clean_lines, linenum, error):
  """Reports alternative operator keywords found on a line.

  Every match of _ALT_TOKEN_REPLACEMENT_PATTERN produces one
  'readability/alt_tokens' message naming the operator to use instead.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Preprocessor lines are left alone.
  if Match(r'^\s*#', line):
    return

  # Crude guard against multi-line comments: skip any line holding a
  # comment delimiter.  A comment that starts before this line and ends
  # after it is not detected, but this removes most false positives and
  # gives people who put multi-line comments in preprocessor macros a way
  # to work around the warning.
  #
  # TODO(unknown): remove this once cpplint has better support for
  # multi-line comments.
  if '/*' in line or '*/' in line:
    return

  for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
    token = match.group(1)
    error(filename, linenum, 'readability/alt_tokens', 2,
          'Use operator %s instead of %s' % (
              _ALT_TOKEN_REPLACEMENT[token], token))
4327
4328
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Text strings are normalized to NFC and measured per character: East Asian
  wide ('W') and fullwidth ('F') characters count as two columns, combining
  characters count as zero, and everything else counts as one.  Any other
  value (for example a byte string) is measured with len().

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.
  """
  # The text type is called "unicode" on Python 2 and "str" on Python 3.
  # Referencing the missing name directly would raise NameError, so resolve
  # it defensively.
  try:
    text_type = unicode  # pylint: disable=undefined-variable
  except NameError:
    text_type = str

  if not isinstance(line, text_type):
    return len(line)

  width = 0
  for uc in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(uc) in ('W', 'F'):
      width += 2
    elif not unicodedata.combining(uc):
      width += 1
  return width
4349
4350
def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
               error):
  """Runs the line-level checks for the 'C++ style rules' of cppguide.html.

  Many of those rules (naming, comment style) are hard to verify
  mechanically.  This function covers what it can: tabs, trailing
  whitespace, odd indentation, line length and multiple statements per
  line.  It then delegates to the more specific Check* helpers.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  # Commented lines have to be checked too, which rules out the "elided"
  # view.  The fully raw view is not used either, so that the contents of
  # C++11 raw strings are not inspected.
  line = clean_lines.lines_without_raw_strings[linenum]

  if '\t' in line:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # An indent of exactly one or three spaces does not fit a 2-space indent
  # scheme.  Rob Pike's original awk tests were more elaborate (they gave up
  # past 20 spaces, and looked at access labels, " <<", and a previous line
  # ending in &&, ||, one of ",=><, or starting a for loop); the check here
  # is much simpler.
  scope_or_label_pattern = r'\s*\w+\s*:\s*\\?$'
  # NOTE: this value is not read before it is reassigned near the end of
  # the function.
  classinfo = nesting_state.InnermostClass()
  cleansed_line = clean_lines.elided[linenum]
  initial_spaces = len(line) - len(line.lstrip(' '))

  if line and line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace.  Consider deleting these extra spaces.')
  elif initial_spaces in (1, 3):
    # One space is tolerated for section labels and for lines that belong
    # to a multi-line raw string.
    if not Match(scope_or_label_pattern, cleansed_line):
      raw_string_line = (clean_lines.raw_lines[linenum] != line and
                         Match(r'^\s*""', line))
      if not raw_string_line:
        error(filename, linenum, 'whitespace/indent', 3,
              'Weird number of spaces at line-start.  '
              'Are you using a 2-space indent?')

  # Work out whether this line is part of the header guard.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    is_header_guard = line.startswith(('#ifndef %s' % cppvar,
                                       '#define %s' % cppvar,
                                       '#endif  // %s' % cppvar))

  # Lines exempt from the length limit:
  #   - #include lines and header guards, which cannot be split cleanly;
  #   - comment lines ending in a URL, which could be split but would then
  #     be harder to cut&paste;
  #   - the "$Id:...$" comment, whose length is not the developer's fault.
  length_exempt = (line.startswith('#include') or is_header_guard or
                   Match(r'^\s*//.*http(s?)://\S*$', line) or
                   Match(r'^// \$Id:.*#[0-9]+ \$$', line))
  if not length_exempt:
    line_width = GetLineWidth(line)
    extended_length = int((_line_length * 1.25))
    if line_width > extended_length:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than %i characters' %
            extended_length)
    elif line_width > _line_length:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= %i characters long' % _line_length)

  # More than one ';' usually means several commands on one line.  Lines
  # mentioning "for" are exempt, since a for loop is allowed two ';'s.
  if cleansed_line.count(';') > 1 and 'for' not in cleansed_line:
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    # A for loop header may run over two lines: the previous line mentions
    # "for" and holds no ';' yet.
    for_continuation = 'for' in prevline and ';' not in prevline
    # A switch case that fits on a single line may hold several commands.
    one_line_case = (('case ' in cleansed_line or
                      'default:' in cleansed_line) and
                     'break;' in cleansed_line)
    if not for_continuation and not one_line_case:
      error(filename, linenum, 'whitespace/newline', 0,
            'More than one command on the same line')

  # Remaining style checks, each implemented by its own helper.
  CheckBraces(filename, clean_lines, linenum, error)
  CheckTrailingSemicolon(filename, clean_lines, linenum, error)
  CheckEmptyBlockBody(filename, clean_lines, linenum, error)
  CheckAccess(filename, clean_lines, linenum, nesting_state, error)
  CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
  CheckOperatorSpacing(filename, clean_lines, linenum, error)
  CheckParenthesisSpacing(filename, clean_lines, linenum, error)
  CheckCommaSpacing(filename, clean_lines, linenum, error)
  CheckBracesSpacing(filename, clean_lines, linenum, error)
  CheckSpacingForFunctionCall(filename, clean_lines, linenum, error)
  CheckRValueReference(filename, clean_lines, linenum, nesting_state, error)
  CheckCheck(filename, clean_lines, linenum, error)
  CheckAltTokens(filename, clean_lines, linenum, error)
  classinfo = nesting_state.InnermostClass()
  if classinfo:
    CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
4468
4469
# Matches a #include directive, allowing whitespace before '#' and between
# '#' and 'include'.  Group 1 is the opening delimiter ('<' or '"') and
# group 2 is the text between the delimiters, i.e. the included path.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename, where components are delimited
# by '-', '_' or '.'. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
4477
4478
4479def _DropCommonSuffixes(filename):
4480  """Drops common suffixes like _test.cc or -inl.h from filename.
4481
4482  For example:
4483    >>> _DropCommonSuffixes('foo/foo-inl.h')
4484    'foo/foo'
4485    >>> _DropCommonSuffixes('foo/bar/foo.cc')
4486    'foo/bar/foo'
4487    >>> _DropCommonSuffixes('foo/foo_internal.h')
4488    'foo/foo'
4489    >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
4490    'foo/foo_unusualinternal'
4491
4492  Args:
4493    filename: The input filename.
4494
4495  Returns:
4496    The filename with the common suffix removed.
4497  """
4498  for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
4499                 'inl.h', 'impl.h', 'internal.h'):
4500    if (filename.endswith(suffix) and len(filename) > len(suffix) and
4501        filename[-len(suffix) - 1] in ('-', '_')):
4502      return filename[:-len(suffix) - 1]
4503  return os.path.splitext(filename)[0]
4504
4505
4506def _IsTestFilename(filename):
4507  """Determines if the given filename has a suffix that identifies it as a test.
4508
4509  Args:
4510    filename: The input filename.
4511
4512  Returns:
4513    True if 'filename' looks like a test, False otherwise.
4514  """
4515  if (filename.endswith('_test.cc') or
4516      filename.endswith('_unittest.cc') or
4517      filename.endswith('_regtest.cc')):
4518    return True
4519  else:
4520    return False
4521
4522
def _ClassifyInclude(fileinfo, include, is_system):
  """Assigns an #include to one of the header categories.

  Args:
    fileinfo: The current file cpplint is running over. A FileInfo instance.
    include: The path to a #included file.
    is_system: True if the #include used <> rather than "".

  Returns:
    One of the _XXX_HEADER constants.

  For example:
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
    _C_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
    _CPP_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
    _LIKELY_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
    ...                  'bar/foo_other_ext.h', False)
    _POSSIBLE_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
    _OTHER_HEADER
  """
  # System includes are either listed in _CPP_HEADERS or treated as C.
  is_cpp_h = include in _CPP_HEADERS
  if is_system:
    return _CPP_SYS_HEADER if is_cpp_h else _C_SYS_HEADER

  # Same basename once common suffixes are dropped, and the include sits in
  # the target's own directory or in a sibling "public" directory: the
  # header is likely owned by the target file.
  target_path = _DropCommonSuffixes(fileinfo.RepositoryName())
  target_dir, target_base = os.path.split(target_path)
  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
  if target_base == include_base and include_dir in (
      target_dir, os.path.normpath(target_dir + '/../public')):
    return _LIKELY_MY_HEADER

  # Sharing only the leading basename component means the target might be
  # implementing the include.  Such a header is allowed to come first, but
  # nothing complains when it does not.
  target_first_component = _RE_FIRST_COMPONENT.match(target_base)
  include_first_component = _RE_FIRST_COMPONENT.match(include_base)
  if target_first_component and include_first_component:
    if (target_first_component.group(0) ==
        include_first_component.group(0)):
      return _POSSIBLE_MY_HEADER

  return _OTHER_HEADER
4580
4581
4582
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Applies the checks that are specific to #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)
  line = clean_lines.lines[linenum]

  # Quoted includes are expected in the "foo/bar.h" form rather than plain
  # "bar.h".  Headers matching _THIRD_PARTY_HEADERS_PATTERN are skipped, on
  # the assumption that a 3rd party API dictates its own include
  # convention.  (Lua headers are one such exception: they follow google
  # naming but not the include convention.)
  bare_header = Match(r'#include\s*"([^/]+\.h)"', line)
  if (bare_header and
      not _THIRD_PARTY_HEADERS_PATTERN.match(bare_header.group(1))):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # Everything below needs a parsed #include directive.
  include_match = _RE_PATTERN_INCLUDE.search(line)
  if not include_match:
    return
  include = include_match.group(2)
  is_system = (include_match.group(1) == '<')

  # A file should not be included more than once.  There are a handful of
  # cases where doing so is fine, but in general it is not.
  duplicate_line = include_state.FindHeader(include)
  if duplicate_line >= 0:
    error(filename, linenum, 'build/include', 4,
          '"%s" already included at %s:%s' %
          (include, filename, duplicate_line))
  elif not _THIRD_PARTY_HEADERS_PATTERN.match(include):
    include_state.include_list[-1].append((include, linenum))

    # Headers are expected in this order:
    # 1) for foo.cc, foo.h  (preferred location)
    # 2) c system files
    # 3) cpp system files
    # 4) for foo.cc, foo.h  (deprecated location)
    # 5) other google headers
    #
    # Each include is classified as one of those types; include_state is
    # asked whether that type may follow what it has already seen and
    # returns a message when it may not.
    header_type = _ClassifyInclude(fileinfo, include, is_system)
    error_message = include_state.CheckNextIncludeOrder(header_type)
    if error_message:
      error(filename, linenum, 'build/include_order', 4,
            '%s. Should be: %s.h, c system, c++ system, other.' %
            (error_message, fileinfo.BaseName()))
    canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
    if not include_state.IsInAlphabeticalOrder(
        clean_lines, linenum, canonical_include):
      error(filename, linenum, 'build/include_alpha', 4,
            'Include "%s" not in alphabetical order' % include)
    include_state.SetLastHeader(canonical_include)

  # Look for any of the stream classes that are part of standard C++.
  # Many unit tests use cout, so test files are exempt.
  if (Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include) and
      not _IsTestFilename(filename)):
    if include == 'ostream':
      # Suggest a different header for ostream.
      error(filename, linenum, 'readability/streams', 3,
            'For logging, include "base/logging.h" instead of <ostream>.')
    else:
      error(filename, linenum, 'readability/streams', 3,
            'Streams are highly discouraged.')
4665
4666
4667def _GetTextInside(text, start_pattern):
4668  r"""Retrieves all the text between matching open and close parentheses.
4669
4670  Given a string of lines and a regular expression string, retrieve all the text
4671  following the expression and between opening punctuation symbols like
4672  (, [, or {, and the matching close-punctuation symbol. This properly nested
4673  occurrences of the punctuations, so for the text like
4674    printf(a(), b(c()));
4675  a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
4676  start_pattern must match string having an open punctuation symbol at the end.
4677
4678  Args:
4679    text: The lines to extract text. Its comments and strings must be elided.
4680           It can be single line and can span multiple lines.
4681    start_pattern: The regexp string indicating where to start extracting
4682                   the text.
4683  Returns:
4684    The extracted text.
4685    None if either the opening string or ending punctuation could not be found.
4686  """
4687  # TODO(unknown): Audit cpplint.py to see what places could be profitably
4688  # rewritten to use _GetTextInside (and use inferior regexp matching today).
4689
4690  # Give opening punctuations to get the matching close-punctuations.
4691  matching_punctuation = {'(': ')', '{': '}', '[': ']'}
4692  closing_punctuation = set(matching_punctuation.itervalues())
4693
4694  # Find the position to start extracting text.
4695  match = re.search(start_pattern, text, re.M)
4696  if not match:  # start_pattern not found in text.
4697    return None
4698  start_position = match.end(0)
4699
4700  assert start_position > 0, (
4701      'start_pattern must ends with an opening punctuation.')
4702  assert text[start_position - 1] in matching_punctuation, (
4703      'start_pattern must ends with an opening punctuation.')
4704  # Stack of closing punctuations we expect to have in text after position.
4705  punctuation_stack = [matching_punctuation[text[start_position - 1]]]
4706  position = start_position
4707  while punctuation_stack and position < len(text):
4708    if text[position] == punctuation_stack[-1]:
4709      punctuation_stack.pop()
4710    elif text[position] in closing_punctuation:
4711      # A closing punctuation without matching opening punctuations.
4712      return None
4713    elif text[position] in matching_punctuation:
4714      punctuation_stack.append(matching_punctuation[text[position]])
4715    position += 1
4716  if punctuation_stack:
4717    # Opening punctuations left without matching close-punctuations.
4718    return None
4719  # punctuations match.
4720  return text[start_position:position - 1]
4721
4722
# Patterns for matching call-by-reference parameters.
#
# These are building blocks: _RE_PATTERN_IDENT and _RE_PATTERN_TYPE are plain
# pattern strings that get concatenated into the two larger patterns below.
#
# Supports nested templates up to 2 levels deep using this messy pattern:
#   < (?: < (?: < [^<>]*
#               >
#           |   [^<>] )*
#         >
#     |   [^<>] )*
#   >
_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*'  # letter or underscore, then word chars
# A type name: optional leading 'const', optional elaborated-type keyword,
# then one or more of: a word character, a template argument list (see the
# nested-template pattern above), or a '::' scope separator.
_RE_PATTERN_TYPE = (
    r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?'
    r'(?:\w|'
    r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|'
    r'::)+')
# A call-by-reference parameter ends with '& identifier'.
# The single capture group holds the parameter text ('type& identifier');
# an optional default value ('= ...') and the terminating ',' or ')' are
# matched but not captured.  This one is a compiled pattern object.
_RE_PATTERN_REF_PARAM = re.compile(
    r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*'
    r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]')
# A call-by-const-reference parameter either ends with 'const& identifier'
# or looks like 'const type& identifier' when 'type' is atomic.
# Unlike _RE_PATTERN_REF_PARAM this is left as an uncompiled pattern string.
_RE_PATTERN_CONST_REF_PARAM = (
    r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT +
    r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')')
4747
4748
def CheckLanguage(filename, clean_lines, linenum, file_extension,
                  include_state, nesting_state, error):
  """Checks rules from the 'C++ language rules' section of cppguide.html.

  Some of these rules are hard to test (function overloading, using
  uint32 inappropriately), but we do the best we can.

  An #include line is handed to CheckIncludeLine and nothing else is checked
  on it.  Every other non-empty line is run through the cast, global/static
  and printf checks and then through the inline checks below (C integer
  types, unary operator&, "} if (", printf format strings, memset argument
  order, using-directives, variable-length arrays, DISALLOW_* placement and
  unnamed namespaces in headers).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    include_state: An _IncludeState instance in which the headers are inserted.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.  Not
                   read by this function; kept for interface compatibility.
    error: The function to call with any errors found.
  """
  # If the line is empty or consists of entirely a comment, no need to
  # check it.
  line = clean_lines.elided[linenum]
  if not line:
    return

  match = _RE_PATTERN_INCLUDE.search(line)
  if match:
    CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
    return

  # Reset include state across preprocessor directives.  This is meant
  # to silence warnings for conditional includes.
  match = Match(r'^\s*#\s*(if|ifdef|ifndef|elif|else|endif)\b', line)
  if match:
    include_state.ResetSection(match.group(1))

  # Perform other checks now that we are sure that this is not an include line
  CheckCasts(filename, clean_lines, linenum, error)
  CheckGlobalStatic(filename, clean_lines, linenum, error)
  CheckPrintf(filename, clean_lines, linenum, error)

  # TODO(unknown): for header files, check that 1-arg constructors are
  #                explicit.  How to tell it's a constructor?
  #                (handled in CheckForNonStandardConstructs for now)
  # TODO(unknown): for header files, check that classes declare or disable
  #                copy/assign (level 1 error)

  # Check if people are using the verboten C basic types.  The only exception
  # we regularly allow is "unsigned short port" for port.
  if Search(r'\bshort port\b', line):
    if not Search(r'\bunsigned short port\b', line):
      error(filename, linenum, 'runtime/int', 4,
            'Use "unsigned short" for ports, not "short"')
  else:
    match = Search(r'\b(short|long(?! +double)|long long)\b', line)
    if match:
      error(filename, linenum, 'runtime/int', 4,
            'Use int16/int64/etc, rather than the C type %s' % match.group(1))

  # Check if some verboten operator overloading is going on
  # TODO(unknown): catch out-of-line unary operator&:
  #   class X {};
  #   int operator&(const X& x) { return 42; }  // unary operator&
  # The trick is it's hard to tell apart from binary operator&:
  #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
  if Search(r'\boperator\s*&\s*\(\s*\)', line):
    error(filename, linenum, 'runtime/operator', 4,
          'Unary operator& is dangerous.  Do not use it.')

  # Check for suspicious usage of "if" like
  # } if (a == b) {
  if Search(r'\}\s*if\s*\(', line):
    error(filename, linenum, 'readability/braces', 4,
          'Did you mean "else if"? If not, start a new line for "if".')

  # Check for potential format string bugs like printf(foo).
  # We constrain the pattern not to pick things like DocidForPrintf(foo).
  # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
  # TODO(unknown): Catch the following case. Need to change the calling
  # convention of the whole function to process multiple line to handle it.
  #   printf(
  #       boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
  printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
  if printf_args:
    # A sole argument made only of identifier/member-access/call characters
    # means a non-literal is being used as the format string.
    match = Match(r'([\w.\->()]+)$', printf_args)
    if match and match.group(1) != '__VA_ARGS__':
      function_name = re.search(r'\b((?:string)?printf)\s*\(',
                                line, re.I).group(1)
      error(filename, linenum, 'runtime/printf', 4,
            'Potential format string bug. Do %s("%%s", %s) instead.'
            % (function_name, match.group(1)))

  # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
  # A literal second argument ('' is what an elided character literal looks
  # like, or a decimal/hex integer) is taken to be a deliberate fill value.
  # The alternation is grouped and anchored at both ends so that only a
  # complete literal qualifies; an expression that merely starts with a digit,
  # such as "2 * sizeof(buf)", is still reported.
  match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
  if match and not Match(
      r"^(?:''|-?(?:[0-9]+|0[xX][0-9A-Fa-f]+)[uUlL]*)\s*$", match.group(2)):
    error(filename, linenum, 'runtime/memset', 4,
          'Did you mean "memset(%s, 0, %s)"?'
          % (match.group(1), match.group(2)))

  if Search(r'\busing namespace\b', line):
    error(filename, linenum, 'build/namespaces', 5,
          'Do not use namespace using-directives.  '
          'Use using-declarations instead.')

  # Detect variable-length arrays.
  match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
  if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
      ']' not in match.group(3)):
    # Split the size using space and arithmetic operators as delimiters.
    # If any of the resulting tokens are not compile time constants then
    # report the error.  Both shift operators are delimiters, so a size such
    # as "kSize >> 1" does not leave a bare ">>" token behind.
    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', match.group(3))
    is_const = True
    skip_next = False
    for tok in tokens:
      if skip_next:
        skip_next = False
        continue

      if Search(r'sizeof\(.+\)', tok): continue
      if Search(r'arraysize\(\w+\)', tok): continue

      tok = tok.lstrip('(')
      tok = tok.rstrip(')')
      if not tok: continue
      if Match(r'\d+', tok): continue
      if Match(r'0[xX][0-9a-fA-F]+', tok): continue
      if Match(r'k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
      # A catch all for tricky sizeof cases, including 'sizeof expression',
      # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
      # requires skipping the next token because we split on ' ' and '*'.
      if tok.startswith('sizeof'):
        skip_next = True
        continue
      is_const = False
      break
    if not is_const:
      error(filename, linenum, 'runtime/arrays', 1,
            'Do not use variable-length arrays.  Use an appropriately named '
            "('k' followed by CamelCase) compile-time constant for the size.")

  # If DISALLOW_COPY_AND_ASSIGN DISALLOW_IMPLICIT_CONSTRUCTORS is present,
  # then it should be the last thing in the class declaration.
  match = Match(
      (r'\s*'
       r'(DISALLOW_(COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
       r'\(.*\);$'),
      line)
  if match and linenum + 1 < clean_lines.NumLines():
    next_line = clean_lines.elided[linenum + 1]
    # We allow some, but not all, declarations of variables to be present
    # in the statement that defines the class.  The [\w\*,\s]* fragment of
    # the regular expression below allows users to declare instances of
    # the class or pointers to instances, but not less common types such
    # as function pointers or arrays.  It's a tradeoff between allowing
    # reasonable code and avoiding trying to parse more C++ using regexps.
    if not Search(r'^\s*}[\w\*,\s]*;', next_line):
      error(filename, linenum, 'readability/constructors', 3,
            match.group(1) + ' should be the last thing in the class')

  # Check for use of unnamed namespaces in header files.  Registration
  # macros are typically OK, so we allow use of "namespace {" on lines
  # that end with backslashes.
  if (file_extension == 'h'
      and Search(r'\bnamespace\s*{', line)
      and line[-1] != '\\'):
    error(filename, linenum, 'build/namespaces', 4,
          'Do not use unnamed namespaces in header files.  See '
          'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
          ' for more information.')
4924
4925
def CheckGlobalStatic(filename, clean_lines, linenum, error):
  """Flags risky global/static string objects and self-initialized members.

  Two independent checks run on the elided line (joined with the following
  line when the current one contains none of ';', '(' or '{'):
    * a top-level 'string' value declaration, reported as runtime/string;
    * a 'name_(name_)' expression, reported as runtime/init.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  text = clean_lines.elided[linenum]

  # A declaration may continue on the next line, so look at two lines at
  # once when this one does not appear to be complete.
  has_following_line = linenum + 1 < clean_lines.NumLines()
  if has_following_line and not Search(r'[;({]', text):
    text = text + clean_lines.elided[linenum + 1].strip()

  # Static/global STL strings at the top level are dangerous: the C++
  # language does not guarantee that globals with constructors are
  # initialized before the first access.
  string_decl = Match(
      r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
      text)

  if string_decl:
    qualifiers, identifier, remainder = string_decl.groups()
    # The following shapes are not string values and must not be reported:
    #
    # - Pointers to string:
    #     string *pointer
    #     const string *pointer
    #     string const *pointer
    #     string *const pointer
    #
    # - Operators, tested on their own because operator names cross
    #   non-word boundaries and folding them into the function test would
    #   make identifier matching less accurate:
    #     string Class::operator*()
    #
    # - Functions and template specializations:
    #     string Function<Type>(...
    #     string Class<Type>::Method(...
    is_false_positive = (
        Search(r'\bstring\b(\s+const)?\s*\*\s*(const\s+)?\w', text) or
        Search(r'\boperator\W', text) or
        Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)*\s*\(([^"]|$)', remainder))
    if not is_false_positive:
      error(filename, linenum, 'runtime/string', 4,
            'For a static/global string constant, use a C style string instead: '
            '"%schar %s[]".' %
            (qualifiers, identifier))

  # The back-reference requires the argument to be the very same
  # underscore-terminated name that is being initialized.
  self_init = Search(r'\b([A-Za-z0-9_]*_)\(\1\)', text)
  if self_init:
    error(filename, linenum, 'runtime/init', 4,
          'You seem to be initializing a member variable with itself.')
4976
4977
def CheckPrintf(filename, clean_lines, linenum, error):
  """Reports questionable uses of the printf and string-copy families.

  Covers three things on the elided line: a numeric literal used as the size
  argument of snprintf, any call to sprintf, and any call to strcpy/strcat.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  text = clean_lines.elided[linenum]

  # The second argument of snprintf shouldn't be a literal.
  literal_size = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', text)
  if literal_size:
    buffer_arg, size_arg = literal_size.groups()
    # A size of zero is fine: snprintf is then being used to compute the
    # required length.
    if size_arg != '0':
      error(filename, linenum, 'runtime/printf', 3,
            'If you can, use sizeof(%s) instead of %s as the 2nd arg '
            'to snprintf.' % (buffer_arg, size_arg))

  # Forbidden C functions.
  uses_sprintf = Search(r'\bsprintf\s*\(', text)
  if uses_sprintf:
    error(filename, linenum, 'runtime/printf', 5,
          'Never use sprintf. Use snprintf instead.')

  unsafe_copy = Search(r'\b(strcpy|strcat)\s*\(', text)
  if unsafe_copy:
    error(filename, linenum, 'runtime/printf', 4,
          'Almost always, snprintf is better than %s' % unsafe_copy.group(1))
5005
5006
def IsDerivedFunction(clean_lines, linenum):
  """Tells whether the function on the current line is marked "override".

  Walks backwards from linenum over at most ten lines (never before line 0)
  to find the nearest line that looks like the start of a function, then
  inspects the text after that function's closing parenthesis.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
  Returns:
    A truthy value if the "override" virt-specifier follows the closing
    parenthesis of the function found; a falsy value otherwise, including
    when no function start is found in the scanned lines.
  """
  stop_before = max(-1, linenum - 10)
  for candidate in xrange(linenum, stop_before, -1):
    function_start = Match(r'^([^()]*\w+)\(', clean_lines.elided[candidate])
    if not function_start:
      continue

    # Only the nearest function start is considered.  Group 1 ends right
    # before the opening parenthesis, so its length is the column at which
    # the parenthesized expression begins.
    open_paren_column = len(function_start.group(1))
    text, _, after_paren = CloseExpression(
        clean_lines, candidate, open_paren_column)
    if after_paren < 0:
      return False
    return Search(r'\boverride\b', text[after_paren:])

  return False
5027
5028
def IsInitializerList(clean_lines, linenum):
  """Guesses whether the current line lies in a constructor initializer list.

  Scans backwards from linenum (stopping after index 2) and decides on the
  first line that carries one of the recognizable markers.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
  Returns:
    True if current line appears to be inside constructor initializer
    list, False otherwise.
  """
  for index in xrange(linenum, 1, -1):
    text = clean_lines.elided[index]

    if index == linenum:
      # On the starting line only, drop a trailing '{' (the opening of the
      # function body) so that it is not taken for a terminator below.
      body_opener = Match(r'^(.*)\{\s*$', text)
      if body_opener:
        text = body_opener.group(1)

    # Either of these suggests an initializer list:
    # - A lone colon, which tends to start a constructor initializer list.
    #   It could also be a ternary operator, but those too tend to appear
    #   in initializer lists rather than in parameter lists.
    # - A closing brace followed by a comma, probably the end of a
    #   brace-initialized member.
    if (Search(r'\s:\s*\w+[({]', text) or
        Search(r'\}\s*,\s*$', text)):
      return True

    # A trailing closing brace or semicolon is probably the end of the
    # previous function; a trailing opening brace is probably the start of
    # the current class or namespace.  Having reached one of those without
    # seeing the starting colon, we are probably not in an initializer list.
    if Search(r'[{};]\s*$', text):
      return False

  # Ran out of lines to scan without finding the start of a constructor
  # initializer list.
  return False
5069
5070
def CheckForNonConstReference(filename, clean_lines, linenum,
                              nesting_state, error):
  """Check for non-const references.

  Separate from CheckLanguage since it scans backwards from current
  line, instead of scanning forward.

  A 'runtime/references' error is reported for every parameter on the
  (possibly re-joined) line that matches _RE_PATTERN_REF_PARAM but not
  _RE_PATTERN_CONST_REF_PARAM, unless one of the exemptions below applies.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # Do nothing if there is no '&' on current line.
  line = clean_lines.elided[linenum]
  if '&' not in line:
    return

  # If a function is inherited, current function doesn't have much of
  # a choice, so any non-const references should not be blamed on
  # derived function.
  if IsDerivedFunction(clean_lines, linenum):
    return

  # Long type names may be broken across multiple lines, usually in one
  # of these forms:
  #   LongType
  #       ::LongTypeContinued &identifier
  #   LongType::
  #       LongTypeContinued &identifier
  #   LongType<
  #       ...>::LongTypeContinued &identifier
  #
  # If we detected a type split across two lines, join the previous
  # line to current line so that we can match const references
  # accordingly.
  #
  # Note that this only scans back one line, since scanning back
  # arbitrary number of lines would be expensive.  If you have a type
  # that spans more than 2 lines, please use a typedef.
  if linenum > 1:
    previous = None
    if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line):
      # previous_line\n + ::current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$',
                        clean_lines.elided[linenum - 1])
    elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line):
      # previous_line::\n + current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$',
                        clean_lines.elided[linenum - 1])
    if previous:
      line = previous.group(1) + line.lstrip()
    else:
      # Check for templated parameter that is split across multiple lines
      endpos = line.rfind('>')
      if endpos > -1:
        (_, startline, startpos) = ReverseCloseExpression(
            clean_lines, linenum, endpos)
        if startpos > -1 and startline < linenum:
          # Found the matching < on an earlier line, collect all
          # pieces up to current line.
          line = ''.join(clean_lines.elided[i].strip()
                         for i in xrange(startline, linenum + 1))

  # Check for non-const references in function parameters.  A single '&' may
  # be found in the following places:
  #   inside expression: binary & for bitwise AND
  #   inside expression: unary & for taking the address of something
  #   inside declarators: reference parameter
  # We will exclude the first two cases by checking that we are not inside a
  # function body, including one that was just introduced by a trailing '{'.
  # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
  if (nesting_state.previous_stack_top and
      not (isinstance(nesting_state.previous_stack_top, _ClassInfo) or
           isinstance(nesting_state.previous_stack_top, _NamespaceInfo))):
    # Not at toplevel, not within a class, and not within a namespace
    return

  # Avoid initializer lists.  We only need to scan back from the
  # current line for something that starts with ':'.
  #
  # We don't need to check the current line, since the '&' would
  # appear inside the second set of parentheses on the current line as
  # opposed to the first set.
  if linenum > 0:
    for i in xrange(linenum - 1, max(0, linenum - 10), -1):
      previous_line = clean_lines.elided[i]
      if not Search(r'[),]\s*$', previous_line):
        break
      if Match(r'^\s*:\s+\S', previous_line):
        return

  # Avoid preprocessors
  if Search(r'\\\s*$', line):
    return

  # Avoid constructor initializer lists
  if IsInitializerList(clean_lines, linenum):
    return

  # We allow non-const references in a few standard places, like functions
  # called "swap()" or iostream operators like "<<" or ">>".  Do not check
  # those function parameters.
  #
  # We also accept & in static_assert, which looks like a function but
  # it's actually a declaration expression.
  #
  # The optional template argument after swap uses the class [\w:]+ so that
  # both plain and namespace-qualified names such as swap<Foo>( and
  # swap<ns::Foo>( are recognized.
  whitelisted_functions = (r'(?:[sS]wap(?:<[\w:]+>)?|'
                           r'operator\s*[<>][<>]|'
                           r'static_assert|COMPILE_ASSERT'
                           r')\s*\(')
  if Search(whitelisted_functions, line):
    return
  elif not Search(r'\S+\([^)]*$', line):
    # Don't see a whitelisted function on this line.  Actually we
    # didn't see any function name on this line, so this is likely a
    # multi-line parameter list.  Try a bit harder to catch this case
    # by looking at the two preceding lines.
    for i in xrange(2):
      if (linenum > i and
          Search(whitelisted_functions, clean_lines.elided[linenum - i - 1])):
        return

  decls = ReplaceAll(r'{[^}]*}', ' ', line)  # exclude function body
  for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
    if not Match(_RE_PATTERN_CONST_REF_PARAM, parameter):
      error(filename, linenum, 'runtime/references', 2,
            'Is this a non-const reference? '
            'If so, make const or use a pointer: ' +
            ReplaceAll(' *<', '<', parameter))
5202
5203
def CheckCasts(filename, clean_lines, linenum, error):
  """Runs the cast-related checks on one line.

  In order: function-style casts on common basic types, C-style casts that
  should be static_cast, const_cast or reinterpret_cast (delegated to
  CheckCStyleCast), and taking the address of a cast.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  text = clean_lines.elided[linenum]

  # Look for a conversion-function cast.  Only the most common basic types
  # are captured, though there are more.  Parameterless forms such as
  # bool() are allowed, since they are probably a member operator
  # declaration or a default constructor.
  function_cast = Search(
      r'(\bnew\s+|\S<\s*(?:const\s+)?)?\b'
      r'(int|float|double|bool|char|int32|uint32|int64|uint64)'
      r'(\([^)].*)', text)
  in_function_args = ExpectingFunctionArgs(clean_lines, linenum)
  if function_cast and not in_function_args:
    # new_or_template silences two false positives:
    # - New operators
    # - Template arguments with function types
    #
    # For template arguments we match types that immediately follow an
    # opening bracket with no space in between.  That is a quick way to
    # cover the common case of a function type as first template argument.
    # Less-than comparisons are not lost to this, because that operator is
    # usually followed by a space.
    #
    #   function<double(double)>   // bracket + no space = false positive
    #   value < double(42)         // bracket + space = true positive
    new_or_template, type_name, after_type = function_cast.groups()

    # Brackets right after the closing parenthesis mean an array; nothing
    # further is checked on this line in that case.
    if Match(r'\([^()]+\)\s*\[', after_type):
      return

    if new_or_template is None:
      # Further things that are not casts:
      # - Function pointers
      # - Casts to pointer types
      # - Alias declarations
      # - Placement new
      is_function_pointer = after_type and (
          Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(', after_type) or
          after_type.startswith('(*)'))
      if (not is_function_pointer and
          not Match(r'\s*using\s+\S+\s*=\s*' + type_name, text) and
          not Search(r'new\(\S+\)\s*' + type_name, text)):
        error(filename, linenum, 'readability/casting', 4,
              'Using deprecated casting style.  '
              'Use static_cast<%s>(...) instead' %
              type_name)

  if not in_function_args:
    CheckCStyleCast(filename, clean_lines, linenum, 'static_cast',
                    r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)

  # This doesn't catch all cases. Consider (const char * const)"hello".
  #
  # (char *) "foo" should always be a const_cast (reinterpret_cast won't
  # compile).  Pointer casts of anything other than a string constant are
  # only examined when the const_cast check reported nothing.
  reported_const_cast = CheckCStyleCast(
      filename, clean_lines, linenum, 'const_cast',
      r'\((char\s?\*+\s?)\)\s*"', error)
  if not reported_const_cast:
    CheckCStyleCast(filename, clean_lines, linenum, 'reinterpret_cast',
                    r'\((\w+\s?\*+\s?)\)', error)

  # Finally, look for people taking the address of a cast.  This is
  # dangerous -- casts can assign to temporaries, so the pointer doesn't
  # point where you think.
  #
  # A non-identifier character must precede the '&' for the expression to
  # count as a cast.  These are casts:
  #   expression = &static_cast<int*>(temporary());
  #   function(&(int*)(temporary()));
  #
  # This is not a cast:
  #   reference_type&(int* function_param);
  address_of_cast = Search(
      r'(?:[^\w]&\(([^)]+)\)[\w(])|'
      r'(?:[^\w]&(static|dynamic|down|reinterpret)_cast\b)', text)
  if not address_of_cast or address_of_cast.group(1) == '*':
    return

  # Prefer a more specific message when the & binds to something that is
  # dereferenced from the casted pointer rather than to the casted pointer
  # itself.
  dereferences_cast = False
  named_cast = Match(
      r'^(.*&(?:static|dynamic|down|reinterpret)_cast\b)<', text)
  if named_cast:
    # First close the <...> template argument, then the (...) that follows.
    _, angle_line, angle_end = CloseExpression(
        clean_lines, linenum, len(named_cast.group(1)))
    if angle_end >= 0 and clean_lines.elided[angle_line][angle_end] == '(':
      _, paren_line, paren_end = CloseExpression(
          clean_lines, angle_line, angle_end)
      if paren_end >= 0:
        # Whatever follows the cast, continued onto the next line if any.
        trailing = clean_lines.elided[paren_line][paren_end:]
        if paren_line < clean_lines.NumLines() - 1:
          trailing += clean_lines.elided[paren_line + 1]
        if Match(r'\s*(?:->|\[)', trailing):
          dereferences_cast = True

  if dereferences_cast:
    error(filename, linenum, 'readability/casting', 4,
          ('Are you taking an address of something dereferenced '
           'from a cast?  Wrapping the dereferenced expression in '
           'parentheses will make the binding more obvious'))
  else:
    error(filename, linenum, 'runtime/casting', 4,
          ('Are you taking an address of a cast?  '
           'This is dangerous: could be a temp var.  '
           'Take the address before doing the cast, rather than after'))
5321
5322
def CheckCStyleCast(filename, clean_lines, linenum, cast_type, pattern, error):
  """Checks for a C-style cast by looking for the pattern.

  A match is first filtered against constructs that merely look like casts:
  operands of sizeof/alignof/alignas or of an ALL_CAPS identifier, text
  nested inside the argument list of an ALL_CAPS macro, and the dummy
  parameter of operator++(int) / operator--(int).  If what follows the match
  looks like the end of a function declarator, the match is treated as an
  unnamed parameter and reported under 'readability/function' rather than
  'readability/casting'.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    cast_type: The string for the C++ cast to recommend.  This is either
      reinterpret_cast, static_cast, or const_cast, depending.
    pattern: The regular expression used to find C-style casts.  Its first
      group is reported as the type being cast to.
    error: The function to call with any errors found.

  Returns:
    True if an error was emitted.
    False otherwise.
  """
  line = clean_lines.elided[linenum]
  match = Search(pattern, line)
  if not match:
    return False

  # Exclude lines with keywords that tend to look like casts
  context = line[0:match.start(1) - 1]
  if Match(r'.*\b(?:sizeof|alignof|alignas|[_A-Z][_A-Z0-9]*)\s*$', context):
    return False

  # Try expanding current context to see if we are one level of
  # parentheses inside a macro.
  if linenum > 0:
    for i in xrange(linenum - 1, max(0, linenum - 5), -1):
      context = clean_lines.elided[i] + context
  if Match(r'.*\b[_A-Z][_A-Z0-9]*\s*\((?:\([^()]*\)|[^()])*$', context):
    return False

  # operator++(int) and operator--(int)
  if context.endswith(' operator++') or context.endswith(' operator--'):
    return False

  # A single unnamed argument for a function tends to look like old
  # style cast.  If we see those, don't issue warnings for deprecated
  # casts, instead issue warnings for unnamed arguments where
  # appropriate.
  #
  # These are things that we want warnings for, since the style guide
  # explicitly require all parameters to be named:
  #   Function(int);
  #   Function(int) {
  #   ConstMember(int) const;
  #   ConstMember(int) const {
  #   ExceptionMember(int) throw (...);
  #   ExceptionMember(int) throw (...) {
  #   PureVirtual(int) = 0;
  #
  # These are functions of some sort, where the compiler would be fine
  # if they had named parameters, but people often omit those
  # identifiers to reduce clutter:
  #   (FunctionPointer)(int);
  #   (FunctionPointer)(int) = value;
  #   Function((function_pointer_arg)(int))
  #   Function((function_pointer_arg)(int), int param)
  #   <TemplateArgument(int)>;
  #   <(FunctionPointerTemplateArgument)(int)>;
  remainder = line[match.end(0):]
  if Match(r'^\s*(?:;|const\b|throw\b|final\b|override\b|[=>{),])',
           remainder):
    # Looks like an unnamed parameter.

    # Don't warn on any kind of template arguments.
    if Match(r'^\s*>', remainder):
      return False

    # Don't warn on assignments to function pointers, but keep warnings for
    # unnamed parameters to pure virtual functions.  Note that this pattern
    # will also pass on assignments of "0" to function pointers, but the
    # preferred values for those would be "nullptr" or "NULL".
    #
    # Any amount of whitespace (including none) may precede the "=", so
    # that "(int)=value;" is treated the same as "(int) = value;".
    matched_zero = Match(r'^\s*=\s*(\S+)\s*;', remainder)
    if matched_zero and matched_zero.group(1) != '0':
      return False

    # Don't warn on function pointer declarations.  For this we need
    # to check what came before the "(type)" string.
    if Match(r'.*\)\s*$', line[0:match.start(0)]):
      return False

    # Don't warn if the parameter is named with block comments, e.g.:
    #  Function(int /*unused_param*/);
    # The elided line has comments removed, so look at the raw line.
    raw_line = clean_lines.raw_lines[linenum]
    if '/*' in raw_line:
      return False

    # Passed all filters, issue warning here.
    error(filename, linenum, 'readability/function', 3,
          'All parameters should be named in a function')
    return True

  # At this point, all that should be left is actual casts.
  error(filename, linenum, 'readability/casting', 4,
        'Using C-style cast.  Use %s<%s>(...) instead' %
        (cast_type, match.group(1)))

  return True
5424
5425
def ExpectingFunctionArgs(clean_lines, linenum):
  """Checks whether function type arguments are expected on this line.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.

  Returns:
    A true value if the line at 'linenum' is inside something that expects
    arguments of function types (a MOCK_*METHOD* macro invocation or the
    template argument list of std::function / std::mfunction), a false
    value otherwise.
  """
  elided = clean_lines.elided

  # A MOCK_*METHOD* macro invocation that starts on this very line.
  on_this_line = Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', elided[linenum])
  if on_this_line:
    return on_this_line

  # Every remaining case inspects the two preceding lines.
  if linenum < 2:
    return False

  previous = elided[linenum - 1]
  return (
      # Macro opened on the previous line, optionally followed by one
      # comma-terminated argument.
      Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$',
            previous) or
      # Macro opened two lines up with nothing after the parenthesis.
      Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$',
            elided[linenum - 2]) or
      # Previous line ends right after "std::function<" / "std::mfunction<".
      Search(r'\bstd::m?function\s*\<\s*$', previous))
5446
5447
# Pairs of (header, names of templates declared by that header).  Used to
# build the regular expressions that detect uses of those templates.
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', (
        'unary_function', 'binary_function',
        'plus', 'minus', 'multiplies', 'divides', 'modulus', 'negate',
        'equal_to', 'not_equal_to',
        'greater', 'less', 'greater_equal', 'less_equal',
        'logical_and', 'logical_or', 'logical_not',
        'unary_negate', 'not1',
        'binary_negate', 'not2',
        'bind1st', 'bind2nd',
        'pointer_to_unary_function', 'pointer_to_binary_function',
        'ptr_fun',
        'mem_fun_t', 'mem_fun',
        'mem_fun1_t', 'mem_fun1_ref_t', 'mem_fun_ref_t',
        'const_mem_fun_t', 'const_mem_fun1_t',
        'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
        'mem_fun_ref',
    )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap')),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue')),
    ('<set>', ('set', 'multiset')),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string')),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap')),
    ('<hash_set>', ('hash_set', 'hash_multiset')),
    ('<slist>', ('slist',)),
)

# Matches the bare word "string".
_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# Triples of (compiled regex, function name, '<algorithm>').
_re_pattern_algorithm_header = []
for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                  'transform'):
  # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
  # type::max().
  _re_pattern_algorithm_header += [(
      re.compile(''.join([r'[^>.]\b', _template, r'(<.*?>)?\([^\)]'])),
      _template,
      '<algorithm>',
  )]

# Triples of (compiled regex, 'name<>', header), one per template listed in
# _HEADERS_CONTAINING_TEMPLATES, in the same order.
_re_pattern_templates = []
for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
  for _template in _templates:
    _re_pattern_templates += [(
        re.compile(''.join([r'(\<|\b)', _template, r'\s*\<'])),
        _template + '<>',
        _header,
    )]
5504
5505
def FilesBelongToSameModule(filename_cc, filename_h):
  """Decide whether a .cc file and a header are part of one 'module'.

  Two files are in the same 'module' when, after normalization, the path of
  the .cc file ends with the path of the header.  Normalization strips the
  extension, a trailing '_unittest' or '_test' from the .cc name, a trailing
  '-inl' from the header name, and collapses '/public/' and '/internal/'
  path components.  Hence foo.h, foo-inl.h, foo.cc, foo_test.cc and
  foo_unittest.cc in one directory form a module, as do
  some/path/public/xyzzy and some/path/internal/xyzzy.

  When filename_cc carries a longer path than filename_h, for example
  '/absolute/path/to/base/sysinfo.cc' versus 'base/sysinfo.h', the leading
  part that the header lacks is returned too.  Callers use it as a prefix
  for opening the header, since the real include paths are not known here.

  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
  according to this implementation. Because of this, this function gives
  some false positives. This should be sufficiently rare in practice.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.
  """
  not_same_module = (False, '')

  # Normalize the source file name down to its module stem.
  if not filename_cc.endswith('.cc'):
    return not_same_module
  source_stem = filename_cc[:-len('.cc')]
  for test_suffix in ('_unittest', '_test'):
    if source_stem.endswith(test_suffix):
      # Only one test suffix is ever removed.
      source_stem = source_stem[:-len(test_suffix)]
      break
  for visibility_dir in ('/public/', '/internal/'):
    source_stem = source_stem.replace(visibility_dir, '/')

  # Normalize the header name the same way.
  if not filename_h.endswith('.h'):
    return not_same_module
  header_stem = filename_h[:-len('.h')]
  if header_stem.endswith('-inl'):
    header_stem = header_stem[:-len('-inl')]
  for visibility_dir in ('/public/', '/internal/'):
    header_stem = header_stem.replace(visibility_dir, '/')

  if not source_stem.endswith(header_stem):
    return False, ''
  return True, source_stem[:-len(header_stem)]
5559
5560
def UpdateIncludeState(filename, include_dict, io=codecs):
  """Fill up the include_dict with new includes found from the file.

  Every line of the file is stripped of comments and searched with
  _RE_PATTERN_INCLUDE.  Group 2 of each hit is added to include_dict, mapped
  to the 1-based number of the line on which it was first seen; keys that
  are already present keep their existing value.

  Args:
    filename: the name of the header to read.
    include_dict: a dictionary in which the headers are inserted.
    io: The io factory to use to read the file. Provided for testability.

  Returns:
    True if the header file could be opened and was scanned. False if
    opening it raised IOError.
  """
  try:
    headerfile = io.open(filename, 'r', 'utf8', 'replace')
  except IOError:
    return False

  try:
    for linenum, line in enumerate(headerfile, 1):
      clean_line = CleanseComments(line)
      match = _RE_PATTERN_INCLUDE.search(clean_line)
      if match:
        include_dict.setdefault(match.group(2), linenum)
  finally:
    # Release the file handle even if scanning fails.  An injected io
    # factory may hand back a plain iterable of lines (e.g. a list) that
    # has nothing to close, so only call close() when it exists.
    close = getattr(headerfile, 'close', None)
    if close is not None:
      close()
  return True
5586
5587
def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
                              io=codecs):
  """Reports for missing stl includes.

  This function will output warnings to make sure you are including the headers
  necessary for the stl containers and functions that you use. We only give one
  reason to include a header. For example, if you use both equal_to<> and
  less<> in a .h file, only one (the latter in the file) of these will be
  reported as a reason to include the <functional>.

  Headers included by a header of the same module (as decided by
  FilesBelongToSameModule) count as included.  For a .cc file whose own
  header cannot be found and read, nothing is reported at all.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    include_state: An _IncludeState instance.
    error: The function to call with any errors found.
    io: The IO factory to use to read the header file. Provided for unittest
        injection.
  """
  # A map of header name to linenumber and the template entity.
  # Example of required: { '<functional>': (1219, 'less<>') }
  required = {}

  for linenum in xrange(clean_lines.NumLines()):
    line = clean_lines.elided[linenum]
    # Skip empty lines and lines that begin with '#'.
    if not line or line[0] == '#':
      continue

    # String is special -- it is a non-templatized type in STL.
    matched = _RE_PATTERN_STRING.search(line)
    if matched:
      # Don't warn about strings in non-STL namespaces:
      # (We check only the first match per line; good enough.)
      prefix = line[:matched.start()]
      if prefix.endswith('std::') or not prefix.endswith('::'):
        required['<string>'] = (linenum, 'string')

    for pattern, template, header in _re_pattern_algorithm_header:
      if pattern.search(line):
        required[header] = (linenum, template)

    # The following check is just a speed up, no semantics are changed:
    # every template pattern needs a '<' to match.
    if '<' not in line:  # Reduces the cpu time usage by skipping lines.
      continue

    for pattern, template, header in _re_pattern_templates:
      if pattern.search(line):
        required[header] = (linenum, template)

  # The policy is that if you #include something in foo.h you don't need to
  # include it again in foo.cc. Here, we will look at possible includes.
  # Let's flatten the include_state include_list and copy it into a dictionary.
  include_dict = dict(item for sublist in include_state.include_list
                      for item in sublist)

  # Did we find the header for this file (if any) and successfully load it?
  header_found = False

  # Use the absolute path so that matching works properly.
  abs_filename = FileInfo(filename).FullName()

  # For Emacs's flymake.
  # If cpplint is invoked from Emacs's flymake, a temporary file is generated
  # by flymake and that file name might end with '_flymake.cc'. In that case,
  # restore original file name here so that the corresponding header file can be
  # found.
  # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
  # instead of 'foo_flymake.h'
  abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)

  # include_dict is modified during iteration (UpdateIncludeState inserts
  # into it), so we iterate over a real copy of the keys.  list() makes the
  # copy explicit instead of relying on keys() returning a new list.
  header_keys = list(include_dict)
  for header in header_keys:
    (same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
    fullpath = common_path + header
    if same_module and UpdateIncludeState(fullpath, include_dict, io):
      header_found = True

  # If we can't find the header file for a .cc, assume it's because we don't
  # know where to look. In that case we'll give up as we're not sure they
  # didn't include it in the .h file.
  # TODO(unknown): Do a better job of finding .h files so we are confident that
  # not having the .h file means there isn't one.
  if filename.endswith('.cc') and not header_found:
    return

  # All the lines have been processed, report the errors found.
  for required_header_unstripped in required:
    required_linenum, template = required[required_header_unstripped]
    # include_dict keys carry no <> or "" delimiters, so strip them here.
    if required_header_unstripped.strip('<>"') not in include_dict:
      error(filename, required_linenum,
            'build/include_what_you_use', 4,
            'Add #include ' + required_header_unstripped + ' for ' + template)
5680
5681
# Matches make_pair followed by an explicit template argument list.
_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')


def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
  """Flag calls to make_pair that spell out its template arguments.

  G++ 4.6 in C++11 mode fails badly if make_pair's template arguments are
  specified explicitly, and such use isn't intended in any case.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  elided_line = clean_lines.elided[linenum]
  if not _RE_PATTERN_EXPLICIT_MAKEPAIR.search(elided_line):
    return

  confidence = 4  # 4 = high confidence
  message = (
      'For C++11-compatibility, omit template arguments from make_pair'
      ' OR use pair directly OR if appropriate, construct a pair directly')
  error(filename, linenum, 'build/explicit_make_pair', confidence, message)
5704
5705
def CheckDefaultLambdaCaptures(filename, clean_lines, linenum, error):
  """Report lambdas that use a default capture ("[=" or "[&").

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # A lambda introducer specifies a default capture if it starts with "[="
  # or if it starts with "[&" _not_ followed by an identifier.
  introducer = Match(r'^(.*)\[\s*(?:=|&[^\w])', clean_lines.elided[linenum])
  if not introducer:
    return

  # Found a potential error.  Group 1 is everything before the "[", so its
  # length is the column of the bracket handed to CloseExpression.
  bracket_col = len(introducer.group(1))
  closing_line, _, closing_col = CloseExpression(
      clean_lines, linenum, bracket_col)
  if closing_col < 0:
    return

  # If what follows is not an open parenthesis (for lambda-declarator) or an
  # open brace (for compound-statement), it's not a lambda.
  if Match(r'^\s*[{(]', closing_line[closing_col:]):
    error(filename, linenum, 'build/c++11',
          4,  # 4 = high confidence
          'Default lambda captures are an unapproved C++ feature.')
5729
5730
def CheckRedundantVirtual(filename, clean_lines, linenum, error):
  """Report "virtual" on a function also marked "override" or "final".

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Nothing to do unless "virtual" appears on the current line.
  virtual_prefix = Match(r'^(.*\bvirtual\b)', clean_lines.elided[linenum])
  if not virtual_prefix:
    return

  # Look for the next opening parenthesis.  This is the start of the
  # parameter list (possibly on the next line shortly after virtual).
  # TODO(unknown): doesn't work if there are virtual functions with
  # decltype() or other things that use parentheses, but csearch suggests
  # that this is rare.
  close_line = -1
  close_col = -1
  search_col = len(virtual_prefix.group(1))
  search_limit = min(linenum + 3, clean_lines.NumLines())
  for candidate in xrange(linenum, search_limit):
    before_paren = Match(r'^([^(]*)\(',
                         clean_lines.elided[candidate][search_col:])
    if before_paren:
      # Match parentheses to find the end of the parameter list
      paren_col = search_col + len(before_paren.group(1))
      _, close_line, close_col = CloseExpression(
          clean_lines, candidate, paren_col)
      break
    # Lines after the first are searched from their beginning.
    search_col = 0

  if close_col < 0:
    return  # Couldn't find end of parameter list, give up

  # Look for "override" or "final" after the parameter list
  # (possibly on the next few lines).
  for i in xrange(close_line, min(close_line + 3, clean_lines.NumLines())):
    tail = clean_lines.elided[i][close_col:]
    specifier = Search(r'\b(override|final)\b', tail)
    if specifier:
      error(filename, linenum, 'readability/inheritance', 4,
            ('"virtual" is redundant since function is '
             'already declared as "%s"' % specifier.group(1)))

    # Whole lines are checked once we are done with the first line.
    close_col = 0
    # Stop at a line whose last non-blank character is not a word character.
    if Search(r'[^\w]\s*$', tail):
      break
5781
5782
def CheckRedundantOverrideOrFinal(filename, clean_lines, linenum, error):
  """Report a line that carries both "override" and "final".

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # At most one of the two virt-specifiers should be present, not both.
  if not Search(r'\boverride\b', line):
    return
  if not Search(r'\bfinal\b', line):
    return

  error(filename, linenum, 'readability/inheritance', 4,
        ('"override" is redundant since function is '
         'already declared as "final"'))
5798
5799
5800
5801
5802# Returns true if we are at a new block, and it is directly
5803# inside of a namespace.
def IsBlockInNameSpace(nesting_state, is_forward_declaration):
  """Tells whether the new block sits directly inside a namespace.

  Args:
    nesting_state: The _NestingState object that contains info about our state.
    is_forward_declaration: If the class is a forward declared class.
  Returns:
    Whether or not the new block is directly in a namespace.
  """
  stack = nesting_state.stack

  if is_forward_declaration:
    # For a forward declaration the innermost stack entry must itself be
    # the namespace.
    return len(stack) >= 1 and isinstance(stack[-1], _NamespaceInfo)

  # Otherwise the innermost entry is the new block and the entry below it
  # must be the namespace.
  if len(stack) <= 1:
    return False
  return (stack[-1].check_namespace_indentation and
          isinstance(stack[-2], _NamespaceInfo))
5823
5824
def ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item,
                                    raw_lines_no_comments, linenum):
  """Decides whether the namespace indentation check applies to this line.

  Args:
    nesting_state: The current nesting state.
    is_namespace_indent_item: If we just put a new class on the stack, True.
      If the top of the stack is not a class, or we did not recently
      add the class, False.
    raw_lines_no_comments: The lines without the comments.
    linenum: The current line number we are processing.

  Returns:
    True if we should apply our namespace indentation check. Currently, it
    only works for classes and namespaces inside of a namespace.
  """
  is_forward_declaration = IsForwardClassDeclaration(raw_lines_no_comments,
                                                     linenum)

  # Only freshly opened items and forward declarations are candidates.
  if not is_namespace_indent_item and not is_forward_declaration:
    return False

  # Lines that belong to a macro definition are never checked.
  if IsMacroDefinition(raw_lines_no_comments, linenum):
    return False

  return IsBlockInNameSpace(nesting_state, is_forward_declaration)
5853
5854
5855# Call this method if the line is directly inside of a namespace.
5856# If the line above is blank (excluding comments) or the start of
5857# an inner namespace, it cannot be indented.
def CheckItemIndentationInNamespace(filename, raw_lines_no_comments, linenum,
                                    error):
  """Reports an error if the given line starts with whitespace.

  Args:
    filename: The name of the current file.
    raw_lines_no_comments: The lines without the comments.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  if not Match(r'^\s+', raw_lines_no_comments[linenum]):
    return
  error(filename, linenum, 'runtime/indentation_namespace', 4,
        'Do not indent within a namespace')
5864
5865
def ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions=[]):
  """Runs all per-line checks on a single line of the file.

  The NOLINT suppressions and the nesting state are updated first.  The
  namespace indentation check and the Mongo-specific checks run on every
  line; all remaining checks are skipped while inside an asm block.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    clean_lines: A CleansedLines instance containing the file.
    line: Number of line being processed.
    include_state: An _IncludeState instance in which the headers are inserted.
    function_state: A _FunctionState instance which counts function lines, etc.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable to which errors are reported.  The checks call it as
           error(filename, line number, category, confidence, message).
    extra_check_functions: An array of additional check functions that will be
                           run on each source line. Each function takes 4
                           arguments: filename, clean_lines, line, error
  """
  ParseNolintSuppressions(filename, clean_lines.raw_lines[line], line, error)
  nesting_state.Update(filename, clean_lines, line, error)
  CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line,
                               error)

  # These run regardless of whether the line is inside an asm block.
  for mongo_check in (CheckForMongoPolyfill,
                      CheckForMongoAtomic,
                      CheckForMongoVolatile,
                      CheckForNonMongoAssert):
    mongo_check(filename, clean_lines, line, error)

  if nesting_state.InAsmBlock():
    return

  # Checks that need more than the common four arguments.
  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
  CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error)
  CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                nesting_state, error)
  CheckForNonConstReference(filename, clean_lines, line, nesting_state, error)
  CheckForNonStandardConstructs(filename, clean_lines, line,
                                nesting_state, error)

  # Checks that share the (filename, clean_lines, line, error) signature,
  # in their original order, followed by the caller-supplied ones.
  builtin_checks = (CheckVlogArguments,
                    CheckPosixThreading,
                    CheckInvalidIncrement,
                    CheckMakePairUsesDeduction,
                    CheckDefaultLambdaCaptures,
                    CheckRedundantVirtual,
                    CheckRedundantOverrideOrFinal)
  for check_fn in builtin_checks:
    check_fn(filename, clean_lines, line, error)
  for check_fn in extra_check_functions:
    check_fn(filename, clean_lines, line, error)
5914
def FlagCxx11Features(filename, clean_lines, linenum, error):
  """Flag those c++11 features that we only allow in certain places.

  Two things are reported: #includes of unapproved C++11 headers, and uses
  of a small set of unapproved std:: names.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  unapproved_headers = (
      'cfenv',
      'condition_variable',
      'fenv.h',
      'future',
      'mutex',
      'thread',
      'chrono',
      'ratio',
      'regex',
      'system_error',
  )
  # These are classes and free functions.  The classes are always
  # mentioned as std::*, but we only catch the free functions if
  # they're not found by ADL.  They're alphabetical by header.
  unapproved_names = (
      # type_traits
      'alignment_of',
      'aligned_union',

      # utility
      'forward',
  )

  line = clean_lines.elided[linenum]

  # Flag unapproved C++11 headers.
  include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line)
  if include:
    header = include.group(1)
    if header in unapproved_headers:
      error(filename, linenum, 'build/c++11', 5,
            ('<%s> is an unapproved C++11 header.') % header)

  # The only place where we need to worry about C++11 keywords and library
  # features in preprocessor directives is in macro definitions.
  is_directive = Match(r'\s*#', line)
  if is_directive and not Match(r'\s*#\s*define\b', line):
    return

  for top_name in unapproved_names:
    if not Search(r'\bstd::%s\b' % top_name, line):
      continue
    error(filename, linenum, 'build/c++11', 5,
          ('std::%s is an unapproved C++11 class or function.  Send c-style '
           'an example of where it would make your code more readable, and '
           'they may let you use it.') % top_name)
5962
5963
def ProcessFileData(filename, file_extension, lines, error,
                    extra_check_functions=[]):
  """Runs the whole-file and per-line lint checks over the given lines.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    lines: An array of strings, each representing a line of the file, with the
           last element being empty if the file is terminated with a newline.
    error: A callable to which errors are reported.  The checks call it as
           error(filename, line number, category, confidence, message).
    extra_check_functions: An array of additional check functions that will be
                           run on each source line. Each function takes 4
                           arguments: filename, clean_lines, line, error
  """
  # Surround the file with marker comment lines; the caller's list is left
  # untouched because a new list is built.
  leading_marker = '// marker so line numbers and indices both start at 1'
  trailing_marker = '// marker so line numbers end in a known way'
  lines = [leading_marker] + lines + [trailing_marker]

  include_state = _IncludeState()
  function_state = _FunctionState()
  nesting_state = NestingState()

  ResetNolintSuppressions()

  # Checks that look at the lines before comments are removed.
  CheckForCopyright(filename, lines, error)
  if file_extension == 'h':
    CheckForHeaderGuard(filename, lines, error)

  RemoveMultiLineComments(filename, lines, error)
  clean_lines = CleansedLines(lines)

  # Per-line checks.
  for linenum in xrange(clean_lines.NumLines()):
    ProcessLine(filename, file_extension, clean_lines, linenum,
                include_state, function_state, nesting_state, error,
                extra_check_functions)
    FlagCxx11Features(filename, clean_lines, linenum, error)
  nesting_state.CheckCompletedBlocks(filename, error)

  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

  # We check here rather than inside ProcessLine so that we see raw
  # lines rather than "cleaned" lines.
  CheckForBadCharacters(filename, lines, error)

  CheckForNewlineAtEOF(filename, lines, error)
6009
def ProcessConfigOverrides(filename):
  """Loads the configuration files and processes the config overrides.

  Starting in the directory that contains |filename| and moving up one
  directory at a time, every CPPLINT.cfg found is read.  Each non-empty line
  (after removing '#' comments) is a "name=value" option:

    set noparent: stop looking in parent directories after this file.
    filter: collected and handed to _AddFilters once all files are read.
    exclude_files: a regular expression matched against the path component
      directly below the config file's directory; on a match the file is
      skipped.
    linelength: replaces the module-level _line_length.

  Unknown options and non-numeric line lengths are reported on stderr.

  Args:
    filename: The name of the file being processed by the linter.

  Returns:
    False if the current |filename| should not be processed further.
  """
  global _line_length

  abs_filename = os.path.abspath(filename)
  cfg_filters = []
  keep_looking = True
  while keep_looking:
    abs_path, base_name = os.path.split(abs_filename)
    if not base_name:
      break  # Reached the root directory.

    cfg_file = os.path.join(abs_path, "CPPLINT.cfg")
    # Continue from the parent directory on the next iteration.
    abs_filename = abs_path
    if not os.path.isfile(cfg_file):
      continue

    try:
      with open(cfg_file) as file_handle:
        for line in file_handle:
          line, _, _ = line.partition('#')  # Remove comments.
          if not line.strip():
            continue

          name, _, val = line.partition('=')
          name = name.strip()
          val = val.strip()
          if name == 'set noparent':
            keep_looking = False
          elif name == 'filter':
            cfg_filters.append(val)
          elif name == 'exclude_files':
            # When matching exclude_files pattern, use the base_name of
            # the current file name or the directory name we are processing.
            # For example, if we are checking for lint errors in /foo/bar/baz.cc
            # and we found the .cfg file at /foo/CPPLINT.cfg, then the config
            # file's "exclude_files" filter is meant to be checked against "bar"
            # and not "baz" nor "bar/baz.cc".
            # (base_name is never empty here: the loop breaks above when
            # it is.)
            pattern = re.compile(val)
            if pattern.match(base_name):
              sys.stderr.write('Ignoring "%s": file excluded by "%s". '
                               'File path component "%s" matches '
                               'pattern "%s"\n' %
                               (filename, cfg_file, base_name, val))
              return False
          elif name == 'linelength':
            try:
              _line_length = int(val)
            except ValueError:
              # Terminate the message with a newline so that it does not
              # run into whatever is written to stderr next.
              sys.stderr.write('Line length must be numeric.\n')
          else:
            sys.stderr.write(
                'Invalid configuration option (%s) in file %s\n' %
                (name, cfg_file))

    except IOError:
      sys.stderr.write(
          "Skipping config file '%s': Can't open for reading\n" % cfg_file)
      keep_looking = False

  # Apply all the accumulated filters in reverse order (top-level directory
  # config options having the least priority).
  for cfg_filter in reversed(cfg_filters):
    _AddFilters(cfg_filter)

  return True
6084
6085
def ProcessFile(filename, vlevel, extra_check_functions=None):
  """Does google-lint on a single file.

  The current filters are backed up before any per-directory configuration
  is applied and are always restored before this function returns or raises.

  Args:
    filename: The name of the file to parse.  "-" means read from stdin.

    vlevel: The level of errors to report.  Every error of confidence
    >= verbose_level will be reported.  0 is a good default.

    extra_check_functions: An array of additional check functions that will be
                           run on each source line. Each function takes 4
                           arguments: filename, clean_lines, line, error.
                           None (the default) means no extra checks.
  """
  # Avoid a shared mutable default argument.
  if extra_check_functions is None:
    extra_check_functions = []

  _SetVerboseLevel(vlevel)
  _BackupFilters()
  try:
    if not ProcessConfigOverrides(filename):
      return

    try:
      # Support the UNIX convention of using "-" for stdin.  Note that
      # we are not opening the file with universal newline support
      # (which codecs doesn't support anyway), so the resulting lines do
      # contain trailing '\r' characters if we are reading a file that
      # has CRLF endings.
      # If after the split a trailing '\r' is present, it is removed
      # below.
      if filename == '-':
        lines = codecs.StreamReaderWriter(sys.stdin,
                                          codecs.getreader('utf8'),
                                          codecs.getwriter('utf8'),
                                          'replace').read().split('\n')
      else:
        # Use a context manager so the file handle is closed promptly.
        with codecs.open(filename, 'r', 'utf8', 'replace') as source_file:
          lines = source_file.read().split('\n')
    except IOError:
      sys.stderr.write(
          "Skipping input '%s': Can't open for reading\n" % filename)
      return

    # Remove trailing '\r' and remember (1-based) which lines had it.
    # The last element is skipped: it is the extra trailing blank line we
    # get from split().
    lf_lines = []
    crlf_lines = []
    for index, line in enumerate(lines[:-1]):
      if line.endswith('\r'):
        lines[index] = line.rstrip('\r')
        crlf_lines.append(index + 1)
      else:
        lf_lines.append(index + 1)

    # Note, if no dot is found, this will give the entire filename as the ext.
    file_extension = filename[filename.rfind('.') + 1:]

    # When reading from stdin, the extension is unknown, so no cpplint tests
    # should rely on the extension.
    if filename != '-' and file_extension not in _valid_extensions:
      sys.stderr.write('Ignoring %s; not a valid file name '
                       '(%s)\n' % (filename, ', '.join(_valid_extensions)))
      return

    ProcessFileData(filename, file_extension, lines, Error,
                    extra_check_functions)

    # If end-of-line sequences are a mix of LF and CR-LF, issue
    # warnings on the lines with CR.
    #
    # Don't issue any warnings if all lines are uniformly LF or CR-LF,
    # since critique can handle these just fine, and the style guide
    # doesn't dictate a particular end of line sequence.
    #
    # We can't depend on os.linesep to determine what the desired
    # end-of-line sequence should be, since that will return the
    # server-side end-of-line sequence.
    if lf_lines and crlf_lines:
      # Warn on every line with CR.  An alternative approach might be to
      # check whether the file is mostly CRLF or just LF, and warn on the
      # minority, we bias toward LF here since most tools prefer LF.
      for linenum in crlf_lines:
        Error(filename, linenum, 'whitespace/newline', 1,
              'Unexpected \\r (^M) found; better to use only \\n')
  finally:
    # Undo any filter changes made by the config overrides, even when an
    # unexpected exception propagates out of the checks.
    _RestoreFilters()
6172
6173
def PrintUsage(message):
  """Writes the usage text to stderr and terminates the program.

  Args:
    message: The optional error message.  When it is truthy the process
             exits via sys.exit with a 'FATAL ERROR' string built from it;
             otherwise it exits with status 1.
  """
  sys.stderr.write(_USAGE)
  # sys.exit accepts either a message string or an integer status.
  exit_argument = ('\nFATAL ERROR: ' + message) if message else 1
  sys.exit(exit_argument)
6185
6186
def PrintCategories():
  """Writes every known error category to stderr, then exits with status 0.

  These are the categories used to filter messages via --filter.
  """
  # One indented category per line, emitted with a single write.
  category_lines = ['  %s\n' % category for category in _ERROR_CATEGORIES]
  sys.stderr.write(''.join(category_lines))
  sys.exit(0)
6194
6195
def ParseArguments(args):
  """Parses the command line arguments.

  This may set the output format and verbosity level as side-effects.  It
  also assigns the module-level _root, _line_length and _valid_extensions
  when the corresponding options are given.  Invalid input is reported
  through PrintUsage.

  Args:
    args: The command line arguments.

  Returns:
    The list of filenames to lint.
  """
  global _root, _line_length, _valid_extensions

  try:
    (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
                                                 'counting=',
                                                 'filter=',
                                                 'root=',
                                                 'linelength=',
                                                 'extensions='])
  except getopt.GetoptError:
    PrintUsage('Invalid arguments.')

  verbosity = _VerboseLevel()
  output_format = _OutputFormat()
  filters = ''
  counting_style = ''

  for (opt, val) in opts:
    if opt == '--help':
      PrintUsage(None)
    elif opt == '--output':
      if val not in ('emacs', 'vs7', 'eclipse'):
        PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.')
      output_format = val
    elif opt == '--verbose':
      # Report a usage error instead of an uncaught ValueError traceback,
      # consistent with the --linelength handling below.
      try:
        verbosity = int(val)
      except ValueError:
        PrintUsage('Verbosity level must be an integer.')
    elif opt == '--filter':
      filters = val
      # An empty --filter= is a request to list the available categories.
      if not filters:
        PrintCategories()
    elif opt == '--counting':
      if val not in ('total', 'toplevel', 'detailed'):
        PrintUsage('Valid counting options are total, toplevel, and detailed')
      counting_style = val
    elif opt == '--root':
      _root = val
    elif opt == '--linelength':
      try:
        _line_length = int(val)
      except ValueError:
        PrintUsage('Line length must be digits.')
    elif opt == '--extensions':
      # Splitting a string and building a set cannot raise ValueError, so
      # no error handling is needed here.
      _valid_extensions = set(val.split(','))

  if not filenames:
    PrintUsage('No files were specified.')

  _SetOutputFormat(output_format)
  _SetVerboseLevel(verbosity)
  _SetFilters(filters)
  _SetCountingStyle(counting_style)

  return filenames
6264
6265
def main():
  """Lints every file named on the command line and exits.

  The exit argument is True when at least one error was counted and False
  otherwise.
  """
  files_to_lint = ParseArguments(sys.argv[1:])

  # Wrap stderr so that text containing non-ASCII characters is written
  # with replacement characters instead of making us die.
  utf8_reader = codecs.getreader('utf8')
  utf8_writer = codecs.getwriter('utf8')
  sys.stderr = codecs.StreamReaderWriter(
      sys.stderr, utf8_reader, utf8_writer, 'replace')

  _cpplint_state.ResetErrorCounts()
  for path in files_to_lint:
    ProcessFile(path, _cpplint_state.verbose_level)
  _cpplint_state.PrintErrorCounts()

  found_errors = _cpplint_state.error_count > 0
  sys.exit(found_errors)
6282
6283
# Run the linter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
  main()
6286