1#!/usr/bin/python
2#
3# Copyright (c) 2009 Google Inc. All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9#    * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11#    * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15#    * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31"""Does google-lint on c++ files.
32
33The goal of this script is to identify places in the code that *may*
34be in non-compliance with google style.  It does not attempt to fix
up these problems -- the point is to educate.  It also does not
36attempt to find all problems, or to ensure that everything it does
37find is legitimately a problem.
38
39In particular, we can get very confused by /* and // inside strings!
40We do a small hack, which is to ignore //'s with "'s after them on the
41same line, but it is far from perfect (in either direction).
42"""
43
44import codecs
45import copy
46import getopt
47import math  # for log
48import os
49import re
50import sre_compile
51import string
52import sys
53import unicodedata
54
55
# Command-line help text: a syntax summary followed by a description of each
# flag.  Keep the flag list and the default-extension list in sync with the
# module-level defaults (_line_length, _valid_extensions).
_USAGE = """
Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
                   [--counting=total|toplevel|detailed] [--root=subdir]
                   [--linelength=digits] [--extensions=ext,ext,...]
        <file> [file] ...

  The style guidelines this tries to follow are those in
    http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml

  Every problem is given a confidence score from 1-5, with 5 meaning we are
  certain of the problem, and 1 meaning it could be a legitimate construct.
  This will miss some errors, and is not a substitute for a code review.

  To suppress false-positive errors of a certain category, add a
  'NOLINT(category)' comment to the line.  NOLINT or NOLINT(*)
  suppresses errors of all categories on that line.

  The files passed in will be linted; at least one file must be provided.
  Default linted extensions are .cc, .cpp, .cu, .cuh, .h and .hh.  Change the
  extensions with the --extensions flag.

  Flags:

    output=vs7
      By default, the output is formatted to ease emacs parsing.  Visual Studio
      compatible output (vs7) may also be used.  Other formats are unsupported.

    verbose=#
      Specify a number 0-5 to restrict errors to certain verbosity levels.

    filter=-x,+y,...
      Specify a comma-separated list of category-filters to apply: only
      error messages whose category names pass the filters will be printed.
      (Category names are printed with the message and look like
      "[whitespace/indent]".)  Filters are evaluated left to right.
      "-FOO" and "FOO" means "do not print categories that start with FOO".
      "+FOO" means "do print categories that start with FOO".

      Examples: --filter=-whitespace,+whitespace/braces
                --filter=whitespace,runtime/printf,+runtime/printf_format
                --filter=-,+build/include_what_you_use

      To see a list of all the categories used in cpplint, pass no arg:
         --filter=

    counting=total|toplevel|detailed
      The total number of errors found is always printed. If
      'toplevel' is provided, then the count of errors in each of
      the top-level categories like 'build' and 'whitespace' will
      also be printed. If 'detailed' is provided, then a count
      is provided for each category like 'build/class'.

    root=subdir
      The root directory used for deriving header guard CPP variable.
      By default, the header guard CPP variable is calculated as the relative
      path to the directory that contains .git, .hg, or .svn.  When this flag
      is specified, the relative path is calculated from the specified
      directory. If the specified directory does not exist, this flag is
      ignored.

      Examples:
        Assuming that src/.git exists, the header guard CPP variables for
        src/chrome/browser/ui/browser.h are:

        No flag => CHROME_BROWSER_UI_BROWSER_H_
        --root=chrome => BROWSER_UI_BROWSER_H_
        --root=chrome/browser => UI_BROWSER_H_

    linelength=digits
      This is the allowed line length for the project. The default value is
      80 characters.

      Examples:
        --linelength=120

    extensions=extension,extension,...
      The allowed file extensions that cpplint will check

      Examples:
        --extensions=hpp,cpp
"""
137
# We categorize each error message we print.  Here are the categories.
# We want an explicit list so we can list them all in cpplint --filter=.
# If you add a new error message with a new category, add it to the list
# here!  cpplint_unittest.py should tell you if you forget to do this.
#
# Each entry has the form '<top-level group>/<specific check>'.  This list is
# also what ParseNolintSuppressions() checks a NOLINT(category) argument
# against: a category missing from the list is reported as unknown.
_ERROR_CATEGORIES = [
  'build/class',
  'build/c++11',
  'build/deprecated',
  'build/endif_comment',
  'build/explicit_make_pair',
  'build/forward_decl',
  'build/header_guard',
  'build/include',
  'build/include_alpha',
  'build/include_order',
  'build/include_what_you_use',
  'build/namespaces',
  'build/printf_format',
  'build/storage_class',
  'legal/copyright',
  'readability/alt_tokens',
  'readability/braces',
  'readability/casting',
  'readability/check',
  'readability/constructors',
  'readability/fn_size',
  'readability/function',
  'readability/multiline_comment',
  'readability/multiline_string',
  'readability/namespace',
  'readability/nolint',
  'readability/nul',
  'readability/streams',
  'readability/todo',
  'readability/utf8',
  'runtime/arrays',
  'runtime/casting',
  'runtime/explicit',
  'runtime/int',
  'runtime/init',
  'runtime/invalid_increment',
  'runtime/member_string_references',
  'runtime/memset',
  'runtime/operator',
  'runtime/printf',
  'runtime/printf_format',
  'runtime/references',
  'runtime/string',
  'runtime/threadsafe_fn',
  'runtime/vlog',
  'whitespace/blank_line',
  'whitespace/braces',
  'whitespace/comma',
  'whitespace/comments',
  'whitespace/empty_conditional_body',
  'whitespace/empty_loop_body',
  'whitespace/end_of_line',
  'whitespace/ending_newline',
  'whitespace/forcolon',
  'whitespace/indent',
  'whitespace/line_length',
  'whitespace/newline',
  'whitespace/operators',
  'whitespace/parens',
  'whitespace/semicolon',
  'whitespace/tab',
  'whitespace/todo'
  ]
206
# The default state of the category filter. This is overridden by the --filter=
# flag. By default all errors are on, so only add here categories that should be
# off by default (i.e., categories that must be enabled by the --filter= flags).
# All entries here should start with a '-' or '+', as in the --filter= flag.
#
# _CppLintState copies this list (it never aliases it) both at construction
# time and at the start of SetFilters(), so user-supplied filters are always
# appended after these defaults.
_DEFAULT_FILTERS = ['-build/include_alpha']
212
213# We used to check for high-bit characters, but after much discussion we
214# decided those were OK, as long as they were in UTF-8 and didn't represent
215# hard-coded international strings, which belong in a separate i18n file.
216
# C++ headers
#
# An immutable set of header names, in three groups: legacy (pre-standard)
# headers, the C++ standard library headers, and the <c...> wrappers for C
# library facilities.
# NOTE(review): the code that consults this set is outside this part of the
# file; presumably it is used to classify an include as _CPP_SYS_HEADER --
# confirm against the include-classification code.
_CPP_HEADERS = frozenset([
    # Legacy
    'algobase.h',
    'algo.h',
    'alloc.h',
    'builtinbuf.h',
    'bvector.h',
    'complex.h',
    'defalloc.h',
    'deque.h',
    'editbuf.h',
    'fstream.h',
    'function.h',
    'hash_map',
    'hash_map.h',
    'hash_set',
    'hash_set.h',
    'hashtable.h',
    'heap.h',
    'indstream.h',
    'iomanip.h',
    'iostream.h',
    'istream.h',
    'iterator.h',
    'list.h',
    'map.h',
    'multimap.h',
    'multiset.h',
    'ostream.h',
    'pair.h',
    'parsestream.h',
    'pfstream.h',
    'procbuf.h',
    'pthread_alloc',
    'pthread_alloc.h',
    'rope',
    'rope.h',
    'ropeimpl.h',
    'set.h',
    'slist',
    'slist.h',
    'stack.h',
    'stdiostream.h',
    'stl_alloc.h',
    'stl_relops.h',
    'streambuf.h',
    'stream.h',
    'strfile.h',
    'strstream.h',
    'tempbuf.h',
    'tree.h',
    'type_traits.h',
    'vector.h',
    # 17.6.1.2 C++ library headers
    'algorithm',
    'array',
    'atomic',
    'bitset',
    'chrono',
    'codecvt',
    'complex',
    'condition_variable',
    'deque',
    'exception',
    'forward_list',
    'fstream',
    'functional',
    'future',
    'initializer_list',
    'iomanip',
    'ios',
    'iosfwd',
    'iostream',
    'istream',
    'iterator',
    'limits',
    'list',
    'locale',
    'map',
    'memory',
    'mutex',
    'new',
    'numeric',
    'ostream',
    'queue',
    'random',
    'ratio',
    'regex',
    'set',
    'sstream',
    'stack',
    'stdexcept',
    'streambuf',
    'string',
    'strstream',
    'system_error',
    'thread',
    'tuple',
    'typeindex',
    'typeinfo',
    'type_traits',
    'unordered_map',
    'unordered_set',
    'utility',
    'valarray',
    'vector',
    # 17.6.1.2 C++ headers for C library facilities
    'cassert',
    'ccomplex',
    'cctype',
    'cerrno',
    'cfenv',
    'cfloat',
    'cinttypes',
    'ciso646',
    'climits',
    'clocale',
    'cmath',
    'csetjmp',
    'csignal',
    'cstdalign',
    'cstdarg',
    'cstdbool',
    'cstddef',
    'cstdint',
    'cstdio',
    'cstdlib',
    'cstring',
    'ctgmath',
    'ctime',
    'cuchar',
    'cwchar',
    'cwctype',
    ])
352
353
# Assertion macros.  These are defined in base/logging.h and
# testing/base/gunit.h.  Note that the _M versions need to come first
# for substring matching to work.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
    ]

# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE.
# Shape: {macro name: {comparison operator: name of the macro to use instead}}.
_CHECK_REPLACEMENT = dict((macro, {}) for macro in _CHECK_MACROS)

# Macros asserting that the condition holds keep the operator's own suffix:
# CHECK(a == b) is replaced by CHECK_EQ.
for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
                        ('>=', 'GE'), ('>', 'GT'),
                        ('<=', 'LE'), ('<', 'LT')]:
  for _macro, _template in [('DCHECK', 'DCHECK_%s'),
                            ('CHECK', 'CHECK_%s'),
                            ('EXPECT_TRUE', 'EXPECT_%s'),
                            ('ASSERT_TRUE', 'ASSERT_%s'),
                            ('EXPECT_TRUE_M', 'EXPECT_%s_M'),
                            ('ASSERT_TRUE_M', 'ASSERT_%s_M')]:
    _CHECK_REPLACEMENT[_macro][op] = _template % replacement

# Macros asserting that the condition is false map every operator to the
# suffix of its logical inverse: EXPECT_FALSE(a == b) is replaced by EXPECT_NE.
for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
                            ('>=', 'LT'), ('>', 'LE'),
                            ('<=', 'GT'), ('<', 'GE')]:
  for _macro, _template in [('EXPECT_FALSE', 'EXPECT_%s'),
                            ('ASSERT_FALSE', 'ASSERT_%s'),
                            ('EXPECT_FALSE_M', 'EXPECT_%s_M'),
                            ('ASSERT_FALSE_M', 'ASSERT_%s_M')]:
    _CHECK_REPLACEMENT[_macro][op] = _template % inv_replacement
385
# Alternative tokens and their replacements.  For full list, see section 2.5
# Alternative tokens [lex.digraph] in the C++ standard.
#
# Digraphs (such as '%:') are not included here since it's a mess to
# match those on a word boundary.
_ALT_TOKEN_REPLACEMENT = {
    'and': '&&',
    'bitor': '|',
    'or': '||',
    'xor': '^',
    'compl': '~',
    'bitand': '&',
    'and_eq': '&=',
    'or_eq': '|=',
    'xor_eq': '^=',
    'not': '!',
    'not_eq': '!='
    }

# Regular expression matching any of the keywords above.  A keyword must be
# preceded by one of " =()" and followed by a space, "(" or end of line; this
# is meant to avoid matching the keywords outside of boolean expressions.
# Group 1 captures the keyword itself.
#
# False positives include C-style multi-line comments and multi-line strings
# but those have always been troublesome for cpplint.
_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
    r'[ =()](%s)(?=[ (]|$)' % '|'.join(_ALT_TOKEN_REPLACEMENT))
412
413
# These constants define types of headers for use with
# _IncludeState.CheckNextIncludeOrder().
_C_SYS_HEADER = 1        # C system header
_CPP_SYS_HEADER = 2      # C++ system header
_LIKELY_MY_HEADER = 3    # header this file implements
_POSSIBLE_MY_HEADER = 4  # header this file may implement
_OTHER_HEADER = 5        # any other header

# These constants define the current inline assembly state
_NO_ASM = 0       # Outside of inline assembly block
_INSIDE_ASM = 1   # Inside inline assembly block
_END_ASM = 2      # Last line of inline assembly block
_BLOCK_ASM = 3    # The whole block is an inline assembly block

# Match start of assembly blocks: optional leading whitespace, one of the
# asm keyword spellings, an optional volatile qualifier (captured as group 1),
# then an opening '{' or '('.
_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)'
                        r'(?:\s+(volatile|__volatile__))?'
                        r'\s*[{(]')
432
433
# {str: compiled pattern}: cache of compiled regular expressions keyed by
# their pattern text.  Filled lazily by Match(), ReplaceAll() and Search().
_regexp_compile_cache = {}

# Finds occurrences of NOLINT or NOLINT(...).
# Group 1 is the parenthesized argument including the parentheses, or None
# when NOLINT is not followed by a closed "(...)".
_RE_SUPPRESSION = re.compile(r'\bNOLINT\b(\([^)]*\))?')

# {str, set(int)}: a map from error categories to sets of linenumbers
# on which those errors are expected and should be suppressed.
# The key None holds the lines on which every category is suppressed.
_error_suppressions = {}

# The root directory used for deriving header guard CPP variable.
# This is set by --root flag.
_root = None

# The allowed line length of files.
# This is set by --linelength flag.
_line_length = 80

# The allowed extensions for file names
# This is set by --extensions flag.
# Entries are stored without the leading period.
_valid_extensions = set(['cc', 'hh', 'h', 'cpp', 'cu', 'cuh'])
454
def ParseNolintSuppressions(filename, raw_line, linenum, error):
  """Records the NOLINT suppression, if any, found on one source line.

  Only the first NOLINT occurrence on the line is considered.  A bare NOLINT
  or NOLINT(*) suppresses every category on that line; NOLINT(category)
  suppresses just that category.  The result is stored in the global
  _error_suppressions map.  A category that is not listed in
  _ERROR_CATEGORIES is reported through the error handler instead.

  Args:
    filename: str, the name of the input file.
    raw_line: str, the line of input text, with comments.
    linenum: int, the number of the current line.
    error: function, an error handler.
  """
  # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
  nolint = _RE_SUPPRESSION.search(raw_line)
  if not nolint:
    return

  spec = nolint.group(1)
  if spec is None or spec == '(*)':
    # No argument, or the wildcard: suppress all categories on this line.
    _error_suppressions.setdefault(None, set()).add(linenum)
    return

  if not (spec.startswith('(') and spec.endswith(')')):
    return

  name = spec[1:-1]  # Drop the surrounding parentheses.
  if name in _ERROR_CATEGORIES:
    _error_suppressions.setdefault(name, set()).add(linenum)
  else:
    error(filename, linenum, 'readability/nolint', 5,
          'Unknown NOLINT error category: %s' % name)
482
483
def ResetNolintSuppressions():
  """Resets the set of NOLINT suppressions to empty.

  Empties the global _error_suppressions map in place, so suppressions
  recorded by earlier ParseNolintSuppressions() calls no longer apply.
  """
  _error_suppressions.clear()
487
488
def IsErrorSuppressedByNolint(category, linenum):
  """Tells whether a NOLINT comment suppresses this category on this line.

  Looks the line up in the global _error_suppressions map, first under the
  specific category and then under the None key, which holds the lines on
  which all categories are suppressed.

  Args:
    category: str, the category of the error.
    linenum: int, the current line number.
  Returns:
    bool, True iff the error should be suppressed due to a NOLINT comment.
  """
  for key in (category, None):
    if linenum in _error_suppressions.get(key, set()):
      return True
  return False
503
504
def Match(pattern, s):
  """Matches the string with the pattern, caching the compiled regexp.

  Args:
    pattern: str, regular expression source text.
    s: str, the string to test; matching is anchored at its start.

  Returns:
    A match object, or None if the start of s does not match the pattern.
  """
  # The regexp compilation caching is inlined in both Match and Search for
  # performance reasons; factoring it out into a separate function turns out
  # to be noticeably expensive.
  if pattern not in _regexp_compile_cache:
    # Use the public re.compile() rather than the undocumented internal
    # sre_compile module, which is deprecated since Python 3.11.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].match(s)
513
514
def ReplaceAll(pattern, rep, s):
  """Replaces instances of pattern in a string with a replacement.

  The compiled regex is kept in a cache shared by Match and Search.

  Args:
    pattern: regex pattern
    rep: replacement text
    s: search string

  Returns:
    string with replacements made (or original string if no replacements)
  """
  if pattern not in _regexp_compile_cache:
    # Use the public re.compile() rather than the undocumented internal
    # sre_compile module, which is deprecated since Python 3.11.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].sub(rep, s)
531
532
def Search(pattern, s):
  """Searches the string for the pattern, caching the compiled regexp.

  Args:
    pattern: str, regular expression source text.
    s: str, the string to scan; the pattern may match at any position.

  Returns:
    A match object for the first match found, or None if there is none.
  """
  if pattern not in _regexp_compile_cache:
    # Use the public re.compile() rather than the undocumented internal
    # sre_compile module, which is deprecated since Python 3.11.
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].search(s)
538
539
class _IncludeState(dict):
  """Tracks line numbers for includes, and the order in which includes appear.

  As a dict, an _IncludeState object serves as a mapping between include
  filename and line number on which that file was included.

  Call CheckNextIncludeOrder() once for each header in the file, passing
  in the type constants defined above.  When a header arrives in an illegal
  order, that call returns a non-empty error message describing the problem.

  """
  # self._section only ever moves forward through these values.  A C or C++
  # system header that would require moving backwards makes
  # CheckNextIncludeOrder return an error message.
  _INITIAL_SECTION = 0
  _MY_H_SECTION = 1
  _C_SECTION = 2
  _CPP_SECTION = 3
  _OTHER_H_SECTION = 4

  # Human-readable names used to build the error message.
  _TYPE_NAMES = {
      _C_SYS_HEADER: 'C system header',
      _CPP_SYS_HEADER: 'C++ system header',
      _LIKELY_MY_HEADER: 'header this file implements',
      _POSSIBLE_MY_HEADER: 'header this file may implement',
      _OTHER_HEADER: 'other header',
      }
  _SECTION_NAMES = {
      _INITIAL_SECTION: "... nothing. (This can't be an error.)",
      _MY_H_SECTION: 'a header this file implements',
      _C_SECTION: 'C system header',
      _CPP_SECTION: 'C++ system header',
      _OTHER_H_SECTION: 'other header',
      }

  def __init__(self):
    super(_IncludeState, self).__init__()
    self.ResetSection()

  def ResetSection(self):
    """Returns to the initial section and forgets the last header seen."""
    # The path of last found header.
    self._last_header = ''
    # The section the most recently checked header belongs to.
    self._section = self._INITIAL_SECTION

  def SetLastHeader(self, header_path):
    """Remembers header_path as the most recently seen header."""
    self._last_header = header_path

  def CanonicalizeAlphabeticalOrder(self, header_path):
    """Returns a path canonicalized for alphabetical comparison.

    - removes '-inl' since we don't require them to be after the main header.
    - replaces "-" with "_" so they both cmp the same.
    - lowercase everything, just in case.

    Args:
      header_path: Path to be canonicalized.

    Returns:
      Canonicalized path.
    """
    without_inl = header_path.replace('-inl.h', '.h')
    return without_inl.replace('-', '_').lower()

  def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
    """Check if a header is in alphabetical order with the previous header.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      header_path: Canonicalized header to be checked.

    Returns:
      Returns true if the header is in alphabetical order.
    """
    # When the section changes, _last_header is reset to the empty string,
    # which never sorts after the current header.
    if self._last_header <= header_path:
      return True
    # Out of order, but a blank line right above is taken to mean that the
    # headers are intentionally grouped the way they are.
    previous_line = clean_lines.elided[linenum - 1]
    return Match(r'^\s*$', previous_line) is not None

  def CheckNextIncludeOrder(self, header_type):
    """Returns a non-empty error message if the next header is out of order.

    This function also updates the internal state to be ready to check
    the next include.

    Args:
      header_type: One of the _XXX_HEADER constants defined above.

    Returns:
      The empty string if the header is in the right order, or an
      error message describing what's wrong.

    """
    # Built up front, while self._section still names the section of the
    # previous header.
    error_message = ('Found %s after %s' %
                     (self._TYPE_NAMES[header_type],
                      self._SECTION_NAMES[self._section]))

    section_before = self._section

    if header_type in (_C_SYS_HEADER, _CPP_SYS_HEADER):
      if header_type == _C_SYS_HEADER:
        wanted_section = self._C_SECTION
      else:
        wanted_section = self._CPP_SECTION
      if self._section > wanted_section:
        # A system header after a later section: report it, leaving the
        # section untouched.
        self._last_header = ''
        return error_message
      self._section = wanted_section
    elif header_type in (_LIKELY_MY_HEADER, _POSSIBLE_MY_HEADER):
      # Once past the "my header" section such a header is simply treated
      # as an "other" header; for _POSSIBLE_MY_HEADER that is because we're
      # not sure enough that the header is associated with this file.
      if self._section <= self._MY_H_SECTION:
        self._section = self._MY_H_SECTION
      else:
        self._section = self._OTHER_H_SECTION
    else:
      assert header_type == _OTHER_HEADER
      self._section = self._OTHER_H_SECTION

    if section_before != self._section:
      # Alphabetical ordering starts over in every new section.
      self._last_header = ''

    return ''
675
676
class _CppLintState(object):
  """Maintains module-wide state."""

  def __init__(self):
    self.verbose_level = 1  # global setting.
    self.error_count = 0    # global count of reported errors
    # filters to apply when emitting error messages
    self.filters = _DEFAULT_FILTERS[:]
    self.counting = 'total'  # In what way are we counting errors?
    self.errors_by_category = {}  # string to int dict storing error counts

    # output format:
    # "emacs" - format that emacs can parse (default)
    # "vs7" - format that Microsoft Visual Studio 7 can parse
    self.output_format = 'emacs'

  def SetOutputFormat(self, output_format):
    """Sets the output format for errors."""
    self.output_format = output_format

  def SetVerboseLevel(self, level):
    """Sets the module's verbosity, and returns the previous setting."""
    last_verbose_level = self.verbose_level
    self.verbose_level = level
    return last_verbose_level

  def SetCountingStyle(self, counting_style):
    """Sets the module's counting options."""
    self.counting = counting_style

  def SetFilters(self, filters):
    """Sets the error-message filters.

    These filters are applied when deciding whether to emit a given
    error message.

    Args:
      filters: A string of comma-separated filters (eg "+whitespace/indent").
               Each filter should start with + or -; else we die.

    Raises:
      ValueError: The comma-separated filters did not all start with '+' or '-'.
                  E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
    """
    # Default filters always have less priority than the flag ones.
    self.filters = _DEFAULT_FILTERS[:]
    for filt in filters.split(','):
      clean_filt = filt.strip()
      if clean_filt:  # Skip empty entries such as the one in "a,,b".
        self.filters.append(clean_filt)
    for filt in self.filters:
      if not (filt.startswith('+') or filt.startswith('-')):
        raise ValueError('Every filter in --filters must start with + or -'
                         ' (%s does not)' % filt)

  def ResetErrorCounts(self):
    """Sets the module's error statistic back to zero."""
    self.error_count = 0
    self.errors_by_category = {}

  def IncrementErrorCount(self, category):
    """Bumps the module's error statistic.

    Args:
      category: str, the full category of the error, e.g. 'build/class'.
    """
    self.error_count += 1
    if self.counting in ('toplevel', 'detailed'):
      if self.counting != 'detailed':
        # 'toplevel' counting groups by the part before the first '/'.
        category = category.split('/')[0]
      self.errors_by_category[category] = (
          self.errors_by_category.get(category, 0) + 1)

  def PrintErrorCounts(self):
    """Print a summary of errors by category, and the total."""
    # dict.items() exists on both Python 2 and Python 3 (iteritems() is
    # Python 2 only); sorting makes the order of the report deterministic.
    for category, count in sorted(self.errors_by_category.items()):
      sys.stderr.write('Category \'%s\' errors found: %d\n' %
                       (category, count))
    sys.stderr.write('Total errors found: %d\n' % self.error_count)
753
# The single shared _CppLintState instance.  The module-level accessor
# functions defined below read and update this object.
_cpplint_state = _CppLintState()
755
756
def _OutputFormat():
  """Gets the module's output format.

  Returns:
    The output_format attribute of the shared _cpplint_state object
    ('emacs' by default).
  """
  return _cpplint_state.output_format
760
761
def _SetOutputFormat(output_format):
  """Sets the module's output format.

  Args:
    output_format: The new format, stored on the shared _cpplint_state
                   object.  No validation is performed here.
  """
  _cpplint_state.SetOutputFormat(output_format)
765
766
def _VerboseLevel():
  """Returns the module's verbosity setting.

  Returns:
    The verbose_level attribute of the shared _cpplint_state object
    (1 by default).
  """
  return _cpplint_state.verbose_level
770
771
def _SetVerboseLevel(level):
  """Sets the module's verbosity, and returns the previous setting.

  Args:
    level: The new verbosity level.

  Returns:
    The verbosity level that was in effect before this call.
  """
  return _cpplint_state.SetVerboseLevel(level)
775
776
def _SetCountingStyle(level):
  """Sets the module's counting options.

  Args:
    level: The counting style.  Despite the parameter name this is not a
           number: _CppLintState starts out with 'total' and its
           IncrementErrorCount() recognizes 'toplevel' and 'detailed'.
  """
  _cpplint_state.SetCountingStyle(level)
780
781
def _Filters():
  """Returns the module's list of output filters, as a list.

  The list returned is the shared state's own list object, not a copy.
  """
  return _cpplint_state.filters
785
786
def _SetFilters(filters):
  """Sets the module's error-message filters.

  These filters are applied when deciding whether to emit a given
  error message.

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
             Each filter should start with + or -; else we die.

  Raises:
    ValueError: Propagated from _CppLintState.SetFilters() when a filter
                does not start with '+' or '-'.
  """
  _cpplint_state.SetFilters(filters)
798
799
class _FunctionState(object):
  """Remembers the function being scanned and counts the lines in its body."""

  _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
  _TEST_TRIGGER = 400    # the larger base used for test functions.

  def __init__(self):
    self.current_function = ''
    self.lines_in_function = 0
    self.in_a_function = False

  def Begin(self, function_name):
    """Starts tracking a new function body, with the line count at zero.

    Args:
      function_name: The name of the function being tracked.
    """
    self.current_function = function_name
    self.lines_in_function = 0
    self.in_a_function = True

  def Count(self):
    """Adds one line to the current body; does nothing outside a function."""
    if not self.in_a_function:
      return
    self.lines_in_function += 1

  def Check(self, error, filename, linenum):
    """Reports an error when the function body has too many lines.

    Args:
      error: The function to call with any errors found.
      filename: The name of the current file.
      linenum: The number of the line to check.
    """
    is_test = Match(r'T(EST|est)', self.current_function)
    base_trigger = self._TEST_TRIGGER if is_test else self._NORMAL_TRIGGER
    # The limit doubles with every verbosity level.
    trigger = base_trigger * 2**_VerboseLevel()

    if self.lines_in_function <= trigger:
      return

    # Confidence is the base-2 log of how many times over the base trigger
    # the function is, truncated to an int and capped at 5.
    ratio = self.lines_in_function / base_trigger
    error_level = min(int(math.log(ratio, 2)), 5)
    error(filename, linenum, 'readability/fn_size', error_level,
          'Small and focused functions are preferred:'
          ' %s has %d non-comment lines'
          ' (error triggered by exceeding %d lines).'  % (
              self.current_function, self.lines_in_function, trigger))

  def End(self):
    """Marks the end of the function body; later Count() calls are ignored."""
    self.in_a_function = False
854
855
class _IncludeError(Exception):
  """Signals a problem with the order of the includes in a file.

  NOTE(review): no raise site is visible in this part of the file.
  """
859
860
class FileInfo(object):
  """Provides utility functions for filenames.

  FileInfo provides easy access to the components of a file's path
  relative to the project root.
  """

  def __init__(self, filename):
    self._filename = filename

  @staticmethod
  def _HasVcsMarker(directory):
    """Returns True if directory contains a .git, .hg or .svn entry."""
    return any(os.path.exists(os.path.join(directory, marker))
               for marker in ('.git', '.hg', '.svn'))

  def FullName(self):
    """Make Windows paths like Unix.

    Returns:
      The absolute path of the file, with every backslash replaced by '/'.
    """
    return os.path.abspath(self._filename).replace('\\', '/')

  def RepositoryName(self):
    r"""FullName after removing the local path to the repository.

    If we have a real absolute path name here we can try to do something smart:
    detecting the root of the checkout and truncating /path/to/checkout from
    the name so that we get header guards that don't include things like
    "C:\Documents and Settings\..." or "/home/username/..." in them and thus
    people on different computers who have checked the source out to different
    locations won't see bogus errors.

    Returns:
      The path relative to the detected checkout root, or the unchanged
      FullName() when the file does not exist or no checkout root is found.
    """
    fullname = self.FullName()

    if os.path.exists(fullname):
      project_dir = os.path.dirname(fullname)

      if os.path.exists(os.path.join(project_dir, ".svn")):
        # If there's a .svn file in the current directory, we recursively look
        # up the directory tree for the top of the SVN checkout
        root_dir = project_dir
        one_up_dir = os.path.dirname(root_dir)
        # dirname() of the filesystem root is the root itself, so the first
        # test guarantees termination even if the root holds a .svn entry.
        while (one_up_dir != root_dir and
               os.path.exists(os.path.join(one_up_dir, ".svn"))):
          root_dir = one_up_dir
          one_up_dir = os.path.dirname(one_up_dir)

        prefix = os.path.commonprefix([root_dir, project_dir])
        return fullname[len(prefix) + 1:]

      # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
      # searching up from the current path.
      root_dir = project_dir
      while (root_dir != os.path.dirname(root_dir) and
             not self._HasVcsMarker(root_dir)):
        root_dir = os.path.dirname(root_dir)

      if self._HasVcsMarker(root_dir):
        prefix = os.path.commonprefix([root_dir, project_dir])
        return fullname[len(prefix) + 1:]

    # Don't know what to do; header guard warnings may be wrong...
    return fullname

  def Split(self):
    """Splits the file into the directory, basename, and extension.

    For 'chrome/browser/browser.cc', Split() would
    return ('chrome/browser', 'browser', '.cc')

    Returns:
      A tuple of (directory, basename, extension).
    """

    googlename = self.RepositoryName()
    project, rest = os.path.split(googlename)
    return (project,) + os.path.splitext(rest)

  def BaseName(self):
    """File base name - text after the final slash, before the final period."""
    return self.Split()[1]

  def Extension(self):
    """File extension - the final period and the text following it."""
    return self.Split()[2]

  def NoExtension(self):
    """Repository-relative path of the file with its extension removed."""
    return '/'.join(self.Split()[0:2])

  def IsSource(self):
    """File has a source file extension."""
    return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
949
950
def _ShouldPrintError(category, confidence, linenum):
  """Decides whether an error of this category should be reported.

  An error is dropped when any of the following holds:
    * IsErrorSuppressedByNolint() reports it as suppressed for linenum,
    * confidence is below the configured verbose level, or
    * the last filter whose prefix matches the category is a '-' filter.

  Args:
    category: The error category, e.g. "whitespace/indent".
    confidence: The confidence score of the error.
    linenum: The number of the line the error was found on.

  Returns:
    True if the error should be printed, False otherwise.
  """
  if IsErrorSuppressedByNolint(category, linenum):
    return False

  if confidence < _cpplint_state.verbose_level:
    return False

  # Filters are applied in order, so a later matching filter overrides an
  # earlier one.
  suppressed = False
  for one_filter in _Filters():
    sign = one_filter[:1]
    prefix = one_filter[1:]
    if sign == '+':
      if category.startswith(prefix):
        suppressed = False
    elif sign == '-':
      if category.startswith(prefix):
        suppressed = True
    else:
      assert False  # should have been checked for in SetFilter.

  return not suppressed
977
978
def Error(filename, linenum, category, confidence, message):
  """Reports a lint error on stderr and counts it.

  Besides the location of the error we also report our confidence in it,
  that is, how certain we are this is a legitimate style regression, and
  not a misidentification or a use that's sometimes justified.

  Nothing is written or counted when _ShouldPrintError() rejects the error,
  for instance because of a "NOLINT(category)" comment, the verbosity
  level, or the category filters.

  Args:
    filename: The name of the file containing the error.
    linenum: The number of the line containing the error.
    category: A string used to describe the "category" this bug
      falls under: "whitespace", say, or "runtime".  Categories
      may have a hierarchy separated by slashes: "whitespace/indent".
    confidence: A number from 1-5 representing a confidence score for
      the error, with 5 meaning that we are certain of the problem,
      and 1 meaning that it could be a legitimate construct.
    message: The error message.
  """
  if not _ShouldPrintError(category, confidence, linenum):
    return

  _cpplint_state.IncrementErrorCount(category)

  # Pick the line layout for the configured output format; anything other
  # than 'vs7' and 'eclipse' uses the last layout.
  output_format = _cpplint_state.output_format
  if output_format == 'vs7':
    template = '%s(%s):  %s  [%s] [%d]\n'
  elif output_format == 'eclipse':
    template = '%s:%s: warning: %s  [%s] [%d]\n'
  else:
    template = '%s:%s:  %s  [%s] [%d]\n'

  sys.stderr.write(template % (
      filename, linenum, message, category, confidence))
1012
1013
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard:
# a backslash followed by a single escape character, a run of digits, or
# 'x' plus hex digits.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches a single C style comment (/* ... */) that opens and closes within
# the text being searched.  This is a plain pattern string (not compiled)
# because it is only used as a building block below.
_RE_PATTERN_C_COMMENTS = r'/\*(?:[^*]|\*(?!/))*\*/'
# Matches a C style comment together with some of the whitespace around it.
# This RE is a little bit more complicated than one might expect, because we
# have to be careful about how much whitespace is removed along with the
# comment, so that comments inside statements are handled better.
# The alternatives are tried in this order:
#   1. comment at the end of the line: whitespace on both sides is matched;
#   2. comment followed by whitespace: the whitespace on the right is matched;
#   3. whitespace followed by a comment: the whitespace on the left is
#      matched, but only if a non-word character follows the comment;
#   4. otherwise just the comment itself.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r'(\s*' + _RE_PATTERN_C_COMMENTS + r'\s*$|' +
    _RE_PATTERN_C_COMMENTS + r'\s+|' +
    r'\s+' + _RE_PATTERN_C_COMMENTS + r'(?=\W)|' +
    _RE_PATTERN_C_COMMENTS + r')')
1032
1033
def IsCppString(line):
  """Tells whether the text right after line would be inside a string.

  Neither single-line nor multi-line comments are taken into account.

  Args:
    line: is a partial line of code starting from the 0..n.

  Returns:
    True, if next character appended to 'line' is inside a
    string constant.
  """
  # Neutralize escaped backslashes first, so that \\" is not mistaken
  # for \".
  text = line.replace(r'\\', 'XX')

  all_quotes = text.count('"')
  escaped_quotes = text.count(r'\"')
  char_literal_quotes = text.count("'\"'")

  # An odd number of real string delimiters means a string is still open.
  open_delimiters = all_quotes - escaped_quotes - char_literal_quotes
  return open_delimiters % 2 == 1
1049
1050
def CleanseRawStrings(raw_lines):
  """Removes C++11 raw strings from lines.

    Before:
      static const char kData[] = R"(
          multi-line string
          )";

    After:
      static const char kData[] = ""
      ""
          "";

  Lines that lie completely inside a raw string are replaced by a bare ""
  (without indentation), so the number of lines is preserved.

  Args:
    raw_lines: list of raw lines.

  Returns:
    list of lines with C++11 raw strings replaced by empty strings.
  """

  # Closing sequence ( )delimiter" ) of the raw string we are currently
  # inside of, or None when we are not inside a raw string.
  delimiter = None
  lines_without_raw_strings = []
  for line in raw_lines:
    if delimiter:
      # Inside a raw string, look for the end
      end = line.find(delimiter)
      if end >= 0:
        # Found the end of the string, match leading space for this
        # line and resume copying the original lines, and also insert
        # a "" on the last line.  The line contains the delimiter, which
        # starts with ')', so it has at least one non-space character.
        leading_space = Match(r'^(\s*)\S', line)
        line = leading_space.group(1) + '""' + line[end + len(delimiter):]
        delimiter = None
      else:
        # Haven't found the end yet, replace the line with an empty string
        # literal.
        line = '""'

    # Look for beginning of a raw string, and replace them with
    # empty strings.  This is done in a loop to handle multiple raw
    # strings on the same line.  The loop is skipped while we are still
    # inside a multi-line raw string (delimiter is set).
    while delimiter is None:
      # Look for beginning of a raw string.
      # See 2.14.15 [lex.string] for syntax.
      # Groups: (1) text before the raw string prefix, (2) the user chosen
      # delimiter between the quote and '(', (3) the rest of the line.
      matched = Match(r'^(.*)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line)
      if matched:
        delimiter = ')' + matched.group(2) + '"'

        end = matched.group(3).find(delimiter)
        if end >= 0:
          # Raw string ended on same line
          line = (matched.group(1) + '""' +
                  matched.group(3)[end + len(delimiter):])
          delimiter = None
        else:
          # Start of a multi-line raw string
          line = matched.group(1) + '""'
      else:
        break

    lines_without_raw_strings.append(line)

  # TODO(unknown): if delimiter is not None here, we might want to
  # emit a warning for unterminated string.
  return lines_without_raw_strings
1115
1116
def FindNextMultiLineCommentStart(lines, lineix):
  """Returns the index of the next line that opens a multi-line comment.

  A line qualifies when, ignoring surrounding whitespace, it starts with
  '/*' and contains no '*/' after that opening marker.  len(lines) is
  returned when no such line exists at or after lineix.
  """
  num_lines = len(lines)
  index = lineix
  while index < num_lines:
    stripped = lines[index].strip()
    opens_comment = stripped.startswith('/*')
    # Only report this marker if the comment goes beyond this line
    if opens_comment and '*/' not in stripped[2:]:
      return index
    index += 1
  return num_lines
1126
1127
def FindNextMultiLineCommentEnd(lines, lineix):
  """Returns the index of the next line that closes a multi-line comment.

  We are inside a comment; the closing line is the first one at or after
  lineix that, ignoring surrounding whitespace, ends with '*/'.
  len(lines) is returned when there is no such line.
  """
  num_lines = len(lines)
  index = lineix
  while index < num_lines:
    stripped = lines[index].strip()
    if stripped.endswith('*/'):
      return index
    index += 1
  return num_lines
1135
1136
def RemoveMultiLineCommentsFromRange(lines, begin, end):
  """Overwrites lines[begin:end] in place with '// dummy' placeholders.

  The placeholder keeps the lines non-empty, so we will not get
  unnecessary blank line warnings later in the code.
  """
  index = begin
  while index < end:
    lines[index] = '// dummy'
    index += 1
1143
1144
def RemoveMultiLineComments(filename, lines, error):
  """Blanks out every multi-line (C style) comment in lines, in place.

  Args:
    filename: The name of the current file.
    lines: The lines of the file; modified in place.
    error: The function to call when a comment is opened but never closed.
  """
  search_from = 0
  while search_from < len(lines):
    comment_begin = FindNextMultiLineCommentStart(lines, search_from)
    if comment_begin >= len(lines):
      # No further multi-line comment in the file.
      return

    comment_end = FindNextMultiLineCommentEnd(lines, comment_begin)
    if comment_end >= len(lines):
      error(filename, comment_begin + 1, 'readability/multiline_comment', 5,
            'Could not find end of multi-line comment')
      return

    # The closing line is part of the comment, so the range end is one past
    # it; scanning resumes right after.
    search_from = comment_end + 1
    RemoveMultiLineCommentsFromRange(lines, comment_begin, search_from)
1159
1160
def CleanseComments(line):
  """Strips a trailing //-comment and single-line /* */ comments.

  Args:
    line: A line of C++ source.

  Returns:
    The line with single-line comments removed.
  """
  slashes_at = line.find('//')
  # Only the first '//' is considered, and it is left alone when it is
  # located inside a string constant.
  if slashes_at >= 0 and not IsCppString(line[:slashes_at]):
    line = line[:slashes_at].rstrip()
  # get rid of /* ... */
  without_c_comments = _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
  return without_c_comments
1175
1176
class CleansedLines(object):
  """Holds 4 copies of all lines with different preprocessing applied to them.

  1) elided member contains lines without strings and comments,
  2) lines member contains lines without comments,
  3) raw_lines member contains all the lines without processing, and
  4) lines_without_raw_strings member is the result of CleanseRawStrings()
     on the raw lines.
  All these members are of <type 'list'>, and of the same length.
  """

  def __init__(self, lines):
    """Builds the preprocessed copies of lines.

    Args:
      lines: list of raw lines of the file; stored as raw_lines without
        being copied.
    """
    self.elided = []
    self.lines = []
    self.raw_lines = lines
    self.num_lines = len(lines)
    self.lines_without_raw_strings = CleanseRawStrings(lines)
    for line in self.lines_without_raw_strings:
      self.lines.append(CleanseComments(line))
      # Strings are collapsed before comments are removed.
      elided = self._CollapseStrings(line)
      self.elided.append(CleanseComments(elided))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Collapses strings and chars on a line to simple "" or '' blocks.

    We nix strings first so we're not fooled by text like '"http://"'

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings.
    """
    # Lines matching the include pattern are returned untouched.
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided

    # Remove escaped characters first to make quote/single quote collapsing
    # basic.  Things that look like escaped characters shouldn't occur
    # outside of strings and chars.
    elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)

    # Replace quoted strings and digit separators.  Both single quotes
    # and double quotes are processed in the same loop, otherwise
    # nested quotes wouldn't work.
    #
    # 'collapsed' accumulates the processed prefix of the line, 'elided' is
    # the part that still has to be looked at.
    collapsed = ''
    while True:
      # Find the first quote character
      match = Match(r'^([^\'"]*)([\'"])(.*)$', elided)
      if not match:
        collapsed += elided
        break
      head, quote, tail = match.groups()

      if quote == '"':
        # Collapse double quoted strings
        second_quote = tail.find('"')
        if second_quote >= 0:
          collapsed += head + '""'
          elided = tail[second_quote + 1:]
        else:
          # Unmatched double quote, don't bother processing the rest
          # of the line since this is probably a multiline string.
          collapsed += elided
          break
      else:
        # Found single quote, check nearby text to eliminate digit separators.
        #
        # There is no special handling for floating point here, because
        # the integer/fractional/exponent parts would all be parsed
        # correctly as long as there are digits on both sides of the
        # separator.  So we are fine as long as we don't see something
        # like "0.'3" (gcc 4.9.0 will not allow this literal).
        if Search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head):
          # The text before the quote ends in a number, so the quote is a
          # digit separator: consume the remainder of the literal and drop
          # all the separators in it.
          match_literal = Match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail)
          collapsed += head + match_literal.group(1).replace("'", '')
          elided = match_literal.group(2)
        else:
          second_quote = tail.find('\'')
          if second_quote >= 0:
            collapsed += head + "''"
            elided = tail[second_quote + 1:]
          else:
            # Unmatched single quote
            collapsed += elided
            break

    return collapsed
1268
1269
def FindEndOfExpressionInLine(line, startpos, stack):
  """Find the position just after the end of current parenthesized expression.

  The line is scanned from startpos to its end.  Opening brackets are
  pushed onto stack and popped again when the matching closer is seen; the
  scan succeeds as soon as the stack becomes empty this way.  The list
  passed as stack is modified in place.

  Args:
    line: a CleansedLines line.
    startpos: start searching at this position.
    stack: nesting stack at startpos.

  Returns:
    On finding matching end: (index just after matching end, None)
    On finding an unclosed expression: (-1, None)
    Otherwise: (-1, new stack at end of this line)
  """
  # range() instead of xrange(): xrange is not defined on Python 3, and
  # range() is available on every Python version.
  for i in range(startpos, len(line)):
    char = line[i]
    if char in '([{':
      # Found start of parenthesized expression, push to expression stack
      stack.append(char)
    elif char == '<':
      # Found potential start of template argument list
      if i > 0 and line[i - 1] == '<':
        # Left shift operator.  The first '<' of the pair was pushed as a
        # tentative template start on the previous iteration; undo that.
        if stack and stack[-1] == '<':
          stack.pop()
          if not stack:
            return (-1, None)
      elif i > 0 and Search(r'\boperator\s*$', line[0:i]):
        # operator<, don't add to stack
        continue
      else:
        # Tentative start of template argument list
        stack.append('<')
    elif char in ')]}':
      # Found end of parenthesized expression.
      #
      # If we are currently expecting a matching '>', the pending '<'
      # must have been an operator.  Remove them from expression stack.
      while stack and stack[-1] == '<':
        stack.pop()
      if not stack:
        return (-1, None)
      if ((stack[-1] == '(' and char == ')') or
          (stack[-1] == '[' and char == ']') or
          (stack[-1] == '{' and char == '}')):
        stack.pop()
        if not stack:
          return (i + 1, None)
      else:
        # Mismatched parentheses
        return (-1, None)
    elif char == '>':
      # Found potential end of template argument list.

      # Ignore "->" and operator functions
      # NOTE(review): the text checked here stops one character before the
      # '>' (line[0:i - 1]), unlike the '<' case above which checks
      # line[0:i] -- confirm this is intended.
      if (i > 0 and
          (line[i - 1] == '-' or Search(r'\boperator\s*$', line[0:i - 1]))):
        continue

      # Pop the stack if there is a matching '<'.  Otherwise, ignore
      # this '>' since it must be an operator.
      if stack:
        if stack[-1] == '<':
          stack.pop()
          if not stack:
            return (i + 1, None)
    elif char == ';':
      # Found something that look like end of statements.  If we are currently
      # expecting a '>', the matching '<' must have been an operator, since
      # template argument list should not contain statements.
      while stack and stack[-1] == '<':
        stack.pop()
      if not stack:
        return (-1, None)

  # Did not find end of expression or unbalanced parentheses on this line
  return (-1, stack)
1346
1347
def CloseExpression(clean_lines, linenum, pos):
  """Locates the closer of the ( or { or [ or < found at the given position.

  If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the
  linenum/pos that correspond to the closing of the expression.

  TODO(unknown): cpplint spends a fair bit of time matching parentheses.
  Ideally we would want to index all opening and closing parentheses once
  and have CloseExpression be just a simple lookup, but due to preprocessor
  tricks, this is not so easy.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *past* the closing brace, or
    (line, len(lines), -1) if we never find a close.  Note we ignore
    strings and comments when matching; and the line we return is the
    'cleansed' line at linenum.
  """
  line = clean_lines.elided[linenum]

  # Nothing to close unless we sit on an opener; '<<' and '<=' are operators
  # rather than openers.
  starts_expression = line[pos] in '({[<'
  if not starts_expression or Match(r'<[<=]', line[pos:]):
    return (line, clean_lines.NumLines(), -1)

  # Scan the starting line first, then keep going line by line for as long
  # as there is an open nesting stack and there are lines left.
  (end_pos, stack) = FindEndOfExpressionInLine(line, pos, [])
  while end_pos < 0:
    if not stack or linenum >= clean_lines.NumLines() - 1:
      # Unbalanced expression or end of file reached, give up
      return (line, clean_lines.NumLines(), -1)
    linenum += 1
    line = clean_lines.elided[linenum]
    (end_pos, stack) = FindEndOfExpressionInLine(line, 0, stack)

  return (line, linenum, end_pos)
1390
1391
def FindStartOfExpressionInLine(line, endpos, stack):
  """Find position at the matching start of current expression.

  This is almost the reverse of FindEndOfExpressionInLine, but note
  that the input position and returned position differs by 1.

  The line is scanned backwards from endpos down to index 0.  Closing
  brackets are pushed onto stack and popped again when the matching opener
  is seen.  The list passed as stack is modified in place.

  Args:
    line: a CleansedLines line.
    endpos: start searching at this position.
    stack: nesting stack at endpos.

  Returns:
    On finding matching start: (index at matching start, None)
    On finding an unclosed expression: (-1, None)
    Otherwise: (-1, new stack at beginning of this line)
  """
  i = endpos
  # For an empty line the caller passes endpos == -1, so the loop body never
  # runs and the stack is handed back unchanged.
  while i >= 0:
    char = line[i]
    if char in ')]}':
      # Found end of expression, push to expression stack
      stack.append(char)
    elif char == '>':
      # Found potential end of template argument list.
      #
      # Ignore it if it's a "->" or ">=" or "operator>"
      if (i > 0 and
          (line[i - 1] == '-' or
           Match(r'\s>=\s', line[i - 1:]) or
           Search(r'\boperator\s*$', line[0:i]))):
        # Together with the decrement at the bottom of the loop this also
        # skips the character in front of the '>'.
        i -= 1
      else:
        stack.append('>')
    elif char == '<':
      # Found potential start of template argument list
      if i > 0 and line[i - 1] == '<':
        # Left shift operator.  Skip the other '<' of the pair as well (the
        # second decrement happens at the bottom of the loop).
        i -= 1
      else:
        # If there is a matching '>', we can pop the expression stack.
        # Otherwise, ignore this '<' since it must be an operator.
        if stack and stack[-1] == '>':
          stack.pop()
          if not stack:
            return (i, None)
    elif char in '([{':
      # Found start of expression.
      #
      # If there are any unmatched '>' on the stack, they must be
      # operators.  Remove those.
      while stack and stack[-1] == '>':
        stack.pop()
      if not stack:
        # An opener without any pending closer: unbalanced.
        return (-1, None)
      if ((char == '(' and stack[-1] == ')') or
          (char == '[' and stack[-1] == ']') or
          (char == '{' and stack[-1] == '}')):
        stack.pop()
        if not stack:
          return (i, None)
      else:
        # Mismatched parentheses
        return (-1, None)
    elif char == ';':
      # Found something that look like end of statements.  If we are currently
      # expecting a '<', the matching '>' must have been an operator, since
      # template argument list should not contain statements.
      while stack and stack[-1] == '>':
        stack.pop()
      if not stack:
        return (-1, None)

    i -= 1

  # Reached the beginning of the line with closers still pending.
  return (-1, stack)
1467
1468
def ReverseCloseExpression(clean_lines, linenum, pos):
  """Locates the opener of the ) or } or ] or > found at the given position.

  If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the
  linenum/pos that correspond to the opening of the expression.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *at* the opening brace, or
    (line, 0, -1) if we never find the matching opening brace.  Note
    we ignore strings and comments when matching; and the line we
    return is the 'cleansed' line at linenum.
  """
  line = clean_lines.elided[linenum]
  ends_expression = line[pos] in ')}]>'
  if not ends_expression:
    return (line, 0, -1)

  # Scan the given line first, then walk backwards line by line for as long
  # as there is an open nesting stack and there are lines left.
  (start_pos, stack) = FindStartOfExpressionInLine(line, pos, [])
  while start_pos < 0:
    if not stack or linenum <= 0:
      # Unbalanced expression or beginning of file reached, give up
      return (line, 0, -1)
    linenum -= 1
    line = clean_lines.elided[linenum]
    (start_pos, stack) = FindStartOfExpressionInLine(line, len(line) - 1, stack)

  return (line, linenum, start_pos)
1505
1506
def CheckForCopyright(filename, lines, error):
  """Logs an error if no Copyright message appears at the top of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  # We'll say it should occur by line 10. Don't forget there's a
  # dummy line at the front.
  #
  # range() instead of xrange(): xrange is not defined on Python 3, and at
  # most ten indices are produced here.
  for linenum in range(1, min(len(lines), 11)):
    if re.search(r'Copyright', lines[linenum], re.I):
      return

  # No copyright line was found.
  error(filename, 0, 'legal/copyright', 5,
        'No copyright message found.  '
        'You should have a line: "Copyright [year] <Copyright Owner>"')
1518
1519
def GetIndentLevel(line):
  """Counts the spaces in front of the first character of line.

  Only lines whose leading run of spaces is directly followed by a
  non-whitespace character are measured; for any other line the result
  is 0.

  Args:
    line: A string to check.

  Returns:
    An integer count of leading spaces, possibly zero.
  """
  leading = Match(r'^( *)\S', line)
  if not leading:
    return 0
  return len(leading.group(1))
1534
1535
def GetHeaderGuardCPPVariable(filename):
  """Returns the CPP variable that should be used as a header guard.

  The variable is derived from the repository-relative path of the file:
  an optional leading "<_root>/" is removed, the characters '-', '.', '/'
  and whitespace are replaced by '_', the result is upper-cased and a
  trailing '_' is appended.

  Args:
    filename: The name of a C++ header file.

  Returns:
    The CPP variable that should be used as a header guard in the
    named file.

  """

  # Restores original filename in case that cpplint is invoked from Emacs's
  # flymake.
  filename = re.sub(r'_flymake\.h$', '.h', filename)
  filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)

  fileinfo = FileInfo(filename)
  file_path_from_root = fileinfo.RepositoryName()
  if _root:
    # Strip the root with plain string operations rather than a regular
    # expression built from _root: that way characters such as '.' or '+'
    # in _root are taken literally, and no invalid pattern is produced where
    # os.sep is a backslash.  '/' is used as the separator on every platform
    # because FileInfo.FullName() converts backslashes to forward slashes;
    # _root is normalized the same way.
    root_prefix = _root.replace('\\', '/') + '/'
    if file_path_from_root.startswith(root_prefix):
      file_path_from_root = file_path_from_root[len(root_prefix):]
  return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_'
1558
1559
def CheckForHeaderGuard(filename, lines, error):
  """Verifies the #ifndef / #define / #endif header guard of a header file.

  Logs an error if no #ifndef header guard is present.  For other
  headers, checks that the full pathname is used.

  Args:
    filename: The name of the C++ header file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  cppvar = GetHeaderGuardCPPVariable(filename)

  guard_name = None
  guard_linenum = 0
  defined_name = None
  last_endif = None
  last_endif_linenum = 0
  for linenum, line in enumerate(lines):
    tokens = line.split()
    if len(tokens) >= 2:
      directive, argument = tokens[0], tokens[1]
      # Only the first #ifndef and the first #define are remembered.
      if directive == '#ifndef' and not guard_name:
        guard_name = argument
        guard_linenum = linenum
      if directive == '#define' and not defined_name:
        defined_name = argument
    # The last line starting with #endif wins; the entire line is kept.
    if line.startswith('#endif'):
      last_endif = line
      last_endif_linenum = linenum

  if not guard_name:
    error(filename, 0, 'build/header_guard', 5,
          'No #ifndef header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  if not defined_name:
    error(filename, 0, 'build/header_guard', 5,
          'No #define header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
  # for backward compatibility (reported with confidence 0 instead of 5).
  if guard_name != cppvar:
    error_level = 0 if guard_name == cppvar + '_' else 5

    ParseNolintSuppressions(filename, lines[guard_linenum], guard_linenum,
                            error)
    error(filename, guard_linenum, 'build/header_guard', error_level,
          '#ifndef header guard has wrong style, please use: %s' % cppvar)

  if defined_name != guard_name:
    error(filename, 0, 'build/header_guard', 5,
          '#ifndef and #define don\'t match, suggested CPP variable is: %s' %
          cppvar)
    return

  expected_endif = '#endif  // %s' % cppvar
  if last_endif != expected_endif:
    tolerated_endif = '#endif  // %s' % (cppvar + '_')
    error_level = 0 if last_endif == tolerated_endif else 5

    ParseNolintSuppressions(filename, lines[last_endif_linenum],
                            last_endif_linenum, error)
    error(filename, last_endif_linenum, 'build/header_guard', error_level,
          '#endif line should be "#endif  // %s"' % cppvar)
1633
1634
def CheckForBadCharacters(filename, lines, error):
  """Reports every line that contains a bad character.

  Two kinds of bad characters:

  1. Unicode replacement characters: These indicate that either the file
  contained invalid UTF-8 (likely) or Unicode replacement characters (which
  it shouldn't).  Note that it's possible for this to throw off line
  numbering if the invalid UTF-8 occurred adjacent to a newline.

  2. NUL bytes.  These are problematic for some tools.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # (character looked for, error category, error message), in the order in
  # which they are reported for a line.
  bad_characters = (
      (u'\ufffd', 'readability/utf8',
       'Line contains invalid UTF-8 (or Unicode replacement character).'),
      ('\0', 'readability/nul', 'Line contains NUL byte.'),
  )
  for linenum, line in enumerate(lines):
    for character, category, message in bad_characters:
      if character in line:
        error(filename, linenum, category, 5, message)
1658
1659
def CheckForNewlineAtEOF(filename, lines, error):
  """Reports a file that does not end with a newline character.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  # The array lines() was created by adding two newlines to the
  # original file (go figure), then splitting on \n.
  # To verify that the file ends in \n, we just have to make sure the
  # last-but-two element of lines() exists and is empty.
  ends_with_newline = len(lines) >= 3 and not lines[-2]
  if ends_with_newline:
    return

  error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
        'Could not find a newline character at the end of the file.')
1676
1677
def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
  """Reports /* ... */ comments and "..." strings left open on a line.

  /* ... */ comments are legit inside macros, for one line.
  Otherwise, we prefer // comments, so it's ok to warn about the
  other.  Likewise, it's ok for strings to extend across multiple
  lines, as long as a line continuation character (backslash)
  terminates each line. Although not currently prohibited by the C++
  style guide, it's ugly and unnecessary. We don't do well with either
  in this lint program, so we warn about both.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Remove all \\ (escaped backslashes) from the line. They are OK, and the
  # second (escaped) slash may trigger later \" detection erroneously.
  line = clean_lines.elided[linenum].replace('\\\\', '')

  # More comment openers than closers: a comment continues past this line.
  comment_opens = line.count('/*')
  comment_closes = line.count('*/')
  if comment_opens > comment_closes:
    error(filename, linenum, 'readability/multiline_comment', 5,
          'Complex multi-line /*...*/-style comment found. '
          'Lint may give bogus warnings.  '
          'Consider replacing these with //-style comments, '
          'with #if 0...#endif, '
          'or with more clearly structured multi-line comments.')

  # An odd number of unescaped double quotes: a string continues past this
  # line.
  unescaped_quotes = line.count('"') - line.count('\\"')
  if unescaped_quotes % 2:
    error(filename, linenum, 'readability/multiline_string', 5,
          'Multi-line string ("...") found.  This lint script doesn\'t '
          'do well with such strings, and may give bogus warnings.  '
          'Use C++11 raw strings or concatenation instead.')
1714
1715
# Table of thread-unsafe functions.  Each entry is a tuple of
# (non-threadsafe name, thread-safe alternative, validation pattern)
# where both names end with the opening parenthesis of the call.
#
# The validation pattern is used to eliminate false positives such as:
#  _rand();               // false positive due to substring match.
#  ->rand();              // some member function rand().
#  ACMRandom rand(seed);  // some variable named rand.
#  ISAACRandom rand();    // another variable named rand.
#
# Basically we require the return value of these functions to be used
# in some expression context on the same line by matching on some
# operator before the function name.  This eliminates constructors and
# member function calls.
#
# The prefix below matches one of the listed operator characters (or an
# opening parenthesis) optionally followed by whitespace, or a '>' that is
# followed by at least one whitespace character.
_UNSAFE_FUNC_PREFIX = r'(?:[-+*/=%^&|(<]\s*|>\s+)'
_THREADING_LIST = (
    ('asctime(', 'asctime_r(', _UNSAFE_FUNC_PREFIX + r'asctime\([^)]+\)'),
    ('ctime(', 'ctime_r(', _UNSAFE_FUNC_PREFIX + r'ctime\([^)]+\)'),
    ('getgrgid(', 'getgrgid_r(', _UNSAFE_FUNC_PREFIX + r'getgrgid\([^)]+\)'),
    ('getgrnam(', 'getgrnam_r(', _UNSAFE_FUNC_PREFIX + r'getgrnam\([^)]+\)'),
    # getlogin() and rand() are only matched with an empty argument list.
    ('getlogin(', 'getlogin_r(', _UNSAFE_FUNC_PREFIX + r'getlogin\(\)'),
    ('getpwnam(', 'getpwnam_r(', _UNSAFE_FUNC_PREFIX + r'getpwnam\([^)]+\)'),
    ('getpwuid(', 'getpwuid_r(', _UNSAFE_FUNC_PREFIX + r'getpwuid\([^)]+\)'),
    ('gmtime(', 'gmtime_r(', _UNSAFE_FUNC_PREFIX + r'gmtime\([^)]+\)'),
    ('localtime(', 'localtime_r(', _UNSAFE_FUNC_PREFIX + r'localtime\([^)]+\)'),
    ('rand(', 'rand_r(', _UNSAFE_FUNC_PREFIX + r'rand\(\)'),
    ('strtok(', 'strtok_r(',
     _UNSAFE_FUNC_PREFIX + r'strtok\([^)]+\)'),
    ('ttyname(', 'ttyname_r(', _UNSAFE_FUNC_PREFIX + r'ttyname\([^)]+\)'),
    )
1744
1745
def CheckPosixThreading(filename, clean_lines, linenum, error):
  """Suggests reentrant replacements for thread-unsafe POSIX calls.

  A lot of code predates multi-threading, and many engineers learned
  posix before the threading extensions existed.  For every entry of
  _THREADING_LIST whose validation pattern is found on the line, a
  'runtime/threadsafe_fn' error naming the thread-safe alternative is
  reported.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  elided = clean_lines.elided[linenum]
  for unsafe_call, safe_call, validation_regex in _THREADING_LIST:
    # The regexp, rather than a substring test on the name, decides
    # whether this really is a call to the function in question.
    if not Search(validation_regex, elided):
      continue
    message = ''.join(['Consider using ', safe_call,
                       '...) instead of ', unsafe_call,
                       '...) for improved thread safety.'])
    error(filename, linenum, 'runtime/threadsafe_fn', 2, message)
1770
1771
def CheckVlogArguments(filename, clean_lines, linenum, error):
  """Flags VLOG() calls that are given a symbolic severity.

  VLOG(2) is fine.  VLOG(INFO), VLOG(ERROR), VLOG(WARNING), VLOG(DFATAL)
  and VLOG(FATAL) are reported under 'runtime/vlog'.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  symbolic_level = Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)',
                          clean_lines.elided[linenum])
  if not symbolic_level:
    return
  error(filename, linenum, 'runtime/vlog', 5,
        'VLOG() should be used with numeric verbosity level.  '
        'Use LOG() if you want symbolic severity levels.')
1789
# Matches a line that starts with "*name++;" or "*name--;".  Such a
# statement steps the pointer instead of changing the pointed-to value.
_RE_PATTERN_INVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')


def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Reports statements of the form *count++ or *count--.

  In a function such as
  void increment_counter(int* count) {
    *count++;
  }
  the statement is parsed as *(count++): the pointer moves and the
  dereferenced value is discarded.  The intended code is ++*count,
  (*count)++ or *count += 1.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  statement = clean_lines.elided[linenum]
  if _RE_PATTERN_INVALID_INCREMENT.match(statement) is None:
    return
  error(filename, linenum, 'runtime/invalid_increment', 5,
        'Changing pointer instead of value (or unused value of operator*).')
1816
1817
class _BlockInfo(object):
  """Bookkeeping record for one generic block of code."""

  def __init__(self, seen_open_brace):
    # Running balance of '(' minus ')' seen while this block is innermost.
    self.open_parentheses = 0
    # Inline assembly state of this block; starts out as _NO_ASM.
    self.inline_asm = _NO_ASM
    # Whether the '{' that opens this block has been consumed yet.
    self.seen_open_brace = seen_open_brace

  def CheckBegin(self, filename, clean_lines, linenum, error):
    """Hook for checks on the text that precedes the opening brace.

    Mostly useful for the text between a class identifier and its "{",
    which is usually where base classes are listed.  A generic block has
    nothing to verify there, so this implementation does nothing.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Hook for checks on the text that follows the closing brace.

    Mostly useful for end of namespace comments.  A generic block has
    nothing to verify there, so this implementation does nothing.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """

  def IsBlockInfo(self):
    """Tells whether this object is exactly a _BlockInfo.

    Handy for telling a plain _BlockInfo apart from instances of its
    derived classes.

    Returns:
      True for this class, False for derived classes.
    """
    return type(self) is _BlockInfo
1864
1865
class _ExternCInfo(_BlockInfo):
  """Block record for the body of an 'extern "C"' block."""

  def __init__(self):
    # The record always starts with its opening brace marked as seen.
    super(_ExternCInfo, self).__init__(True)
1871
1872
class _ClassInfo(_BlockInfo):
  """Block record for a class or struct declaration."""

  def __init__(self, name, class_or_struct, clean_lines, linenum):
    _BlockInfo.__init__(self, False)
    self.name = name
    self.starting_linenum = linenum
    self.is_derived = False

    # Initial access level: 'public' for a struct, 'private' otherwise.
    self.is_struct = (class_or_struct == 'struct')
    self.access = 'public' if self.is_struct else 'private'

    # Indentation of the line that declares the class.  raw_lines is used
    # rather than elided so that leading comments are accounted for.
    self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum])

    # Estimate where the class ends: the first line, starting at the
    # declaration, on which the running '{' / '}' balance is zero.  This
    # gets confused by things like:
    #   class A {
    #   } *x = { ...
    #
    # But it's still good enough for CheckSectionSpacing.
    self.last_line = 0
    brace_balance = 0
    line_index = linenum
    total_lines = clean_lines.NumLines()
    while line_index < total_lines:
      text = clean_lines.elided[line_index]
      brace_balance += text.count('{') - text.count('}')
      if brace_balance == 0:
        self.last_line = line_index
        break
      line_index += 1

  def CheckBegin(self, filename, clean_lines, linenum, error):
    """Marks the class as derived when the line contains a lone ':'."""
    # A ':' that is not part of '::'.
    head = clean_lines.elided[linenum]
    if Search('(^|[^:]):($|[^:])', head):
      self.is_derived = True

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Checks that the closing brace lines up with the class declaration."""
    # Only a closing brace preceded by nothing but spaces is examined,
    # which leaves single-line class definitions unchecked.
    brace = Match(r'^( *)\}', clean_lines.elided[linenum])
    if not brace or len(brace.group(1)) == self.class_indent:
      return
    parent = ('struct ' if self.is_struct else 'class ') + self.name
    error(filename, linenum, 'whitespace/indent', 3,
          'Closing brace should be aligned with beginning of %s' % parent)
1923
1924
class _NamespaceInfo(_BlockInfo):
  """Block record for a namespace."""

  def __init__(self, name, linenum):
    _BlockInfo.__init__(self, False)
    # Anonymous namespaces are stored with an empty name.
    self.name = name or ''
    self.starting_linenum = linenum

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Checks the comment that follows the namespace's closing brace.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line holding the closing brace.
      error: The function to call with any errors found.
    """
    # The raw line is needed here because the comment itself is inspected.
    closing = clean_lines.raw_lines[linenum]

    # A namespace spanning fewer than 10 lines is skipped unless its
    # closing line already carries a comment that mentions "namespace";
    # in that case the comment is still validated below.
    #
    # TODO(unknown): We always want to check end of namespace comments
    # if a namespace is large, but sometimes we also want to apply the
    # check if a short namespace contained nontrivial things (something
    # other than forward declarations).  There is currently no logic on
    # deciding what these nontrivial things are, so this check is
    # triggered by namespace size only, which works most of the time.
    is_short = linenum - self.starting_linenum < 10
    if is_short and not Match(r'};*\s*(//|/\*).*\bnamespace\b', closing):
      return

    # Recognized comment forms:
    # - "//" as well as C style "/* */" comments, so that namespaces closed
    #   inside preprocessor macros can be handled too;
    # - trailing punctuation, e.g. "// end of namespace <name>." with the
    #   period at the end.
    # Nothing else may follow the name; otherwise a comment naming some
    # longer namespace could be mistaken for this one.
    if self.name:
      # Named namespace: an error is reported when the closing line DOES
      # carry such a comment.
      # NOTE(review): this is the opposite of the policy applied to
      # anonymous namespaces below -- confirm that it is intentional.
      named_comment = (r'};*\s*(//|/\*).*\bnamespace\s+' +
                       re.escape(self.name) + r'[\*/\.\\\s]*$')
      if Match(named_comment, closing):
        error(filename, linenum, 'readability/namespace', 5,
              'Namespace should not be terminated with "// namespace %s"' %
              self.name)
      return

    # Anonymous namespace: a terminating "// namespace" comment is required.
    if Match(r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', closing):
      return

    # For "// namespace anonymous" or "// anonymous namespace (more text)",
    # also mention "// anonymous namespace" as an acceptable form.
    if Match(r'}.*\b(namespace anonymous|anonymous namespace)\b', closing):
      error(filename, linenum, 'readability/namespace', 5,
            'Anonymous namespace should be terminated with "// namespace"'
            ' or "// anonymous namespace"')
    else:
      error(filename, linenum, 'readability/namespace', 5,
            'Anonymous namespace should be terminated with "// namespace"')
1984
1985
class _PreprocessorInfo(object):
  """Nesting stack checkpoints recorded for one #if/#else/#endif group."""

  def __init__(self, stack_before_if):
    # Becomes True once an #else or #elif has been seen.
    self.seen_else = False

    # The entire nesting stack up to #else; empty until one has been seen.
    self.stack_before_else = []

    # The entire nesting stack as it was before the #if.
    self.stack_before_if = stack_before_if
1998
1999
class NestingState(object):
  """Holds states related to parsing braces."""

  def __init__(self):
    # Stack for tracking all open blocks.  Namespace and class entries are
    # pushed as soon as their declaration is seen (possibly before the
    # "{"), other entries when a "{" is seen; entries are popped on "}".
    # The objects pushed by this class are:
    # - _ClassInfo: a class or struct.
    # - _NamespaceInfo: a namespace.
    # - _ExternCInfo: an 'extern "..."' block.
    # - _BlockInfo: some other type of block.
    self.stack = []

    # Top of the previous stack before each Update().
    #
    # Because the nesting_stack is updated at the end of each line, we
    # had to do some convoluted checks to find out what is the current
    # scope at the beginning of the line.  This check is simplified by
    # saving the previous top of nesting stack.
    #
    # We could save the full stack, but we only need the top.  Copying
    # the full nesting stack would slow down cpplint by ~10%.
    #
    # Update() replaces this initial value with a block object, or with
    # None when the stack was empty.
    self.previous_stack_top = []

    # Stack of _PreprocessorInfo objects, one per open #if group.
    self.pp_stack = []

  def SeenOpenBrace(self):
    """Check if we have seen the opening brace for the innermost block.

    Returns:
      True if we have seen the opening brace (or no block is open at
      all), False if the innermost block is still expecting an opening
      brace.
    """
    return (not self.stack) or self.stack[-1].seen_open_brace

  def InNamespaceBody(self):
    """Check if we are currently one level inside a namespace body.

    Returns:
      True if top of the stack is a namespace block, False otherwise
      (including when the stack is empty).
    """
    # bool() keeps the result a real boolean; a bare "self.stack and ..."
    # would hand back the empty list itself when no block is open.
    return bool(self.stack) and isinstance(self.stack[-1], _NamespaceInfo)

  def InExternC(self):
    """Check if we are currently one level inside an 'extern "C"' block.

    Returns:
      True if top of the stack is an extern block, False otherwise
      (including when the stack is empty).
    """
    return bool(self.stack) and isinstance(self.stack[-1], _ExternCInfo)

  def InClassDeclaration(self):
    """Check if we are currently one level inside a class or struct declaration.

    Returns:
      True if top of the stack is a class/struct, False otherwise
      (including when the stack is empty).
    """
    return bool(self.stack) and isinstance(self.stack[-1], _ClassInfo)

  def InAsmBlock(self):
    """Check if we are currently one level inside an inline ASM block.

    Returns:
      True if the top of the stack is a block containing inline ASM,
      False otherwise (including when the stack is empty).
    """
    return bool(self.stack) and self.stack[-1].inline_asm != _NO_ASM

  def InTemplateArgumentList(self, clean_lines, linenum, pos):
    """Check if current position is inside template argument list.

    Scans forward from (linenum, pos), possibly across lines, until a
    character settles the question.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      pos: position just after the suspected template argument.
    Returns:
      True if (linenum, pos) is inside template arguments.
    """
    while linenum < clean_lines.NumLines():
      # Find the earliest character that might indicate a template argument.
      # When the rest of the line holds none of the listed delimiters, the
      # regexp backtracks and yields the last character of the line.
      line = clean_lines.elided[linenum]
      match = Match(r'^[^{};=\[\]\.<>]*(.)', line[pos:])
      if not match:
        # Nothing left on this line; carry on with the next one.
        linenum += 1
        pos = 0
        continue
      token = match.group(1)
      pos += len(match.group(0))

      # These things do not look like template argument list:
      #   class Suspect {
      #   class Suspect x; }
      if token in ('{', '}', ';'):
        return False

      # These things look like template argument list:
      #   template <class Suspect>
      #   template <class Suspect = default_value>
      #   template <class Suspect[]>
      #   template <class Suspect...>
      if token in ('>', '=', '[', ']', '.'):
        return True

      # Check if token is an unmatched '<'.
      # If not, move on to the next character.
      if token != '<':
        pos += 1
        if pos >= len(line):
          linenum += 1
          pos = 0
        continue

      # We can't be sure if we just find a single '<', and need to
      # find the matching '>'.  pos - 1 is the position of the '<'.
      (_, end_line, end_pos) = CloseExpression(clean_lines, linenum, pos - 1)
      if end_pos < 0:
        # Not sure if template argument list or syntax error in file
        return False
      linenum = end_line
      pos = end_pos
    return False

  def UpdatePreprocessor(self, line):
    """Update preprocessor stack.

    We need to handle preprocessors due to classes like this:
      #ifdef SWIG
      struct ResultDetailsPageElementExtensionPoint {
      #else
      struct ResultDetailsPageElementExtensionPoint : public Extension {
      #endif

    We make the following assumptions (good enough for most files):
    - Preprocessor condition evaluates to true from #if up to first
      #else/#elif/#endif.

    - Preprocessor condition evaluates to false from #else/#elif up
      to #endif.  We still perform lint checks on these lines, but
      these do not affect nesting stack.

    Args:
      line: current line to check.
    """
    if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
      # Beginning of #if block, save the nesting stack here.  The saved
      # stack will allow us to restore the parsing state in the #else case.
      self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
    elif Match(r'^\s*#\s*(else|elif)\b', line):
      # Beginning of #else block
      if self.pp_stack:
        if not self.pp_stack[-1].seen_else:
          # This is the first #else or #elif block.  Remember the
          # whole nesting stack up to this point.  This is what we
          # keep after the #endif.
          self.pp_stack[-1].seen_else = True
          self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)

        # Restore the stack to how it was before the #if
        self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
      else:
        # TODO(unknown): unexpected #else, issue warning?
        pass
    elif Match(r'^\s*#\s*endif\b', line):
      # End of #if or #else blocks.
      if self.pp_stack:
        # If we saw an #else, we will need to restore the nesting
        # stack to its former state before the #else, otherwise we
        # will just continue from where we left off.
        if self.pp_stack[-1].seen_else:
          # Here we can just use a shallow copy since we are the last
          # reference to it.
          self.stack = self.pp_stack[-1].stack_before_else
        # Drop the corresponding #if
        self.pp_stack.pop()
      else:
        # TODO(unknown): unexpected #endif, issue warning?
        pass

  # TODO(unknown): Update() is too long, but we will refactor later.
  def Update(self, filename, clean_lines, linenum, error):
    """Update nesting state with current line.

    In order, this remembers the previous top of the stack, updates the
    preprocessor stack, tracks parentheses and inline assembly for the
    innermost block, pushes namespace and class declarations found on
    the line, updates the access level of the innermost class, and
    finally consumes the braces, semicolons and closing parentheses
    that are left on the line.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """
    line = clean_lines.elided[linenum]

    # Remember top of the previous nesting stack.
    #
    # The stack is always pushed/popped and not modified in place, so
    # we can just do a shallow copy instead of copy.deepcopy.  Using
    # deepcopy would slow down cpplint by ~28%.
    if self.stack:
      self.previous_stack_top = self.stack[-1]
    else:
      self.previous_stack_top = None

    # Update pp_stack
    self.UpdatePreprocessor(line)

    # Count parentheses.  This is to avoid adding struct arguments to
    # the nesting stack.
    if self.stack:
      inner_block = self.stack[-1]
      depth_change = line.count('(') - line.count(')')
      inner_block.open_parentheses += depth_change

      # Also check if we are starting or ending an inline assembly block.
      if inner_block.inline_asm in (_NO_ASM, _END_ASM):
        if (depth_change != 0 and
            inner_block.open_parentheses == 1 and
            _MATCH_ASM.match(line)):
          # Enter assembly block
          inner_block.inline_asm = _INSIDE_ASM
        else:
          # Not entering assembly block.  If previous line was _END_ASM,
          # we will now shift to _NO_ASM state.
          inner_block.inline_asm = _NO_ASM
      elif (inner_block.inline_asm == _INSIDE_ASM and
            inner_block.open_parentheses == 0):
        # Exit assembly block
        inner_block.inline_asm = _END_ASM

    # Consume namespace declaration at the beginning of the line.  Do
    # this in a loop so that we catch same line declarations like this:
    #   namespace proto2 { namespace bridge { class MessageSet; } }
    while True:
      # Match start of namespace.  The "\b\s*" below catches namespace
      # declarations even if it weren't followed by a whitespace, this
      # is so that we don't confuse our namespace checker.  The
      # missing spaces will be flagged by CheckSpacing.
      namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
      if not namespace_decl_match:
        break

      new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
      self.stack.append(new_namespace)

      # Continue with whatever follows the namespace name, skipping past
      # the opening brace when it is on the same line.
      line = namespace_decl_match.group(2)
      if line.find('{') != -1:
        new_namespace.seen_open_brace = True
        line = line[line.find('{') + 1:]

    # Look for a class declaration in whatever is left of the line
    # after parsing namespaces.  The regexp accounts for decorated classes
    # such as in:
    #   class LOCKABLE API Object {
    #   };
    class_decl_match = Match(
        r'^(\s*(?:template\s*<[\w\s<>,:]*>\s*)?'
        r'(class|struct)\s+(?:[A-Z_]+\s+)*(\w+(?:::\w+)*))'
        r'(.*)$', line)
    if (class_decl_match and
        (not self.stack or self.stack[-1].open_parentheses == 0)):
      # We do not want to accept classes that are actually template arguments:
      #   template <class Ignore1,
      #             class Ignore2 = Default<Args>,
      #             template <Args> class Ignore3>
      #   void Function() {};
      #
      # To avoid template argument cases, we scan forward and look for
      # an unmatched '>'.  If we see one, assume we are inside a
      # template argument list.
      end_declaration = len(class_decl_match.group(1))
      if not self.InTemplateArgumentList(clean_lines, linenum, end_declaration):
        self.stack.append(_ClassInfo(
            class_decl_match.group(3), class_decl_match.group(2),
            clean_lines, linenum))
        line = class_decl_match.group(4)

    # If we have not yet seen the opening brace for the innermost block,
    # run checks here.
    if not self.SeenOpenBrace():
      self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)

    # Update access control if we are inside a class/struct
    if self.stack and isinstance(self.stack[-1], _ClassInfo):
      classinfo = self.stack[-1]
      access_match = Match(
          r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?'
          r':(?:[^:]|$)',
          line)
      if access_match:
        classinfo.access = access_match.group(2)

        # Check that access keywords are indented +2 spaces relative to
        # the class.  Skip this check if the keywords are not preceded
        # by whitespaces only.
        indent = access_match.group(1)
        if (len(indent) != classinfo.class_indent + 2 and
            Match(r'^\s*$', indent)):
          if classinfo.is_struct:
            parent = 'struct ' + classinfo.name
          else:
            parent = 'class ' + classinfo.name
          slots = ''
          if access_match.group(3):
            slots = access_match.group(3)
          error(filename, linenum, 'whitespace/indent', 3,
                '%s%s: should be indented +2 space inside %s' % (
                    access_match.group(2), slots, parent))

    # Consume braces or semicolons from what's left of the line
    while True:
      # Match first brace, semicolon, or closed parenthesis.
      matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line)
      if not matched:
        break

      token = matched.group(1)
      if token == '{':
        # If namespace or class hasn't seen a opening brace yet, mark
        # namespace/class head as complete.  Push a new block onto the
        # stack otherwise.
        if not self.SeenOpenBrace():
          self.stack[-1].seen_open_brace = True
        elif Match(r'^extern\s*"[^"]*"\s*\{', line):
          self.stack.append(_ExternCInfo())
        else:
          self.stack.append(_BlockInfo(True))
          if _MATCH_ASM.match(line):
            self.stack[-1].inline_asm = _BLOCK_ASM

      elif token == ';' or token == ')':
        # If we haven't seen an opening brace yet, but we already saw
        # a semicolon, this is probably a forward declaration.  Pop
        # the stack for these.
        #
        # Similarly, if we haven't seen an opening brace yet, but we
        # already saw a closing parenthesis, then these are probably
        # function arguments with extra "class" or "struct" keywords.
        # Also pop these stack for these.
        if not self.SeenOpenBrace():
          self.stack.pop()
      else:  # token == '}'
        # Perform end of block checks and pop the stack.
        if self.stack:
          self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
          self.stack.pop()
      line = matched.group(2)

  def InnermostClass(self):
    """Get class info on the top of the stack.

    Returns:
      The _ClassInfo closest to the top of the stack if we are inside a
      class, or None otherwise.
    """
    for block in reversed(self.stack):
      if isinstance(block, _ClassInfo):
        return block
    return None

  def CheckCompletedBlocks(self, filename, error):
    """Checks that all classes and namespaces have been completely parsed.

    Call this when all lines in a file have been processed.  Every class
    or namespace still left on the stack is reported at the line where
    it started.

    Args:
      filename: The name of the current file.
      error: The function to call with any errors found.
    """
    # Note: This test can result in false positives if #ifdef constructs
    # get in the way of brace matching. See the testBuildClass test in
    # cpplint_unittest.py for an example of this.
    for obj in self.stack:
      if isinstance(obj, _ClassInfo):
        error(filename, obj.starting_linenum, 'build/class', 5,
              'Failed to find complete declaration of class %s' %
              obj.name)
      elif isinstance(obj, _NamespaceInfo):
        error(filename, obj.starting_linenum, 'build/namespaces', 5,
              'Failed to find complete declaration of namespace %s' %
              obj.name)
2372
2373
def CheckForNonStandardConstructs(filename, clean_lines, linenum,
                                  nesting_state, error):
  r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.

  Complain about several constructs which gcc-2 accepts, but which are
  not standard C++.  Warning about these in lint is one way to ease the
  transition to new compilers.
  - put storage class first (e.g. "static const" instead of "const static").
  - "%lld" instead of "%qd" in printf-type functions.
  - "%1$d" is non-standard in printf-type functions.
  - "\%" is an undefined character escape sequence.
  - text after #endif is not allowed.
  - invalid inner-style forward declaration.
  - >? and <? operators, and their >?= and <?= cousins.

  Additionally, check for "const string&" members and for single-argument
  constructors that are not marked explicit, as it is very convenient to
  do so while checking for gcc-2 compliance.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable to which errors are reported; it is invoked as
           error(filename, linenum, category, confidence, message).
  """

  # Start with the line that has its comments removed but still contains
  # its strings: the format string checks need the string contents.
  with_strings = clean_lines.lines[linenum]

  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', with_strings):
    error(filename, linenum, 'runtime/printf_format', 3,
          '%q in format strings is deprecated.  Use %ll instead.')

  if Search(r'printf\s*\(.*".*%\d+\$', with_strings):
    error(filename, linenum, 'runtime/printf_format', 2,
          '%N$ formats are unconventional.  Try rewriting to avoid them.')

  # Drop escaped backslashes first, so that the backslash found by the
  # next check is one that really starts an escape sequence.
  without_double_backslashes = with_strings.replace('\\\\', '')

  if Search(r'("|\').*\\(%|\[|\(|{)', without_double_backslashes):
    error(filename, linenum, 'build/printf_format', 3,
          '%, [, (, and { are undefined character escapes.  Unescape them.')

  # Everything below works on the line with both comments and strings
  # removed.
  elided = clean_lines.elided[linenum]

  # Independent single-pattern checks, applied in this order.  Each row is
  # (matcher, pattern, category, confidence, message).
  elided_checks = (
      (Search,
       r'\b(const|volatile|void|char|short|int|long'
       r'|float|double|signed|unsigned'
       r'|schar|u?int8|u?int16|u?int32|u?int64)'
       r'\s+(register|static|extern|typedef)\b',
       'build/storage_class', 5,
       'Storage class (static, extern, typedef, etc) should be first.'),
      (Match,
       r'\s*#\s*endif\s*[^/\s]+',
       'build/endif_comment', 5,
       'Uncommented text after #endif is non-standard.  Use a comment.'),
      (Match,
       r'\s*class\s+(\w+\s*::\s*)+\w+\s*;',
       'build/forward_decl', 5,
       'Inner-style forward declarations are invalid.  Remove this line.'),
      (Search,
       r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
       'build/deprecated', 3,
       '>? and <? (max and min) operators are non-standard and deprecated.'),
      # TODO(unknown): Could it be expanded safely to arbitrary references,
      # without triggering too many false positives? The first
      # attempt triggered 5 warnings for mostly benign code in the regtest,
      # hence the restriction.
      # Here's the original regexp, for the reference:
      # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
      # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
      (Search,
       r'^\s*const\s*string\s*&\s*\w+\s*;',
       'runtime/member_string_references', 2,
       'const string& members are dangerous. It is much better to use '
       'alternatives, such as pointers or simple constants.'),
  )
  for find, pattern, category, confidence, message in elided_checks:
    if find(pattern, elided):
      error(filename, linenum, category, confidence, message)

  # The remaining check only applies inside a class whose head has been
  # completed, i.e. whose opening brace has been seen.
  classinfo = nesting_state.InnermostClass()
  if not classinfo or not classinfo.seen_open_brace:
    return

  # The class may have been declared with namespace or classname qualifiers.
  # The constructor and destructor will not have those qualifiers.
  base_classname = classinfo.name.split('::')[-1]
  escaped_classname = re.escape(base_classname)

  # Look for single-argument constructors that aren't marked explicit.
  # Technically a valid construct, but against style.
  constructor = Match(r'\s+(?:inline\s+)?%s\s*\(([^,()]+)\)'
                      % escaped_classname,
                      elided)
  if not constructor:
    return

  argument = constructor.group(1)
  if argument == 'void':
    # Classname(void) takes no argument at all.
    return
  if Search(r'\bstd::initializer_list\b', argument):
    return
  if Match(r'(const\s+)?%s(\s+const)?\s*(?:<\w+>\s*)?&'
           % escaped_classname, argument.strip()):
    # The argument is a reference to the class itself (copy constructor).
    return

  error(filename, linenum, 'runtime/explicit', 5,
        'Single-argument constructors should be marked explicit.')
2480
2481
def CheckSpacingForFunctionCall(filename, clean_lines, linenum, error):
  """Flags whitespace mistakes around the parentheses of a function call.

  Reports a space right after an opening paren, a space between a name and
  its opening paren, and a space right before a closing paren.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Conditions of if/for/while/switch follow more liberal spacing rules than
  # calls do.  When the line holds such a construct, only the text inside its
  # parentheses is examined for function calls; otherwise the whole line is.
  control_patterns = (r'\bif\s*\((.*)\)\s*{',
                      r'\bfor\s*\((.*)\)\s*{',
                      r'\bwhile\s*\((.*)\)\s*[{;]',
                      r'\bswitch\s*\((.*)\)\s*{')
  fncall = line
  for control_pattern in control_patterns:
    condition = Search(control_pattern, line)
    if condition:
      fncall = condition.group(1)
      break

  # Nothing to check when the text still contains a control structure or a
  # similar keyword.
  if Search(r'\b(if|for|while|switch|return|new|delete|catch|sizeof)\b',
            fncall):
    return
  # Pointers/references to functions are too tricky to judge.  They are
  # recognized as " (something)(maybe-something)" or
  # " (something)(maybe-something,".
  if Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall):
    return
  # Same for pointers/references to arrays: " (something)[something]".  The
  # contents of [] are assumed short enough never to wrap.
  if Search(r' \([^)]+\)\[[^\]]+\]', fncall):
    return

  # Space immediately inside an opening paren, e.g. "f( 3, 4 )".  Nested
  # parens such as "( (a+b) + c )" are exempt from the weaker variant.
  if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):      # a ( used for a fn call
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space after ( in function call')
  elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space after (')

  # Space between a word character and the opening paren.  Macro
  # definitions, typedefs, alias declarations and function pointer
  # declarations are left alone.
  if (Search(r'\w\s+\(', fncall) and
      not Search(r'#\s*define|typedef|using\s+\w+\s*=', fncall) and
      not Search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall)):
    # TODO(unknown): A space after an operator function seems to be a common
    # error; those are silenced for now by reporting them only at the
    # highest verbosity.
    confidence = 0 if Search(r'\boperator_*\b', line) else 4
    error(filename, linenum, 'whitespace/parens', confidence,
          'Extra space before ( in function call')

  # A ) followed only by a newline, or by a { and a newline, is assumed to
  # close a control statement (if/while/etc) and is not reported.
  if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
    # When nothing but whitespace precedes the closing paren, a more
    # descriptive message is possible.
    if Search(r'^\s+\)', fncall):
      message = 'Closing ) should be moved to the previous line'
    else:
      message = 'Extra space before )'
    error(filename, linenum, 'whitespace/parens', 2, message)
2555
2556
def IsBlankLine(line):
  """Tells whether a line has no visible content.

  A line counts as blank when it is empty or is made up of whitespace
  characters only.

  Args:
    line: A line of a string.

  Returns:
    True, if the given line is blank.
  """
  if not line:
    return True
  return line.isspace()
2570
2571
def CheckForFunctionLengths(filename, clean_lines, linenum,
                            function_state, error):
  """Tracks function bodies and reports the ones that grow too long.

  For an overview why this is done, see:
  http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

  The algorithm is simplistic and assumes the other style guidelines
  (especially spacing) are followed.
  Only unindented functions are checked, so class members are unchecked.
  Trivial bodies are unchecked, so constructors with huge initializer lists
  may be missed.
  Blank/comment lines are not counted so as to avoid encouraging the removal
  of vertical space and comments just to get through a lint check.
  NOLINT *on the last line of a function* disables this check.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    function_state: Current function name and lines in body so far.
    error: The function to call with any errors found.
  """
  lines = clean_lines.lines
  line = lines[linenum]

  # A line that opens with "decls * & space::name(" may start a function.
  header = Match(r'(\w(\w|::|\*|\&|\s)*)\(', line)
  starting_func = False
  if header:
    # A name made up of capitals and underscores only is taken to be a
    # macro and ignored, unless it is TEST or TEST_F.
    function_name = header.group(1).split()[-1]
    starting_func = (function_name in ('TEST', 'TEST_F') or
                     not Match(r'[A-Z_]+$', function_name))

  if not starting_func:
    if Match(r'^\}\s*$', line):  # function end
      function_state.Check(error, filename, linenum)
      function_state.End()
    elif not Match(r'^\s*$', line):
      function_state.Count()  # Count non-blank/non-comment lines.
    return

  # Scan forward from the header until something settles what this is.
  joined_line = ''
  for start_linenum in xrange(linenum, clean_lines.NumLines()):
    start_line = lines[start_linenum]
    joined_line += ' ' + start_line.lstrip()
    if Search(r'(;|})', start_line):
      # Declarations and trivial functions are ignored.
      break
    if Search(r'{', start_line):
      function = Search(r'((\w|:)*)\(', line).group(1)
      if Match(r'TEST', function):    # Handle TEST... macros
        parameter_regexp = Search(r'(\(.*\))', joined_line)
        if parameter_regexp:             # Ignore bad syntax
          function += parameter_regexp.group(1)
      else:
        function += '()'
      function_state.Begin(function)
      break
  else:
    # The scan ran off the end of the file: no body for the function (or
    # evidence of a non-function) was found.
    error(filename, linenum, 'readability/fn_size', 5,
          'Lint failed to find start of function body.')
2638
2639
# Matches a '//' comment whose text begins with TODO.  Group 1 is the
# whitespace between '//' and 'TODO', group 2 is the optional parenthesized
# username, and group 3 is the whitespace character (or end of string) that
# follows the optional colon.
_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')
2641
2642
def CheckComment(line, filename, linenum, next_line_start, error):
  """Checks for common mistakes in comments.

  Only the first '//' on the line is considered.  When it is not inside a
  string literal, three things are checked: the spacing between code and the
  comment, the formatting of a leading TODO, and the presence of a space
  between '//' and the comment text.

  Args:
    line: The line in question.
    filename: The name of the current file.
    linenum: The number of the line to check.
    next_line_start: The first non-whitespace column of the next line.
    error: The function to call with any errors found.
  """
  commentpos = line.find('//')
  if commentpos == -1:
    return

  # Check if the // may be in quotes.  If so, ignore it.  Escape sequences
  # are removed before the quotes are counted, so that an escaped backslash
  # right in front of a closing quote (as in "\\") is not mistaken for an
  # escaped quote.
  if re.sub(r'\\.', '', line[0:commentpos]).count('"') % 2 != 0:
    return

  # Allow one space for new scopes, two spaces otherwise:
  if (not (Match(r'^.*{ *//', line) and next_line_start == commentpos) and
      ((commentpos >= 1 and
        line[commentpos-1] not in string.whitespace) or
       (commentpos >= 2 and
        line[commentpos-2] not in string.whitespace))):
    error(filename, linenum, 'whitespace/comments', 2,
          'At least two spaces is best between code and comments')

  # Checks for common mistakes in TODO comments.
  comment = line[commentpos:]
  match = _RE_PATTERN_TODO.match(comment)
  if match:
    # One whitespace is correct; zero whitespace is handled elsewhere.
    leading_whitespace = match.group(1)
    if len(leading_whitespace) > 1:
      error(filename, linenum, 'whitespace/todo', 2,
            'Too many spaces before TODO')

    # The check for a missing username (group 2) is disabled:
    # if not match.group(2):
    #   error(filename, linenum, 'readability/todo', 2,
    #         'Missing username in TODO; it should look like '
    #         '"// TODO(my_username): Stuff."')

    # Group 3 is None when neither whitespace nor end of line follows the
    # TODO, which is reported as well.
    middle_whitespace = match.group(3)
    # Comparisons made explicit for correctness -- pylint: disable=g-explicit-bool-comparison
    if middle_whitespace != ' ' and middle_whitespace != '':
      error(filename, linenum, 'whitespace/todo', 2,
            'TODO(my_username) should be followed by a space')

  # If the comment contains an alphanumeric character, there
  # should be a space somewhere between it and the //.
  if Match(r'//[^ ]*\w', comment):
    error(filename, linenum, 'whitespace/comments', 4,
          'Should have a space between // and comment')
2695
def CheckAccess(filename, clean_lines, linenum, nesting_state, error):
  """Reports DISALLOW* macros placed outside a private: section.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # The elided line has comments and strings removed.
  line = clean_lines.elided[linenum]

  macro = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|'
                 r'DISALLOW_EVIL_CONSTRUCTORS|'
                 r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), line)
  if not macro:
    return

  stack = nesting_state.stack
  if not stack or not isinstance(stack[-1], _ClassInfo):
    # The macro sits outside a class declaration, or perhaps inside a
    # function when it should have been part of the class declaration.  A
    # warning could be issued here, but this probably resulted in a
    # compiler error already.
    return

  if stack[-1].access != 'private':
    error(filename, linenum, 'readability/constructors', 3,
          '%s must be in the private: section' % macro.group(1))
2725
2726
def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
  """Checks blank lines, comments and a few other spacing issues.

  Things we check for: a blank line at the start or at the end of a code
  block, the comment rules implemented by CheckComment, a space before an
  opening bracket, and spaces around the colon of a range-based for loop.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  # The "elided" lines cannot be used here because commented lines need to be
  # checked too.  The "raw" lines are no good either, because the contents of
  # C++11 raw strings must not be checked.
  raw = clean_lines.lines_without_raw_strings
  line = raw[linenum]

  # While the comments are still there, see whether the line is blank for no
  # good reason: the first line after a block is opened, or a blank line at
  # the end of a block (ie, right before a line like '}').
  #
  # None of this applies immediately inside a namespace body.  In other
  # words, no blank line warning is issued for this block:
  #   namespace {
  #
  #   }
  #
  # A warning about missing end of namespace comments will be issued instead.
  #
  # 'extern "C"' blocks are formatted like namespaces and are skipped too.
  if (IsBlankLine(line) and
      not nesting_state.InNamespaceBody() and
      not nesting_state.InExternC()):
    elided = clean_lines.elided
    prev_line = elided[linenum - 1]
    prevbrace = prev_line.rfind('{')
    # TODO(unknown): Don't complain if line before blank line, and line after,
    #                both start with alnums and are indented the same amount.
    #                This ignores whitespace at the start of a namespace block
    #                because those are not usually indented.
    if prevbrace != -1 and '}' not in prev_line[prevbrace:]:
      # A blank line at the start of a code block.  Two layouts are exempt:
      # the previous non-empty line holds function header parameters
      # indented 4 spaces (they did not fit in 80 columns next to the
      # function name), or it is indented 6 spaces, which may happen when
      # the initializers of a constructor do not fit into 80 columns.
      if Match(r' {6}\w', prev_line):  # Initializer list?
        # Walk up to the line opening the initializer list; it should be
        # indented 4 spaces to cause the 6 space indentation afterwards.
        search_position = linenum - 2
        while (search_position >= 0 and
               Match(r' {6}\w', elided[search_position])):
          search_position -= 1
        exception = (search_position >= 0 and
                     elided[search_position].startswith('    :'))
      else:
        # Simple heuristic for the last line of a function header: indented
        # 4 spaces, with a closing paren but no opening paren, followed by
        # an opening brace or a colon (for initializer lists).  A colon
        # indented 4 spaces is an initializer list.
        exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
                           prev_line)
                     or Match(r' {4}:', prev_line))

      if not exception:
        error(filename, linenum, 'whitespace/blank_line', 2,
              'Redundant blank line at the start of a code block '
              'should be deleted.')

    # A blank line at the end of a block is tolerated inside a long if-else
    # chain, like this:
    #   if (condition1) {
    #     // Something followed by a blank line
    #
    #   } else if (condition2) {
    #     // Something else
    #   }
    if linenum + 1 < clean_lines.NumLines():
      following_line = raw[linenum + 1]
      if (following_line
          and Match(r'\s*}', following_line)
          and '} else ' not in following_line):
        error(filename, linenum, 'whitespace/blank_line', 3,
              'Redundant blank line at the end of a code block '
              'should be deleted.')

    # The check for a blank line right after "public:", "protected:" or
    # "private:" is disabled:
    # matched = Match(r'\s*(public|protected|private):', prev_line)
    # if matched:
    #   error(filename, linenum, 'whitespace/blank_line', 3,
    #         'Do not leave a blank line after "%s:"' % matched.group(1))

  # Next, check comments.  The indentation of the following line is passed
  # along; it stays 0 on the last line of the file.
  next_line_start = 0
  if linenum + 1 < clean_lines.NumLines():
    following_line = raw[linenum + 1]
    next_line_start = len(following_line) - len(following_line.lstrip())
  CheckComment(line, filename, linenum, next_line_start, error)

  # From here on, work on the line without comments and strings.
  line = clean_lines.elided[linenum]

  # No space belongs before a bracket, except maybe after
  # 'delete []' or 'return []() {};'
  space_before_bracket = Search(r'\w\s+\[', line)
  if space_before_bracket and not Search(r'(?:delete|return)\s+\[', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Extra space before [')

  # A range-based for wants spaces before and after the colon, but
  # not around "::" tokens that might appear.
  if (Search(r'for *\(.*[^:]:[^: ]', line) or
      Search(r'for *\(.*[^: ]:[^:]', line)):
    error(filename, linenum, 'whitespace/forcolon', 2,
          'Missing space around colon in range-based for loop')
2853
2854
def CheckOperatorSpacing(filename, clean_lines, linenum, error):
  """Reports missing or extra whitespace around operators.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Operator methods are exempt from the spacing checks.  Their operator
  # characters are overwritten with underscores, which keeps the column
  # position of every other character.
  #
  # The replacement repeats until nothing matches, to avoid false positives
  # from operators that call operators.
  operator_pattern = r'^(.*\boperator\b)(\S+)(\s*\(.*)$'
  operator_match = Match(operator_pattern, line)
  while operator_match:
    before, symbol, after = operator_match.groups()
    line = before + '_' * len(symbol) + after
    operator_match = Match(operator_pattern, line)

  # No-spaces around = is allowed within an if: "if ( (a=Foo()) == 0 )".
  # Otherwise not.  Only non-spaces on *both* sides are reported; sometimes
  # people put non-spaces on one side when aligning ='s among many lines.
  if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
    error(filename, linenum, 'whitespace/operators', 4,
          'Missing spaces around =')

  # Missing spaces around binary operators like + - * / are hard to judge,
  # so they are not checked for now.  TODO.

  # Comparison operators always need whitespace around them.
  #
  # <= and >= are looked at first to avoid false positives with < and >;
  # lines that are not includes are then checked for < and >.
  #
  # An operator followed by a comma is assumed to be used in a macro
  # context and is not checked.  This avoids false positives.
  #
  # Note that && is not included here.  Those are checked separately
  # in CheckRValueReference
  comparison = Search(r'[^<>=!\s](==|!=|<=|>=|\|\|)[^<>=!\s,;\)]', line)
  if comparison:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around %s' % comparison.group(1))
  elif not Match(r'#.*include', line):
    # A < with no space on either side.  Strictly speaking a space missing
    # on one side only should be flagged too; requiring both avoids some
    # false positives with shifts.  Nothing is reported when a matching
    # closing token is found.
    less_than = Match(r'^(.*[^\s<])<[^\s=<,]', line)
    if less_than:
      (_, _, close_pos) = CloseExpression(
          clean_lines, linenum, len(less_than.group(1)))
      if close_pos < 0:
        error(filename, linenum, 'whitespace/operators', 3,
              'Missing spaces around <')

    # A > with no space on either side, handled like < above but searching
    # backwards for the matching opening token.
    greater_than = Match(r'^(.*[^-\s>])>[^\s=>,]', line)
    if greater_than:
      (_, _, open_pos) = ReverseCloseExpression(
          clean_lines, linenum, len(greater_than.group(1)))
      if open_pos < 0:
        error(filename, linenum, 'whitespace/operators', 3,
              'Missing spaces around <'.replace('<', '>'))

  # No-spaces around << is allowed when used like this: 10<<20, but
  # not otherwise (particularly, not when used as streams).
  # Operators following an opening parenthesis are allowed too, since
  # those tend to be macros that deal with operators.
  shift_left = Search(r'(operator|\S)(?:L|UL|ULL|l|ul|ull)?<<([^\s,=])', line)
  if shift_left:
    left, right = shift_left.groups()
    if (left != '(' and
        not (left.isdigit() and right.isdigit()) and
        not (left == 'operator' and right == ';')):
      error(filename, linenum, 'whitespace/operators', 3,
            'Missing spaces around <<')

  # No-spaces around >> is allowed for almost anything.  This is because
  # C++11 allows ">>" to close nested templates, which accounts for
  # most cases when ">>" is not followed by a space.
  #
  # ">>" followed by an alpha character is still reported, because that is
  # likely due to ">>" being used for right shifts, e.g.:
  #   value >> alpha
  #
  # When ">>" is used to close templates, the alphanumeric letter that
  # follows would be part of an identifier, and there should still be
  # a space separating the template type and the identifier.
  #   type<type<type>> alpha
  if Search(r'>>[a-zA-Z_]', line):
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around >>')

  # There shouldn't be space around unary operators
  unary = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
  if unary:
    error(filename, linenum, 'whitespace/operators', 4,
          'Extra space for operator %s' % unary.group(1))
2963
2964
def CheckParenthesisSpacing(filename, clean_lines, linenum, error):
  """Reports spacing problems around the parentheses of control statements.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # if, for, while and switch must be separated from their opening paren.
  keyword_paren = Search(r' (if\(|for\(|while\(|switch\()', line)
  if keyword_paren:
    error(filename, linenum, 'whitespace/parens', 5,
          'Missing space before ( in %s' % keyword_paren.group(1))

  # For if/for/while/switch, the left and right parens should be
  # consistent about how many spaces are inside the parens, and
  # there should either be zero or one spaces inside the parens.
  # We don't want: "if ( foo)" or "if ( foo   )".
  # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
  statement = Search(r'\b(if|for|while|switch)\s*'
                     r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
                     line)
  if not statement:
    return

  keyword, leading, first_char, trailing = statement.groups()
  if len(leading) != len(trailing):
    # "for ( ; foo; bar)": one extra leading space before an empty first
    # clause.
    empty_first_clause = (first_char == ';' and
                          len(leading) == 1 + len(trailing))
    # "for (foo; bar; )": no leading space and an empty last clause.
    if not (empty_first_clause or
            (not leading and Search(r'\bfor\s*\(.*; \)', line))):
      error(filename, linenum, 'whitespace/parens', 5,
            'Mismatching spaces inside () in %s' % keyword)
  if len(leading) > 1:
    error(filename, linenum, 'whitespace/parens', 5,
          'Should have zero or one spaces inside ( and ) in %s' %
          keyword)
3001
3002
def CheckCommaSpacing(filename, clean_lines, linenum, error):
  """Reports a missing space after a comma or a semicolon.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  raw = clean_lines.lines_without_raw_strings
  line = clean_lines.elided[linenum]

  # A comma (either as fn arg or operator) should always be followed by a
  # space.
  #
  # A comma directly followed by another comma is left alone, since the
  # only time when that happens is for empty macro arguments.
  #
  # The check runs in two passes: the elided line must show the missing
  # whitespace, and the raw line must confirm that it is not merely the
  # result of an elided comment.
  comma_without_space = r',[^,\s]'
  if Search(comma_without_space, line) and Search(comma_without_space,
                                                  raw[linenum]):
    error(filename, linenum, 'whitespace/comma', 3,
          'Missing space after ,')

  # A semicolon should always be followed by a space,
  # except for few corner cases
  # TODO(unknown): clarify if 'if (1) { return 1;}' requires one more
  # space after ;
  if Search(r';[^\s};\\)/]', line):
    error(filename, linenum, 'whitespace/semicolon', 3,
          'Missing space after ;')
3036
3037
def CheckBracesSpacing(filename, clean_lines, linenum, error):
  """Checks for horizontal spacing near braces and trailing semicolons.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # A brace should be preceded by a space, except after an opening paren or
  # after another opening brace (in case of an initializer list, for
  # instance).  Since braces should never start a line, this is an easy
  # test.
  before_brace = Match(r'^(.*[^ ({]){', line)
  if before_brace:
    # Try a bit harder to check for brace initialization.  This
    # happens in one of the following forms:
    #   Constructor() : initializer_list_{} { ... }
    #   Constructor{}.MemberFunction()
    #   Type variable{};
    #   FunctionCall(type{}, ...);
    #   LastArgument(..., type{});
    #   LOG(INFO) << type{} << " ...";
    #   map_of_type[{...}] = ...;
    #   ternary = expr ? new type{} : nullptr;
    #   OuterTemplate<InnerTemplateConstructor<Type>{}>
    #
    # The character following the closing brace decides: the warning is
    # silenced if it is one of those listed above, i.e.
    # "{.;,)<>]:".
    #
    # To account for nested initializer lists, any number of closing
    # braces is allowed in front of that character.  Silencing the warning
    # on first sight of a closing brace is not an option, because that
    # would cause false negatives for things that are not initializer
    # lists.
    #   Silence this:         But not this:
    #     Outer{                if (...) {
    #       Inner{...}            if (...){  // Missing space before {
    #     };                    }
    #
    # There is a false negative with this approach if people inserted
    # spurious semicolons, e.g. "if (cond){};", but the spurious semicolon
    # is caught by a separate check.
    (endline, endlinenum, endpos) = CloseExpression(
        clean_lines, linenum, len(before_brace.group(1)))
    pieces = []
    if endpos > -1:
      pieces.append(endline[endpos:])
    # Text from the lines after the closing brace is appended as well.
    last_offset = min(endlinenum + 3, clean_lines.NumLines() - 1)
    for offset in xrange(endlinenum + 1, last_offset):
      pieces.append(clean_lines.elided[offset])
    trailing_text = ''.join(pieces)
    if not Match(r'^[\s}]*[{.;,)<>\]:]', trailing_text):
      error(filename, linenum, 'whitespace/braces', 5,
            'Missing space before {')

  # Make sure '} else {' has spaces.
  if Search(r'}else', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before else')

  # There should be no space before a semicolon at the end of the line.
  # "for" is a special case, since the style guide allows a space before
  # the semicolon there.
  if Search(r':\s*;\s*$', line):
    message = 'Semicolon defining empty statement. Use {} instead.'
  elif Search(r'^\s*;\s*$', line):
    message = ('Line contains only semicolon. If this should be an empty '
               'statement, use {} instead.')
  elif Search(r'\s+;\s*$', line) and not Search(r'\bfor\b', line):
    message = ('Extra space before last semicolon. If this should be an '
               'empty statement, use {} instead.')
  else:
    message = None
  if message is not None:
    error(filename, linenum, 'whitespace/semicolon', 5, message)
3115
3116
def IsDecltype(clean_lines, linenum, column):
  """Tells whether the token ending on (linenum, column) is decltype().

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: the number of the line to check.
    column: end column of the token to check.
  Returns:
    True if this token is decltype() expression, False otherwise.
  """
  (opening_text, _, opening_col) = ReverseCloseExpression(
      clean_lines, linenum, column)
  if opening_col < 0:
    # No matching opening token was found.
    return False
  # The keyword has to sit right in front of the opening token.
  return bool(Search(r'\bdecltype\s*$', opening_text[0:opening_col]))
3133
3134
def IsTemplateParameterList(clean_lines, linenum, column):
  """Tells whether the token ending on (linenum, column) ends a template<>.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: the number of the line to check.
    column: end column of the token to check.
  Returns:
    True if this token is end of a template parameter list, False otherwise.
  """
  (_, opening_linenum, opening_col) = ReverseCloseExpression(
      clean_lines, linenum, column)
  if opening_col <= -1:
    # No matching opening token was found.
    return False
  # The template keyword has to sit right in front of the opening token.
  text_before = clean_lines.elided[opening_linenum][0:opening_col]
  return bool(Search(r'\btemplate\s*$', text_before))
3151
3152
def IsRValueType(clean_lines, nesting_state, linenum, column):
  """Check if the token ending on (linenum, column) is a type.

  Assumes that text to the right of the column is "&&" or a function
  name.

  The decision is heuristic.  The token to the left of the column is
  examined first; if that is inconclusive, the nearest preceding symbol
  (anything other than identifiers, whitespace and commas) is located,
  possibly on an earlier line, and used to guess the surrounding context.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    linenum: the number of the line to check.
    column: end column of the token to check.
  Returns:
    True if this token is a type, False if we are not sure.
  """
  prefix = clean_lines.elided[linenum][0:column]

  # Get one word to the left.  If we failed to do so, this is most
  # likely not a type, since it's unlikely that the type name and "&&"
  # would be split across multiple lines.
  match = Match(r'^(.*)(\b\w+|[>*)&])\s*$', prefix)
  if not match:
    return False

  # Check text following the token.  If it's "&&>" or "&&," or "&&...", it's
  # most likely a rvalue reference used inside a template.
  suffix = clean_lines.elided[linenum][column:]
  if Match(r'&&\s*(?:[>,]|\.\.\.)', suffix):
    return True

  # Check for simple type and end of templates:
  #   int&& variable
  #   vector<int>&& variable
  #
  # Because this function is called recursively, we also need to
  # recognize pointer and reference types:
  #   int* Function()
  #   int& Function()
  if match.group(2) in ['char', 'char16_t', 'char32_t', 'wchar_t', 'bool',
                        'short', 'int', 'long', 'signed', 'unsigned',
                        'float', 'double', 'void', 'auto', '>', '*', '&']:
    return True

  # If we see a close parenthesis, look for decltype on the other side.
  # decltype would unambiguously identify a type, anything else is
  # probably a parenthesized expression and not a type.
  if match.group(2) == ')':
    return IsDecltype(
        clean_lines, linenum, len(match.group(1)) + len(match.group(2)) - 1)

  # Check for casts and cv-qualifiers.
  #   match.group(1)  remainder
  #   --------------  ---------
  #   const_cast<     type&&
  #   const           type&&
  #   type            const&&
  if Search(r'\b(?:const_cast\s*<|static_cast\s*<|dynamic_cast\s*<|'
            r'reinterpret_cast\s*<|\w+\s)\s*$',
            match.group(1)):
    return True

  # Look for a preceding symbol that might help differentiate the context.
  # These are the cases that would be ambiguous:
  #   match.group(1)  remainder
  #   --------------  ---------
  #   Call         (   expression &&
  #   Declaration  (   type&&
  #   sizeof       (   type&&
  #   if           (   expression &&
  #   while        (   expression &&
  #   for          (   type&&
  #   for(         ;   expression &&
  #   statement    ;   type&&
  #   block        {   type&&
  #   constructor  {   expression &&
  start = linenum
  line = match.group(1)
  match_symbol = None
  while start >= 0:
    # We want to skip over identifiers and commas to get to a symbol.
    # Commas are skipped so that we can find the opening parenthesis
    # for function parameter lists.
    match_symbol = Match(r'^(.*)([^\w\s,])[\w\s,]*$', line)
    if match_symbol:
      break
    start -= 1
    line = clean_lines.elided[start]

  if not match_symbol:
    # Probably the first statement in the file is an rvalue reference
    return True

  if match_symbol.group(2) == '}':
    # Found closing brace, probably an indication of this:
    #   block{} type&&
    return True

  if match_symbol.group(2) == ';':
    # Found semicolon, probably one of these:
    #   for(; expression &&
    #   statement; type&&

    # Look for the previous 'for(' in the previous lines.
    before_text = match_symbol.group(1)
    for i in xrange(start - 1, max(start - 6, 0), -1):
      before_text = clean_lines.elided[i] + before_text
    if Search(r'for\s*\([^{};]*$', before_text):
      # This is the condition inside a for-loop
      return False

    # Did not find a for-init-statement before this semicolon, so this
    # is probably a new statement and not a condition.
    return True

  if match_symbol.group(2) == '{':
    # Found opening brace, probably one of these:
    #   block{ type&& = ... ; }
    #   constructor{ expression && expression }

    # Look for a closing brace or a semicolon.  If we see a semicolon
    # first, this is probably a rvalue reference.
    #
    # NOTE(review): the first slice scanned is the text up to and including
    # the '{' on line `start`, not the text after it; later iterations scan
    # whole lines.  Confirm this is intended.
    line = clean_lines.elided[start][0:len(match_symbol.group(1)) + 1]
    end = start
    depth = 1
    while True:
      for ch in line:
        if ch == ';':
          return True
        elif ch == '{':
          depth += 1
        elif ch == '}':
          depth -= 1
          if depth == 0:
            return False
      end += 1
      if end >= clean_lines.NumLines():
        break
      line = clean_lines.elided[end]
    # Incomplete program?
    return False

  if match_symbol.group(2) == '(':
    # Opening parenthesis.  Need to check what's to the left of the
    # parenthesis.  Only the text on the same line as the parenthesis is
    # examined.
    before_text = match_symbol.group(1)

    # Patterns that are likely to be types:
    #   [](type&&
    #   for (type&&
    #   sizeof(type&&
    #   operator=(type&&
    #
    if Search(r'(?:\]|\bfor|\bsizeof|\boperator\s*\S+\s*)\s*$', before_text):
      return True

    # Patterns that are likely to be expressions:
    #   if (expression &&
    #   while (expression &&
    #   : initializer(expression &&
    #   , initializer(expression &&
    #   ( FunctionCall(expression &&
    #   + FunctionCall(expression &&
    #   + (expression &&
    #
    # The last '+' represents operators such as '+' and '-'.
    if Search(r'(?:\bif|\bwhile|[-+=%^(<!?:,&*]\s*)$', before_text):
      return False

    # Something else.  Check that tokens to the left look like
    #   return_type function_name
    match_func = Match(r'^(.*)\s+\w(?:\w|::)*(?:<[^<>]*>)?\s*$',
                       match_symbol.group(1))
    if match_func:
      # Check for constructors, which don't have return types.
      if Search(r'\bexplicit$', match_func.group(1)):
        return True
      implicit_constructor = Match(r'\s*(\w+)\((?:const\s+)?(\w+)', prefix)
      if (implicit_constructor and
          implicit_constructor.group(1) == implicit_constructor.group(2)):
        return True
      # Recurse on the would-be return type to the left of the function name.
      return IsRValueType(clean_lines, nesting_state, linenum,
                          len(match_func.group(1)))

    # Nothing before the function name.  If this is inside a block scope,
    # this is probably a function call.
    return not (nesting_state.previous_stack_top and
                nesting_state.previous_stack_top.IsBlockInfo())

  if match_symbol.group(2) == '>':
    # Possibly a closing bracket, check that what's on the other side
    # looks like the start of a template.
    return IsTemplateParameterList(
        clean_lines, start, len(match_symbol.group(1)))

  # Some other symbol, usually something like "a=b&&c".  This is most
  # likely not a type.
  return False
3353
3354
def IsRValueAllowed(clean_lines, linenum):
  """Check if RValue reference is allowed within some range of lines.

  A line is inside an allowed region when the nearest
  GOOGLE_ALLOW_RVALUE_REFERENCES_ marker at or above it (searching down to,
  but not including, line 0) ends in PUSH and the first marker at or below
  it ends in POP.  Markers are only recognized at the very start of a line.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
  Returns:
    True if line is within the region where RValue references are allowed.
  """
  marker = r'GOOGLE_ALLOW_RVALUE_REFERENCES_(?:PUSH|POP)'
  for above in xrange(linenum, 0, -1):
    upper = clean_lines.elided[above]
    if not Match(marker, upper):
      continue
    if not upper.endswith('PUSH'):
      # The closest marker above closes a region, so we are outside of it.
      return False
    # Found the opening marker; now look downwards for the closing one.
    for below in xrange(linenum, clean_lines.NumLines(), 1):
      lower = clean_lines.elided[below]
      if Match(marker, lower):
        return lower.endswith('POP')
  return False
3374
3375
def CheckRValueReference(filename, clean_lines, linenum, nesting_state, error):
  """Check for rvalue references and for "&&" lacking surrounding spaces.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # Only "&&" that touches a non-space character on at least one side is
  # examined here.
  # TODO(unknown): currently we don't check for rvalue references
  # with spaces surrounding the && to avoid false positives with
  # boolean expressions.
  line = clean_lines.elided[linenum]
  match = Match(r'^(.*\S)&&', line) or Match(r'(.*)&&\S', line)
  if not match:
    return
  if '(&&)' in line:
    return
  if Search(r'\boperator\s*$', match.group(1)):
    # "operator&&" is a function name, not an expression or a reference.
    return

  # What remains is either a badly spaced boolean "&&" or an rvalue
  # reference.  Decide which by checking whether the text to the left of
  # "&&" looks like a type.
  and_pos = len(match.group(1))
  if IsRValueType(clean_lines, nesting_state, linenum, and_pos):
    # The constant False keeps this report switched off, so rvalue
    # references are never flagged by this function.
    if False and not IsRValueAllowed(clean_lines, linenum):
      error(filename, linenum, 'build/c++11', 3,
            'RValue references are an unapproved C++ feature.')
    return

  # Custom hack: a line that contains '&&' and ends with a semicolon is
  # assumed to be a function declaration rather than an "if/while/for/etc"
  # condition, so it is not reported.  This could be better, but the simple
  # solution was chosen. ign-math5+ will use an updated cpplint.py script
  # that handles this case properly.
  if not Match(r'.*;$', line):
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around &&')
3415
3416
def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
  """Checks for additional blank line issues related to sections.

  Currently the only thing checked here is that an access specifier
  (public/protected/private) is preceded by a blank line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    class_info: A _ClassInfo objects.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Small classes (25 lines or less) are exempt: that is roughly one
  # terminal screen, and anything that does not fit on one screen cannot
  # really be called "small".  When the end of the class was not found,
  # last_line is zero and this same test skips the check.
  #
  # The first line of the class is exempt as well, which covers
  #   class Foo { public: ... };
  class_span = class_info.last_line - class_info.starting_linenum
  if class_span <= 24 or linenum <= class_info.starting_linenum:
    return

  raw_lines = clean_lines.lines
  access = Match(r'\s*(public|protected|private):', raw_lines[linenum])
  if not access:
    return

  # No warning when the previous line is blank, or mentions "class" or
  # "struct" (we are at the start of the class, or right after a forward
  # declaration of an inner class that is semantically private but had to be
  # public), or ends with a backslash (classes defined inside C macros).
  above = raw_lines[linenum - 1]
  if (IsBlankLine(above) or
      Search(r'\b(class|struct)\b', above) or
      Search(r'\\$', above)):
    return

  # Try a bit harder to find the end of the class head, to account for
  # multi-line base-specifier lists, e.g.:
  #   class Derived
  #       : public Base {
  head_end = class_info.starting_linenum
  for candidate in range(class_info.starting_linenum, linenum):
    if Search(r'\{\s*$', raw_lines[candidate]):
      head_end = candidate
      break
  if head_end < linenum - 1:
    error(filename, linenum, 'whitespace/blank_line', 3,
          '"%s:" should be preceded by a blank line' % access.group(1))
3470
3471
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Return the most recent non-blank line and its line number.

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to check.

  Returns:
    A tuple with two elements.  The first element is the contents of the last
    non-blank line before the current line, or the empty string if this is the
    first non-blank line.  The second is the line number of that line, or -1
    if this is the first non-blank line.
  """
  # Walk upwards from the line just above `linenum` to the top of the file.
  for candidate in xrange(linenum - 1, -1, -1):
    text = clean_lines.elided[candidate]
    if not IsBlankLine(text):
      return (text, candidate)
  return ('', -1)
3493
3494
def CheckBraces(filename, clean_lines, linenum, error):
  """Looks for misplaced braces and brace-less if/else bodies.

  The checks performed on the line are:
    - "} else" on one line and "else ... {" on one line (both reported as
      whitespace/newline),
    - an else clause written on the same line as the else keyword,
    - a do/while written on a single line,
    - if/else bodies without braces that appear to hold more than one
      statement, and ambiguous if/else nesting without braces.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  line = clean_lines.elided[linenum]        # get rid of comments and strings

  # NOTE: the check below, on an opening brace that starts a line, is
  # disabled; it is kept only for reference.
  #if Match(r'\s*{\s*$', line):
  #  # We allow an open brace to start a line in the case where someone is using
  #  # braces in a block to explicitly create a new scope, which is commonly used
  #  # to control the lifetime of stack-allocated variables.  Braces are also
  #  # used for brace initializers inside function calls.  We don't detect this
  #  # perfectly: we just don't complain if the last non-whitespace character on
  #  # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the
  #  # previous line starts a preprocessor block.
  #  prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
  #  if (not Search(r'[,;:}{()]\s*$', prevline) and
  #      not Match(r'\s*#', prevline) and
  #      os.path.splitext(filename)[1] != ".hh"):
  #      #not Match(r'\s*}\s*', prevline) and
  #      #not Match(r'^\s*{\s*&', prevline)):
  #    error(filename, linenum, 'whitespace/braces', 4,
  #          '{ should never be at the end of the previous line')

  # An else clause should not be on the same line as the preceding closing brace.
  # The pattern accepts "} else", "} else if" and "} else {" at line start.
  if Match(r'\s*}\s*else\b\s*(?:if\b|\{|$)', line):
    # prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    #if Match(r'\s*}\s*$', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'An else should not appear on the same line as the preceding }')
  # Likewise the opening brace of the else body must not end the else line.
  if Match(r'\s*else.*{$', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'An else should not appear on the same line as the next {')

  # NOTE: the "braces on both sides of an else" check below is disabled; it
  # is kept only for reference.
  # If braces come on one side of an else, they should be on both.
  # However, we have to worry about "else if" that spans multiple lines!
  # if Search(r'else if\s*\(', line):       # could be multi-line if
  #   brace_on_left = bool(Search(r'}\s*else if\s*\(', line))
  #   # find the ( after the if
  #   pos = line.find('else if')
  #   pos = line.find('(', pos)
  #   if pos > 0:
  #     (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
  #     brace_on_right = endline[endpos:].find('{') != -1
  #     if brace_on_left != brace_on_right:    # must be brace after if
  #       error(filename, linenum, 'readability/braces', 5,
  #             'If an else has a brace on one side, it should have it on both')
  # elif Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
  #   error(filename, linenum, 'readability/braces', 5,
  #         'If an else has a brace on one side, it should have it on both')

  # Likewise, an else should never have the else clause on the same line
  # ("else if" is exempt).
  if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'Else clause should never be on same line as else (use 2 lines)')

  # In the same way, a do/while should never be on one line
  if Match(r'\s*do [^\s{]', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'do/while clauses should not be on a single line')

  # Check single-line if/else bodies. The style guide says 'curly braces are not
  # required for single-line statements'. We additionally allow multi-line,
  # single statements, but we reject anything with more than one semicolon in
  # it. This means that the first semicolon after the if should be at the end of
  # its line, and the line after that should have an indent level equal to or
  # lower than the if. We also check for ambiguous if/else nesting without
  # braces.
  if_else_match = Search(r'\b(if\s*\(|else\b)', line)
  # Preprocessor lines (#if, #else, ...) are not C++ conditionals; skip them.
  if if_else_match and not Match(r'\s*#', line):
    if_indent = GetIndentLevel(line)
    # For a bare "else" the body starts right after the keyword on this line.
    endline, endlinenum, endpos = line, linenum, if_else_match.end()
    if_match = Search(r'\bif\s*\(', line)
    if if_match:
      # This could be a multiline if condition, so find the end first.
      # `pos` is the index of the opening parenthesis of the condition.
      pos = if_match.end() - 1
      (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum, pos)
    # Check for an opening brace, either directly after the if or on the next
    # line. If found, this isn't a single-statement conditional.
    if (not Match(r'\s*{', endline[endpos:])
        and not (Match(r'\s*$', endline[endpos:])
                 and endlinenum < (len(clean_lines.elided) - 1)
                 and Match(r'\s*{', clean_lines.elided[endlinenum + 1]))):
      # Advance to the first line, at or after the end of the condition,
      # that contains a semicolon; lines after the first are searched from
      # column 0.
      while (endlinenum < len(clean_lines.elided)
             and ';' not in clean_lines.elided[endlinenum][endpos:]):
        endlinenum += 1
        endpos = 0
      if endlinenum < len(clean_lines.elided):
        endline = clean_lines.elided[endlinenum]
        # We allow a mix of whitespace and closing braces (e.g. for one-liner
        # methods) and a single \ after the semicolon (for macros)
        endpos = endline.find(';')
        if not Match(r';[\s}]*(\\?)$', endline[endpos:]):
          # Semicolon isn't the last character, there's something trailing
          error(filename, linenum, 'readability/braces', 4,
                'If/else bodies with multiple statements require braces')
        elif endlinenum < len(clean_lines.elided) - 1:
          # Make sure the next line is dedented
          next_line = clean_lines.elided[endlinenum + 1]
          next_indent = GetIndentLevel(next_line)
          # With ambiguous nested if statements, this will error out on the
          # if that *doesn't* match the else, regardless of whether it's the
          # inner one or outer one.
          if (if_match and Match(r'\s*else\b', next_line)
              and next_indent != if_indent):
            error(filename, linenum, 'readability/braces', 4,
                  'Else clause should be indented at the same level as if. '
                  'Ambiguous nested if/else chains require braces.')
          elif next_indent > if_indent:
            # A deeper-indented line after the body's semicolon suggests a
            # second statement that was meant to belong to the if/else.
            error(filename, linenum, 'readability/braces', 4,
                  'If/else bodies with multiple statements require braces')
3611
3612
def CheckTrailingSemicolon(filename, clean_lines, linenum, error):
  """Looks for redundant trailing semicolon.

  Finds an opening brace on the line that starts a block (as opposed to a
  brace initializer), locates its matching closing brace and reports a
  semicolon that directly follows it.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  line = clean_lines.elided[linenum]

  # Block bodies should not be followed by a semicolon.  Due to C++11
  # brace initialization, there are more places where semicolons are
  # required than not, so we use a whitelist approach to check these
  # rather than a blacklist.  These are the places where "};" should
  # be replaced by just "}":
  # 1. Some flavor of block following closing parenthesis:
  #    for (;;) {};
  #    while (...) {};
  #    switch (...) {};
  #    Function(...) {};
  #    if (...) {};
  #    if (...) else if (...) {};
  #
  # 2. else block:
  #    if (...) else {};
  #
  # 3. const member function:
  #    Function(...) const {};
  #
  # 4. Block following some statement:
  #    x = 42;
  #    {};
  #
  # 5. Block at the beginning of a function:
  #    Function(...) {
  #      {};
  #    }
  #
  #    Note that naively checking for the preceding "{" will also match
  #    braces inside multi-dimensional arrays, but this is fine since
  #    that expression will not contain semicolons.
  #
  # 6. Block following another block:
  #    while (true) {}
  #    {};
  #
  # 7. End of namespaces:
  #    namespace {};
  #
  #    These semicolons seems far more common than other kinds of
  #    redundant semicolons, possibly due to people converting classes
  #    to namespaces.  For now we do not warn for this case.
  #
  # `candidate` holds the match whose group(1) is the text before the
  # opening brace to check, or None when the line is not eligible.
  #
  # Try matching case 1 first.
  candidate = Match(r'^(.*\)\s*)\{', line)
  if candidate:
    # Matched closing parenthesis (case 1).  Check the token before the
    # matching opening parenthesis, and don't warn if it looks like a
    # macro.  This avoids these false positives:
    #  - macro that defines a base class
    #  - multi-line macro that defines a base class
    #  - macro that defines the whole class-head
    #
    # But we still issue warnings for macros that we know are safe to
    # warn, specifically:
    #  - TEST, TEST_F, MATCHER, MATCHER_P
    #  - TYPED_TEST
    #  - INTERFACE_DEF
    #  - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED:
    #
    # We implement a whitelist of safe macros instead of a blacklist of
    # unsafe macros, even though the latter appears less frequently in
    # google code and would have been easier to implement.  This is because
    # the downside for getting the whitelist wrong means some extra
    # semicolons, while the downside for getting the blacklist wrong
    # would result in compile errors.
    #
    # In addition to macros, we also don't want to warn on compound
    # literals and lambdas.
    (open_text, open_linenum, open_pos) = ReverseCloseExpression(
        clean_lines, linenum, candidate.group(1).rfind(')'))
    if open_pos > -1:
      before_paren = open_text[0:open_pos]
      macro = Search(r'\b([A-Z_]+)\s*$', before_paren)
      func = Match(r'^(.*\])\s*$', before_paren)
      # An all-caps name that is not a known-safe macro.
      unsafe_macro = macro and macro.group(1) not in (
          'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST',
          'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED',
          'LOCKS_EXCLUDED', 'INTERFACE_DEF')
      # "](...) {" that is not "operator[](...) {".
      bracket_before_paren = (
          func and not Search(r'\boperator\s*\[\s*\]', func.group(1)))
      if (unsafe_macro or bracket_before_paren or
          Search(r'\s+=\s*$', before_paren)):
        candidate = None
    if (candidate and
        open_linenum > 1 and
        Search(r'\]\s*$', clean_lines.elided[open_linenum - 1])):
      # Multi-line lambda-expression
      candidate = None

  else:
    # Try matching cases 2-3.
    candidate = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line)
    if not candidate:
      # Try matching cases 4-6.  These are always matched on separate lines.
      #
      # Note that we can't simply concatenate the previous line to the
      # current line and do a single match, otherwise we may output
      # duplicate warnings for the blank line case:
      #   if (cond) {
      #     // blank line
      #   }
      prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
      if prevline and Search(r'[;{}]\s*$', prevline):
        candidate = Match(r'^(\s*)\{', line)

  if not candidate:
    return

  # Check matching closing brace
  (endline, endlinenum, endpos) = CloseExpression(
      clean_lines, linenum, len(candidate.group(1)))
  if endpos > -1 and Match(r'^\s*;', endline[endpos:]):
    # Current {} pair is eligible for semicolon check, and we have found
    # the redundant semicolon, output warning here.
    #
    # Note: because we are scanning forward for opening braces, and
    # outputting warnings for the matching closing brace, if there are
    # nested blocks with trailing semicolons, we will get the error
    # messages in reversed order.
    error(filename, endlinenum, 'readability/braces', 4,
          "You don't need a ; after a }")
3745
3746
def CheckEmptyBlockBody(filename, clean_lines, linenum, error):
  """Look for empty loop/conditional body with only a single semicolon.

  An empty "if" body is reported as whitespace/empty_conditional_body and an
  empty "for" body as whitespace/empty_loop_body.  An empty "while" body is
  recognized but not reported.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # The keyword must be the first thing on the line.  Because only
  # whitespace may precede it, most do-while loops are ignored too, since
  # those lines should start with a closing brace.
  #
  # "if" is handled here as well, since an empty conditional block is
  # likely an error.
  line = clean_lines.elided[linenum]
  keyword_match = Match(r'\s*(for|while|if)\s*\(', line)
  if not keyword_match:
    return

  # Find the end of the conditional expression
  (end_line, end_linenum, end_pos) = CloseExpression(
      clean_lines, linenum, line.find('('))

  # Only a semicolon immediately after the condition is of interest.
  # Whitespace or a newline in between is left to the separate check for
  # semicolons preceded by whitespace.
  if end_pos < 0 or not Match(r';', end_line[end_pos:]):
    return

  keyword = keyword_match.group(1)
  if keyword == 'if':
    error(filename, end_linenum, 'whitespace/empty_conditional_body', 5,
          'Empty conditional bodies should use {}')
  elif keyword != "while":
    error(filename, end_linenum, 'whitespace/empty_loop_body', 5,
          'Empty loop bodies should use {} or continue')
3780
3781
def FindCheckMacro(line):
  """Find a replaceable CHECK-like macro.

  Args:
    line: line to search on.
  Returns:
    (macro name, start position), or (None, -1) if no replaceable
    macro is found.  The start position is the index of the opening
    parenthesis that follows the macro name.
  """
  for macro in _CHECK_MACROS:
    if macro not in line:
      continue
    # A plain substring hit is not enough: some other macro may merely
    # contain the CHECK name.  Require the name on a word boundary,
    # followed by an opening parenthesis.
    with_paren = Match(r'^(.*\b' + macro + r'\s*)\(', line)
    if with_paren:
      return (macro, len(with_paren.group(1)))
  return (None, -1)
3803
3804
def CheckCheck(filename, clean_lines, linenum, error):
  """Checks the use of CHECK and EXPECT macros.

  Suggests a comparison-specific replacement macro when the macro's
  argument is a single relational expression with a constant literal on at
  least one side.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Decide the set of replacement macros that should be suggested
  elided = clean_lines.elided
  (check_macro, start_pos) = FindCheckMacro(elided[linenum])
  if not check_macro:
    return

  # Find end of the boolean expression by matching parentheses
  (last_line, end_line, end_pos) = CloseExpression(
      clean_lines, linenum, start_pos)
  if end_pos < 0:
    return

  # Collect the text between the macro's parentheses, which may span
  # several lines.
  if linenum == end_line:
    expression = elided[linenum][start_pos + 1:end_pos - 1]
  else:
    pieces = [elided[linenum][start_pos + 1:]]
    pieces.extend([elided[i] for i in xrange(linenum + 1, end_line)])
    pieces.append(last_line[0:end_pos - 1])
    expression = ''.join(pieces)

  # Parse expression so that we can take parentheses into account.
  # This avoids false positives for inputs like "CHECK((a < 4) == b)",
  # which is not replaceable by CHECK_LE.
  lhs_parts = []
  rhs = ''
  relation = None
  while expression:
    matched = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||'
                    r'==|!=|>=|>|<=|<|\()(.*)$', expression)
    if not matched:
      # Unparenthesized operand.  Rather than consuming one character at a
      # time, first try to swallow a whole run of non-operator characters;
      # this is more efficient when operands are longer than one character,
      # which is generally the case.
      matched = Match(r'^([^-=!<>()&|]+)(.*)$', expression)
      if not matched:
        matched = Match(r'^(\s*\S)(.*)$', expression)
        if not matched:
          break
      lhs_parts.append(matched.group(1))
      expression = matched.group(2)
      continue

    token = matched.group(1)
    remainder = matched.group(2)
    if token == '(':
      # Parenthesized operand
      (end, _) = FindEndOfExpressionInLine(remainder, 0, ['('])
      if end < 0:
        return  # Unmatched parenthesis
      lhs_parts.append('(' + remainder[0:end])
      expression = remainder[end:]
    elif token in ('&&', '||'):
      # Logical and/or operators.  This means the expression
      # contains more than one term, for example:
      #   CHECK(42 < a && a < b);
      #
      # These are not replaceable with CHECK_LE, so bail out early.
      return
    elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'):
      # Non-relational operator
      lhs_parts.append(token)
      expression = remainder
    else:
      # Relational operator: everything after it is the right-hand side.
      relation = token
      rhs = remainder
      break
  lhs = ''.join(lhs_parts)

  # Only apply checks if we got all parts of the boolean expression
  if not (lhs and relation and rhs):
    return

  # Check that rhs do not contain logical operators.  We already know
  # that lhs is fine since the loop above parses out && and ||.
  if '&&' in rhs or '||' in rhs:
    return

  # At least one of the operands must be a constant literal.  This is
  # to avoid suggesting replacements for unprintable things like
  # CHECK(variable != iterator)
  #
  # The following pattern matches decimal, hex integers, strings, and
  # characters (in that order).
  lhs = lhs.strip()
  rhs = rhs.strip()
  match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$'
  if Match(match_constant, lhs) or Match(match_constant, rhs):
    # Note: since we know both lhs and rhs, we can provide a more
    # descriptive error message like:
    #   Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42)
    # Instead of:
    #   Consider using CHECK_EQ instead of CHECK(a == b)
    #
    # We are still keeping the less descriptive message because if lhs
    # or rhs gets long, the error message might become unreadable.
    error(filename, linenum, 'readability/check', 2,
          'Consider using %s instead of %s(a %s b)' % (
              _CHECK_REPLACEMENT[check_macro][relation],
              check_macro, relation))
3914
3915
def CheckAltTokens(filename, clean_lines, linenum, error):
  """Reports alternative operator tokens found on a line.

  Every occurrence matched by _ALT_TOKEN_REPLACEMENT_PATTERN on the elided
  line is reported together with the operator that should replace it, as
  looked up in _ALT_TOKEN_REPLACEMENT.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  code = clean_lines.elided[linenum]

  # Preprocessor lines are not checked.
  if Match(r'^\s*#', code):
    return

  # Skip any line that opens or closes a C-style comment.  This does not
  # help when the comment started before this line or ends after it, but
  # it removes most false positives and gives people who put multi-line
  # comments inside preprocessor macros a way to silence the warning.
  #
  # TODO(unknown): remove this once cpplint has better support for
  # multi-line comments.
  if '/*' in code or '*/' in code:
    return

  for found in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(code):
    alt_token = found.group(1)
    error(filename, linenum, 'readability/alt_tokens', 2,
          'Use operator %s instead of %s' % (
              _ALT_TOKEN_REPLACEMENT[alt_token], alt_token))
3946
3947
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.  Anything that is not a
    text (Unicode) string is measured with len().
  """
  # Python 2 calls its text type `unicode`; on Python 3 that name does not
  # exist and `str` is the text type.  Resolve whichever is available so
  # this function does not raise NameError on Python 3.
  try:
    text_type = unicode
  except NameError:
    text_type = str

  if not isinstance(line, text_type):
    return len(line)

  width = 0
  # Normalize to NFC first so that base+combining sequences that have a
  # precomposed form are counted as a single character.
  for uc in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(uc) in ('W', 'F'):
      # Wide and fullwidth characters occupy two columns.
      width += 2
    elif not unicodedata.combining(uc):
      # Remaining combining marks occupy no column of their own.
      width += 1
  return width
3968
3969
def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
               error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Most of these rules are hard to test (naming, comment style), but we
  do what we can.  In particular we check for 2-space indents, line lengths,
  tab usage, spaces inside code, etc.

  The checks done directly in this function are, in order: tabs, trailing
  whitespace or odd indentation, line length, and multiple statements on one
  line.  The remaining checks are delegated to the Check* helpers called at
  the end.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  # Don't use "elided" lines here, otherwise we can't check commented lines.
  # Don't want to use "raw" either, because we don't want to check inside C++11
  # raw strings.
  raw_lines = clean_lines.lines_without_raw_strings
  line = raw_lines[linenum]

  if line.find('\t') != -1:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
  # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  #
  # A line consisting of "word:" (optionally followed by a backslash) is
  # treated as a scope specifier or label and exempted from the indent check.
  scope_or_label_pattern = r'\s*\w+\s*:\s*\\?$'
  # NOTE(review): this value is not read before classinfo is assigned again
  # near the end of this function.
  classinfo = nesting_state.InnermostClass()
  initial_spaces = 0
  cleansed_line = clean_lines.elided[linenum]
  # Count the leading spaces (only ' ' characters, not tabs).
  while initial_spaces < len(line) and line[initial_spaces] == ' ':
    initial_spaces += 1
  # Trailing whitespace takes precedence: when it is reported, the indent
  # check in the elif below is not performed for this line.
  if line and line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace.  Consider deleting these extra spaces.')
  # There are certain situations we allow one space, notably for
  # section labels, and also lines containing multi-line raw strings.
  # The raw-string case is recognized by the line differing from its raw
  # form while starting with "" after optional whitespace.
  elif ((initial_spaces == 1 or initial_spaces == 3) and
        not Match(scope_or_label_pattern, cleansed_line) and
        not (clean_lines.raw_lines[linenum] != line and
             Match(r'^\s*""', line))):
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start.  '
          'Are you using a 2-space indent?')

  # Check if the line is a header guard.  Only 'h' files are considered, and
  # the line must start with the #ifndef, #define or "#endif  // " form of the
  # variable returned by GetHeaderGuardCPPVariable.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    if (line.startswith('#ifndef %s' % cppvar) or
        line.startswith('#define %s' % cppvar) or
        line.startswith('#endif  // %s' % cppvar)):
      is_header_guard = True
  # #include lines and header guards can be long, since there's no clean way to
  # split them.
  #
  # URLs can be long too.  It's possible to split these, but it makes them
  # harder to cut&paste.
  #
  # The "$Id:...$" comment may also get very long without it being the
  # developer's fault.
  if (not line.startswith('#include') and not is_header_guard and
      not Match(r'^\s*//.*http(s?)://\S*$', line) and
      not Match(r'^// \$Id:.*#[0-9]+ \$$', line)):
    line_width = GetLineWidth(line)
    # Lines beyond 125% of the limit get the higher-confidence message,
    # unless they contain a URL anywhere, in which case nothing is reported.
    extended_length = int((_line_length * 1.25))
    if line_width > extended_length:
      if not Match(r'.*http(s?)://.*', line):
        error(filename, linenum, 'whitespace/line_length', 4,
              'Lines should very rarely be longer than %i characters' %
              extended_length)
    elif line_width > _line_length:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= %i characters long' % _line_length)

  # More than one ';' on the line suggests several statements.  The
  # exemptions are spelled out inside the condition.
  if (cleansed_line.count(';') > 1 and
      # for loops are allowed two ;'s (and may run over two lines).
      cleansed_line.find('for') == -1 and
      (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or
       GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and
      # It's ok to have many commands in a switch case that fits in 1 line
      not ((cleansed_line.find('case ') != -1 or
            cleansed_line.find('default:') != -1) and
           cleansed_line.find('break;') != -1)):
    error(filename, linenum, 'whitespace/newline', 0,
          'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckTrailingSemicolon(filename, clean_lines, linenum, error)
  CheckEmptyBlockBody(filename, clean_lines, linenum, error)
  CheckAccess(filename, clean_lines, linenum, nesting_state, error)
  CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
  CheckOperatorSpacing(filename, clean_lines, linenum, error)
  CheckParenthesisSpacing(filename, clean_lines, linenum, error)
  CheckCommaSpacing(filename, clean_lines, linenum, error)
  CheckBracesSpacing(filename, clean_lines, linenum, error)
  CheckSpacingForFunctionCall(filename, clean_lines, linenum, error)
  CheckRValueReference(filename, clean_lines, linenum, nesting_state, error)
  CheckCheck(filename, clean_lines, linenum, error)
  CheckAltTokens(filename, clean_lines, linenum, error)
  # Section spacing is only checked when InnermostClass() returns a class.
  classinfo = nesting_state.InnermostClass()
  if classinfo:
    CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
4088
4089
# Matches a quoted #include of a ".h" file whose path contains no '/',
# i.e. an include that names the header without any directory.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line.  Group 1 is the opening delimiter
# (< or "), group 2 is the included path.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s (a '.'
# also ends the component). That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
4098
4099
4100def _DropCommonSuffixes(filename):
4101  """Drops common suffixes like _test.cc or -inl.h from filename.
4102
4103  For example:
4104    >>> _DropCommonSuffixes('foo/foo-inl.h')
4105    'foo/foo'
4106    >>> _DropCommonSuffixes('foo/bar/foo.cc')
4107    'foo/bar/foo'
4108    >>> _DropCommonSuffixes('foo/foo_internal.h')
4109    'foo/foo'
4110    >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
4111    'foo/foo_unusualinternal'
4112
4113  Args:
4114    filename: The input filename.
4115
4116  Returns:
4117    The filename with the common suffix removed.
4118  """
4119  for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
4120                 'inl.h', 'impl.h', 'internal.h'):
4121    if (filename.endswith(suffix) and len(filename) > len(suffix) and
4122        filename[-len(suffix) - 1] in ('-', '_')):
4123      return filename[:-len(suffix) - 1]
4124  return os.path.splitext(filename)[0]
4125
4126
4127def _IsTestFilename(filename):
4128  """Determines if the given filename has a suffix that identifies it as a test.
4129
4130  Args:
4131    filename: The input filename.
4132
4133  Returns:
4134    True if 'filename' looks like a test, False otherwise.
4135  """
4136  if (filename.endswith('_test.cc') or
4137      filename.endswith('_unittest.cc') or
4138      filename.endswith('_regtest.cc')):
4139    return True
4140  else:
4141    return False
4142
4143
def _ClassifyInclude(fileinfo, include, is_system):
  """Classifies an #include path as one of the _XXX_HEADER categories.

  Args:
    fileinfo: The current file cpplint is running over. A FileInfo instance.
    include: The path to a #included file.
    is_system: True if the #include used <> rather than "".

  Returns:
    One of the _XXX_HEADER constants.

  For example:
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
    _C_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
    _CPP_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
    _LIKELY_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
    ...                  'bar/foo_other_ext.h', False)
    _POSSIBLE_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
    _OTHER_HEADER
  """
  # An include counts as a C++ header when it is listed in _CPP_HEADERS or
  # contains ".hh" anywhere after its first character.
  looks_like_cpp = include in _CPP_HEADERS or include.find(".hh") > 0

  # Angle-bracket includes are always system headers; only C vs. C++ is
  # left to decide.
  if is_system:
    return _CPP_SYS_HEADER if looks_like_cpp else _C_SYS_HEADER

  # When the file being linted and the include have the same basename once
  # common suffixes are dropped, and the include sits in the same directory
  # (or in the sibling "public" directory), the include is most likely the
  # file's own header.
  target_stem = _DropCommonSuffixes(fileinfo.RepositoryName())
  target_dir, target_base = os.path.split(target_stem)
  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
  if target_base == include_base:
    if (include_dir == target_dir or
        include_dir == os.path.normpath(target_dir + '/../public')):
      return _LIKELY_MY_HEADER

  # Sharing only the first basename component means the file might be
  # implementing the include: it is allowed to come first, but its absence
  # from that position is never reported.
  target_head = _RE_FIRST_COMPONENT.match(target_base)
  include_head = _RE_FIRST_COMPONENT.match(include_base)
  if not (target_head and include_head):
    return _OTHER_HEADER
  if target_head.group(0) == include_head.group(0):
    return _POSSIBLE_MY_HEADER

  return _OTHER_HEADER
4201
4202
4203
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Applies the checks that are specific to #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)
  line = clean_lines.lines[linenum]

  # Headers should be named with their directory ("foo/bar.h"), not as a
  # bare "bar.h".
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  include_match = _RE_PATTERN_INCLUDE.search(line)
  if include_match:
    include = include_match.group(2)
    is_system = (include_match.group(1) == '<')
    if include not in include_state:
      # First sighting of this header: remember the line it appeared on.
      include_state[include] = linenum

      # Headers are expected in this order:
      # 1) for foo.cc, foo.h  (preferred location)
      # 2) c system files
      # 3) cpp system files
      # 4) for foo.cc, foo.h  (deprecated location)
      # 5) other google headers
      #
      # Each include is classified as one of those 5 types.  The
      # include_state object keeps track of the highest type seen so far
      # and complains when a lower type shows up after it.
      header_type = _ClassifyInclude(fileinfo, include, is_system)
      order_problem = include_state.CheckNextIncludeOrder(header_type)
      if order_problem:
        error(filename, linenum, 'build/include_order', 4,
              '%s. Should be: %s.h, c system, c++ system, other.' %
              (order_problem, fileinfo.BaseName()))

      canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
      sorted_ok = include_state.IsInAlphabeticalOrder(
          clean_lines, linenum, canonical_include)
      if not sorted_ok:
        error(filename, linenum, 'build/include_alpha', 4,
              'Include "%s" not in alphabetical order' % include)
      include_state.SetLastHeader(canonical_include)
    else:
      # Including the same file twice is occasionally legitimate, but in
      # general it is a mistake.
      error(filename, linenum, 'build/include', 4,
            '"%s" already included at %s:%s' %
            (include, filename, include_state[include]))

  # Look for any of the stream classes that are part of standard C++.
  # Only <ostream> is currently reported, and test files are exempt because
  # many unit tests use cout.
  stream_match = _RE_PATTERN_INCLUDE.match(line)
  if stream_match:
    include = stream_match.group(2)
    if (Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include) and
        not _IsTestFilename(filename) and
        include == 'ostream'):
      error(filename, linenum, 'readability/streams', 3,
            'For logging, include "base/logging.h" instead of <ostream>.')
4278
4279
4280def _GetTextInside(text, start_pattern):
4281  r"""Retrieves all the text between matching open and close parentheses.
4282
4283  Given a string of lines and a regular expression string, retrieve all the text
4284  following the expression and between opening punctuation symbols like
4285  (, [, or {, and the matching close-punctuation symbol. This properly nested
4286  occurrences of the punctuations, so for the text like
4287    printf(a(), b(c()));
4288  a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
4289  start_pattern must match string having an open punctuation symbol at the end.
4290
4291  Args:
4292    text: The lines to extract text. Its comments and strings must be elided.
4293           It can be single line and can span multiple lines.
4294    start_pattern: The regexp string indicating where to start extracting
4295                   the text.
4296  Returns:
4297    The extracted text.
4298    None if either the opening string or ending punctuation could not be found.
4299  """
4300  # TODO(unknown): Audit cpplint.py to see what places could be profitably
4301  # rewritten to use _GetTextInside (and use inferior regexp matching today).
4302
4303  # Give opening punctuations to get the matching close-punctuations.
4304  matching_punctuation = {'(': ')', '{': '}', '[': ']'}
4305  closing_punctuation = set(matching_punctuation.itervalues())
4306
4307  # Find the position to start extracting text.
4308  match = re.search(start_pattern, text, re.M)
4309  if not match:  # start_pattern not found in text.
4310    return None
4311  start_position = match.end(0)
4312
4313  assert start_position > 0, (
4314      'start_pattern must ends with an opening punctuation.')
4315  assert text[start_position - 1] in matching_punctuation, (
4316      'start_pattern must ends with an opening punctuation.')
4317  # Stack of closing punctuations we expect to have in text after position.
4318  punctuation_stack = [matching_punctuation[text[start_position - 1]]]
4319  position = start_position
4320  while punctuation_stack and position < len(text):
4321    if text[position] == punctuation_stack[-1]:
4322      punctuation_stack.pop()
4323    elif text[position] in closing_punctuation:
4324      # A closing punctuation without matching opening punctuations.
4325      return None
4326    elif text[position] in matching_punctuation:
4327      punctuation_stack.append(matching_punctuation[text[position]])
4328    position += 1
4329  if punctuation_stack:
4330    # Opening punctuations left without matching close-punctuations.
4331    return None
4332  # punctuations match.
4333  return text[start_position:position - 1]
4334
4335
# Patterns for matching call-by-reference parameters.
#
# Supports nested templates up to 2 levels deep using this messy pattern:
#   < (?: < (?: < [^<>]*
#               >
#           |   [^<>] )*
#         >
#     |   [^<>] )*
#   >
# An identifier: a letter or underscore followed by any word characters.
_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*'  # =~ [[:alpha:]][[:alnum:]]*
# A type name: optional leading "const", optional elaborated-type keyword,
# then one or more of: a word character, a template argument list (see the
# nesting pattern above), or "::".
_RE_PATTERN_TYPE = (
    r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?'
    r'(?:\w|'
    r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|'
    r'::)+')
# A call-by-reference parameter ends with '& identifier'.
# Group 1 captures the type through the identifier; an optional default
# value ("= ...") may follow before the terminating ',' or ')'.
_RE_PATTERN_REF_PARAM = re.compile(
    r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*'
    r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]')
# A call-by-const-reference parameter either ends with 'const& identifier'
# or looks like 'const type& identifier' when 'type' is atomic.
# This one is kept as a pattern string rather than a compiled regex.
_RE_PATTERN_CONST_REF_PARAM = (
    r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT +
    r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')')
4360
4361
def CheckLanguage(filename, clean_lines, linenum, file_extension,
                  include_state, nesting_state, error):
  """Checks rules from the 'C++ language rules' section of cppguide.html.

  Some of these rules are hard to test (function overloading, using
  uint32 inappropriately), but we do the best we can.

  #include lines are handed to CheckIncludeLine and get none of the other
  checks in this function.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    include_state: An _IncludeState instance in which the headers are inserted.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # If the line is empty or consists of entirely a comment, no need to
  # check it.
  line = clean_lines.elided[linenum]
  if not line:
    return

  match = _RE_PATTERN_INCLUDE.search(line)
  if match:
    CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
    return

  # Reset include state across preprocessor directives.  This is meant
  # to silence warnings for conditional includes.
  if Match(r'^\s*#\s*(?:ifdef|elif|else|endif)\b', line):
    include_state.ResetSection()

  # Perform other checks now that we are sure that this is not an include line
  CheckCasts(filename, clean_lines, linenum, error)
  CheckGlobalStatic(filename, clean_lines, linenum, error)
  CheckPrintf(filename, clean_lines, linenum, error)

  # Header-only checks that are not implemented here yet:
  # TODO(unknown): check that 1-arg constructors are explicit.
  #                How to tell it's a constructor?
  #                (handled in CheckForNonStandardConstructs for now)
  # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
  #                (level 1 error)

  # Check if people are using the verboten C basic types.  The only exception
  # we regularly allow is "unsigned short port" for port.
  if Search(r'\bshort port\b', line):
    if not Search(r'\bunsigned short port\b', line):
      error(filename, linenum, 'runtime/int', 4,
            'Use "unsigned short" for ports, not "short"')
  else:
    match = Search(r'\b(short|long(?! +double)|long long)\b', line)
    if match:
      error(filename, linenum, 'runtime/int', 4,
            'Use int16/int64/etc, rather than the C type %s' % match.group(1))

  # Check if some verboten operator overloading is going on
  # TODO(unknown): catch out-of-line unary operator&:
  #   class X {};
  #   int operator&(const X& x) { return 42; }  // unary operator&
  # The trick is it's hard to tell apart from binary operator&:
  #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
  if Search(r'\boperator\s*&\s*\(\s*\)', line):
    error(filename, linenum, 'runtime/operator', 4,
          'Unary operator& is dangerous.  Do not use it.')

  # Check for suspicious usage of "if" like
  # } if (a == b) {
  if Search(r'\}\s*if\s*\(', line):
    error(filename, linenum, 'readability/braces', 4,
          'Did you mean "else if"? If not, start a new line for "if".')

  # Check for potential format string bugs like printf(foo).
  # We constrain the pattern not to pick things like DocidForPrintf(foo).
  # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
  # TODO(unknown): Catch the following case. Need to change the calling
  # convention of the whole function to process multiple line to handle it.
  #   printf(
  #       boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
  printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
  if printf_args:
    # Only a lone expression (no comma, no whitespace) is suspicious.
    match = Match(r'([\w.\->()]+)$', printf_args)
    if match and match.group(1) != '__VA_ARGS__':
      function_name = re.search(r'\b((?:string)?printf)\s*\(',
                                line, re.I).group(1)
      error(filename, linenum, 'runtime/printf', 4,
            'Potential format string bug. Do %s("%%s", %s) instead.'
            % (function_name, match.group(1)))

  # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
  # NOTE(review): in the second pattern '^' binds only to the first
  # alternative and '$' only to the last, so any second argument that merely
  # starts with digits is accepted -- confirm whether that is intended.
  match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
  if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)):
    error(filename, linenum, 'runtime/memset', 4,
          'Did you mean "memset(%s, 0, %s)"?'
          % (match.group(1), match.group(2)))

  # if Search(r'\busing namespace\b', line):
  #   error(filename, linenum, 'build/namespaces', 5,
  #         'Do not use namespace using-directives.  '
  #         'Use using-declarations instead.')

  # Detect variable-length arrays.
  match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
  if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
      match.group(3).find(']') == -1):
    # Split the size using space and arithmetic operators as delimiters.
    # If any of the resulting tokens are not compile time constants then
    # report the error.
    #
    # '>>' must be a delimiter in its own right.  The pattern used to end in
    # '>>]', which can never match here (the size expression is known not to
    # contain ']'), so a size such as "kSize >> 1" produced a '>>' token and
    # was wrongly reported as a variable-length array.
    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', match.group(3))
    is_const = True
    skip_next = False
    for tok in tokens:
      if skip_next:
        skip_next = False
        continue

      if Search(r'sizeof\(.+\)', tok): continue
      if Search(r'arraysize\(\w+\)', tok): continue

      tok = tok.lstrip('(')
      tok = tok.rstrip(')')
      if not tok: continue
      if Match(r'\d+', tok): continue
      if Match(r'0[xX][0-9a-fA-F]+', tok): continue
      if Match(r'k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
      # A catch all for tricky sizeof cases, including 'sizeof expression',
      # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
      # requires skipping the next token because we split on ' ' and '*'.
      if tok.startswith('sizeof'):
        skip_next = True
        continue
      is_const = False
      break
    if not is_const:
      error(filename, linenum, 'runtime/arrays', 1,
            'Do not use variable-length arrays.  Use an appropriately named '
            "('k' followed by CamelCase) compile-time constant for the size.")

  # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
  # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
  # in the class declaration.
  match = Match(
      (r'\s*'
       r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
       r'\(.*\);$'),
      line)
  if match and linenum + 1 < clean_lines.NumLines():
    next_line = clean_lines.elided[linenum + 1]
    # We allow some, but not all, declarations of variables to be present
    # in the statement that defines the class.  The [\w\*,\s]* fragment of
    # the regular expression below allows users to declare instances of
    # the class or pointers to instances, but not less common types such
    # as function pointers or arrays.  It's a tradeoff between allowing
    # reasonable code and avoiding trying to parse more C++ using regexps.
    if not Search(r'^\s*}[\w\*,\s]*;', next_line):
      error(filename, linenum, 'readability/constructors', 3,
            match.group(1) + ' should be the last thing in the class')

  # Check for use of unnamed namespaces in header files.  Registration
  # macros are typically OK, so we allow use of "namespace {" on lines
  # that end with backslashes.
  if (file_extension == 'h'
      and Search(r'\bnamespace\s*{', line)
      and line[-1] != '\\'):
    error(filename, linenum, 'build/namespaces', 4,
          'Do not use unnamed namespaces in header files.  See '
          'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
          ' for more information.')
4537
4538
def CheckGlobalStatic(filename, clean_lines, linenum, error):
  """Looks for unsafe global/static strings and self-initialized members.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # A line that starts (at column 0) with an optionally static and/or const
  # "string <name>" is a candidate static/global STL string.  Those are
  # dangerous because the C++ language does not guarantee that globals with
  # constructors are initialized before the first access.
  string_decl = Match(
      r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
      line)
  if string_decl:
    # Rule out the known false positives before reporting.
    #
    # String pointers (as opposed to values):
    #    string *pointer
    #    const string *pointer
    #    string const *pointer
    #    string *const pointer
    is_string_pointer = Search(
        r'\bstring\b(\s+const)?\s*\*\s*(const\s+)?\w', line)

    # Operators.  These are matched separately because operator names cross
    # non-word boundaries, and trying to match both operators and functions
    # at the same time would decrease accuracy of matching identifiers:
    #    string Class::operator*()
    is_operator = Search(r'\boperator\W', line)

    # Functions and template specializations:
    #    string Function<Type>(...
    #    string Class<Type>::Method(...
    is_function = Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
                        string_decl.group(3))

    if not (is_string_pointer or is_operator or is_function):
      error(filename, linenum, 'runtime/string', 4,
            'For a static/global string constant, use a C style string instead: '
            '"%schar %s[]".' %
            (string_decl.group(1), string_decl.group(2)))

  # A name ending in '_' that is called with itself as the only argument,
  # as in "foo_(foo_)".
  if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
    error(filename, linenum, 'runtime/init', 4,
          'You seem to be initializing a member variable with itself.')
4584
4585
def CheckPrintf(filename, clean_lines, linenum, error):
  """Reports questionable uses of the printf and string-copy families.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # The second argument of snprintf shouldn't be a literal.  A literal 0 is
  # tolerated, since that form is used to calculate the required size.
  size_arg = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
  if size_arg:
    buffer_expr = size_arg.group(1)
    literal_size = size_arg.group(2)
    if literal_size != '0':
      error(filename, linenum, 'runtime/printf', 3,
            'If you can, use sizeof(%s) instead of %s as the 2nd arg '
            'to snprintf.' % (buffer_expr, literal_size))

  # Forbidden or discouraged C functions.
  if Search(r'\bsprintf\b', line):
    error(filename, linenum, 'runtime/printf', 5,
          'Never use sprintf. Use snprintf instead.')

  unsafe_call = Search(r'\b(strcpy|strcat)\b', line)
  if unsafe_call:
    error(filename, linenum, 'runtime/printf', 4,
          'Almost always, snprintf is better than %s' % unsafe_call.group(1))
4613
4614
def IsDerivedFunction(clean_lines, linenum):
  """Tells whether the function on this line carries "override".

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
  Returns:
    A true value if "override" appears after the parenthesis that closes
    the first '(' on the line; a false value otherwise (including when
    the line has no '(' or the closing parenthesis cannot be found).
  """
  # The leftmost '(' is taken as the start of the parameter list.
  paren_start = clean_lines.elided[linenum].find('(')
  if paren_start < 0:
    return False

  # Jump to the matching ')' and inspect whatever follows it.
  end_text, _, paren_end = CloseExpression(clean_lines, linenum, paren_start)
  if paren_end < 0:
    return False
  return Search(r'\boverride\b', end_text[paren_end:])
4632
4633
def IsInitializerList(clean_lines, linenum):
  """Guesses whether a line sits inside a constructor initializer list.

  Walks backwards from linenum (down to, but not including, line 1)
  until a line gives a verdict one way or the other.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
  Returns:
    True if current line appears to be inside constructor initializer
    list, False otherwise.
  """
  cursor = linenum
  while cursor > 1:
    text = clean_lines.elided[cursor]

    # On the starting line only, drop a trailing '{' so that the opening
    # brace of the function body is not mistaken for a block boundary.
    if cursor == linenum:
      trailing_brace = Match(r'^(.*)\{\s*$', text)
      if trailing_brace:
        text = trailing_brace.group(1)

    # Positive evidence, either:
    # - a lone colon followed by "name(" or "name{", which tends to start
    #   an initializer list (or be a ternary inside one), or
    # - "}," at end of line, probably the end of a brace-initialized
    #   member.
    if (Search(r'\s:\s*\w+[({]', text) or
        Search(r'\}\s*,\s*$', text)):
      return True

    # Negative evidence: a line ending in '{', '}' or ';' is probably the
    # end of the previous function or the start of the enclosing class or
    # namespace, and we got here without seeing the starting colon.
    if Search(r'[{};]\s*$', text):
      return False

    cursor -= 1

  # Ran out of lines without finding the start of an initializer list.
  return False
4674
4675
def CheckForNonConstReference(filename, clean_lines, linenum,
                              nesting_state, error):
  """Check for non-const references.

  Separate from CheckLanguage since it scans backwards from current
  line, instead of scanning forward.

  NOTE(review): the loop that actually reports 'runtime/references' is
  commented out at the bottom of this function, so as written the function
  performs all of its filtering but never calls error.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # Do nothing if there is no '&' on current line.
  line = clean_lines.elided[linenum]
  if '&' not in line:
    return

  # If a function is inherited, current function doesn't have much of
  # a choice, so any non-const references should not be blamed on
  # derived function.
  if IsDerivedFunction(clean_lines, linenum):
    return

  # Long type names may be broken across multiple lines, usually in one
  # of these forms:
  #   LongType
  #       ::LongTypeContinued &identifier
  #   LongType::
  #       LongTypeContinued &identifier
  #   LongType<
  #       ...>::LongTypeContinued &identifier
  #
  # If we detected a type split across two lines, join the previous
  # line to current line so that we can match const references
  # accordingly.
  #
  # Note that this only scans back one line, since scanning back
  # arbitrary number of lines would be expensive.  If you have a type
  # that spans more than 2 lines, please use a typedef.
  if linenum > 1:
    previous = None
    if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line):
      # previous_line\n + ::current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$',
                        clean_lines.elided[linenum - 1])
    elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line):
      # previous_line::\n + current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$',
                        clean_lines.elided[linenum - 1])
    if previous:
      # Glue the tail of the previous line onto the current one.
      line = previous.group(1) + line.lstrip()
    else:
      # Check for templated parameter that is split across multiple lines
      endpos = line.rfind('>')
      if endpos > -1:
        (_, startline, startpos) = ReverseCloseExpression(
            clean_lines, linenum, endpos)
        if startpos > -1 and startline < linenum:
          # Found the matching < on an earlier line, collect all
          # pieces up to current line.
          line = ''
          for i in xrange(startline, linenum + 1):
            line += clean_lines.elided[i].strip()

  # Check for non-const references in function parameters.  A single '&' may
  # be found in the following places:
  #   inside expression: binary & for bitwise AND
  #   inside expression: unary & for taking the address of something
  #   inside declarators: reference parameter
  # We will exclude the first two cases by checking that we are not inside a
  # function body, including one that was just introduced by a trailing '{'.
  # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
  if (nesting_state.previous_stack_top and
      not (isinstance(nesting_state.previous_stack_top, _ClassInfo) or
           isinstance(nesting_state.previous_stack_top, _NamespaceInfo))):
    # Not at toplevel, not within a class, and not within a namespace
    return

  # Avoid preprocessors: skip lines that end in a backslash continuation.
  if Search(r'\\\s*$', line):
    return

  # Avoid constructor initializer lists
  if IsInitializerList(clean_lines, linenum):
    return

  # We allow non-const references in a few standard places, like functions
  # called "swap()" or iostream operators like "<<" or ">>".  Do not check
  # those function parameters.
  #
  # We also accept & in static_assert, which looks like a function but
  # it's actually a declaration expression.
  whitelisted_functions = (r'(?:[sS]wap(?:<\w:+>)?|'
                           r'operator\s*[<>][<>]|'
                           r'static_assert|COMPILE_ASSERT'
                           r')\s*\(')
  if Search(whitelisted_functions, line):
    return
  elif not Search(r'\S+\([^)]*$', line):
    # Don't see a whitelisted function on this line.  Actually we
    # didn't see any function name on this line, so this is likely a
    # multi-line parameter list.  Try a bit harder to catch this case
    # by looking at the two preceding lines.
    for i in xrange(2):
      if (linenum > i and
          Search(whitelisted_functions, clean_lines.elided[linenum - i - 1])):
        return

  # NOTE(review): 'decls' is only consumed by the commented-out reporting
  # loop below; with that loop disabled the value is computed and discarded.
  decls = ReplaceAll(r'{[^}]*}', ' ', line)  # exclude function body
  # for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
  #   if not Match(_RE_PATTERN_CONST_REF_PARAM, parameter):
  #     error(filename, linenum, 'runtime/references', 2,
  #           'Is this a non-const reference? '
  #           'If so, make const or use a pointer: ' +
  #           ReplaceAll(' *<', '<', parameter))
4793
4794
def CheckCasts(filename, clean_lines, linenum, error):
  """Runs the cast-related checks on a single line.

  Covers function-style casts to basic types, C-style casts (delegated
  to CheckCStyleCast), and taking the address of a cast.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  text = clean_lines.elided[linenum]

  # Function-style casts such as int(x).  Only the most common basic types
  # are recognized.  The parameterless form, e.g. bool(), is allowed since
  # it is probably a member operator declaration or default constructor.
  #
  # Group 1 is an optional "new " or "<" prefix, used to silence two false
  # positives: new-expressions, and function types used as the first
  # template argument.  For templates we only match a type immediately
  # following the bracket without spaces, since a less-than comparison is
  # usually followed by a space:
  #
  #   function<double(double)>   // bracket + no space = false positive
  #   value < double(42)         // bracket + space = true positive
  ctor_cast = Search(
      r'(\bnew\s+|\S<\s*(?:const\s+)?)?\b'
      r'(int|float|double|bool|char|int32|uint32|int64|uint64)'
      r'(\([^)].*)', text)
  in_function_args = ExpectingFunctionArgs(clean_lines, linenum)
  if ctor_cast and not in_function_args:
    new_or_template, type_name, tail = ctor_cast.groups()

    # Anything in this list silences the warning:
    # - new-expressions and template arguments (see above)
    # - function pointers and casts to pointer types
    # - alias declarations
    # - placement new
    suppressed = (
        new_or_template is not None or
        (tail and
         (Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(', tail) or
          tail.startswith('(*)'))) or
        Match(r'\s*using\s+\S+\s*=\s*' + type_name, text) or
        Search(r'new\(\S+\)\s*' + type_name, text))
    if not suppressed:
      error(filename, linenum, 'readability/casting', 4,
            'Using deprecated casting style.  '
            'Use static_cast<%s>(...) instead' %
            type_name)

  raw_text = clean_lines.raw_lines[linenum]

  if not in_function_args:
    CheckCStyleCast(filename, linenum, text, raw_text,
                    'static_cast',
                    r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)

  # This doesn't catch all cases. Consider (const char * const)"hello".
  #
  # (char *) "foo" should always be a const_cast (reinterpret_cast won't
  # compile).  Only when that check stays silent do we look for pointer
  # casts applied to something other than a string constant.
  flagged_const_cast = CheckCStyleCast(
      filename, linenum, text, raw_text,
      'const_cast', r'\((char\s?\*+\s?)\)\s*"', error)
  if not flagged_const_cast:
    CheckCStyleCast(filename, linenum, text, raw_text,
                    'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)

  # Finally, look for people taking the address of a cast.  This is
  # dangerous -- casts can assign to temporaries, so the pointer doesn't
  # point where you think.
  address_of_cast = Search(
      r'(?:&\(([^)]+)\)[\w(])|'
      r'(?:&(static|dynamic|down|reinterpret)_cast\b)', text)
  if not address_of_cast or address_of_cast.group(1) == '*':
    return

  # Try a better error message when the & is bound to something
  # dereferenced by the casted pointer, as opposed to the casted
  # pointer itself: skip over <...> and (...) after the cast keyword and
  # see whether '->' or '[' follows.
  dereferenced_after_cast = False
  cast_prefix = Match(
      r'^(.*&(?:static|dynamic|down|reinterpret)_cast\b)<', text)
  if cast_prefix:
    _, angle_line, angle_end = CloseExpression(
        clean_lines, linenum, len(cast_prefix.group(1)))
    if angle_end >= 0 and clean_lines.elided[angle_line][angle_end] == '(':
      _, paren_line, paren_end = CloseExpression(
          clean_lines, angle_line, angle_end)
      if paren_end >= 0:
        following = clean_lines.elided[paren_line][paren_end:]
        if paren_line < clean_lines.NumLines() - 1:
          following += clean_lines.elided[paren_line + 1]
        if Match(r'\s*(?:->|\[)', following):
          dereferenced_after_cast = True

  if dereferenced_after_cast:
    error(filename, linenum, 'readability/casting', 4,
          ('Are you taking an address of something dereferenced '
           'from a cast?  Wrapping the dereferenced expression in '
           'parentheses will make the binding more obvious'))
  else:
    error(filename, linenum, 'runtime/casting', 4,
          ('Are you taking an address of a cast?  '
           'This is dangerous: could be a temp var.  '
           'Take the address before doing the cast, rather than after'))
4900
4901
def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
                    error):
  """Looks for a C-style cast matching pattern and reports it.

  A "(type)" that turns out to be a single unnamed function parameter
  is reported as 'readability/function' instead of as a cast.

  Args:
    filename: The name of the current file.
    linenum: The number of the line to check.
    line: The line of code to check.
    raw_line: The raw line of code to check, with comments.
    cast_type: The string for the C++ cast to recommend.  This is either
      reinterpret_cast, static_cast, or const_cast, depending.
    pattern: The regular expression used to find C-style casts.
    error: The function to call with any errors found.

  Returns:
    True if an error was emitted.
    False otherwise.
  """
  cast = Search(pattern, line)
  if not cast:
    return False

  # Text that precedes the opening parenthesis of the candidate cast.
  before_paren = line[0:cast.start(1) - 1]

  # Keywords that tend to look like casts, and macros (all-caps names),
  # which are generally troublesome, are not casts.
  if Match(r'.*\b(?:sizeof|alignof|alignas|[A-Z_]+)\s*$', before_paren):
    return False

  # operator++(int) and operator--(int)
  if before_paren.endswith((' operator++', ' operator--')):
    return False

  # A single unnamed argument for a function tends to look like an old
  # style cast.  What follows the ")" tells the two apart.
  #
  # These we want to warn about, since the style guide explicitly
  # requires all parameters to be named:
  #   Function(int);
  #   Function(int) {
  #   ConstMember(int) const;
  #   ConstMember(int) const {
  #   ExceptionMember(int) throw (...);
  #   ExceptionMember(int) throw (...) {
  #   PureVirtual(int) = 0;
  #
  # These are functions of some sort where people often omit the
  # identifiers to reduce clutter, so they are let through:
  #   (FunctionPointer)(int);
  #   (FunctionPointer)(int) = value;
  #   Function((function_pointer_arg)(int))
  #   <TemplateArgument(int)>;
  #   <(FunctionPointerTemplateArgument)(int)>;
  after_cast = line[cast.end(0):]
  looks_like_parameter = Match(
      r'^\s*(?:;|const\b|throw\b|final\b|override\b|=|>|\{|\))',
      after_cast)

  if not looks_like_parameter:
    # All that should be left at this point is an actual cast.
    error(filename, linenum, 'readability/casting', 4,
          'Using C-style cast.  Use %s<%s>(...) instead' %
          (cast_type, cast.group(1)))
    return True

  # Don't warn on any kind of template arguments.
  if Match(r'^\s*>', after_cast):
    return False

  # Don't warn on assignments to function pointers, but keep warnings for
  # unnamed parameters to pure virtual functions.  Note that this pattern
  # will also pass on assignments of "0" to function pointers, but the
  # preferred values for those would be "nullptr" or "NULL".
  assigned_value = Match(r'^\s=\s*(\S+)\s*;', after_cast)
  if assigned_value and assigned_value.group(1) != '0':
    return False

  # Don't warn on function pointer declarations: those have a ")" right
  # before the "(type)" string.
  if Match(r'.*\)\s*$', line[0:cast.start(0)]):
    return False

  # Don't warn if the parameter is named with block comments, e.g.:
  #  Function(int /*unused_param*/);
  if '/*' in raw_line:
    return False

  # Passed all filters: this is an unnamed parameter.
  error(filename, linenum, 'readability/function', 3,
        'All parameters should be named in a function')
  return True
4996
4997
def ExpectingFunctionArgs(clean_lines, linenum):
  """Tells whether function-type arguments are expected at this line.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.

  Returns:
    A true value if the line at 'linenum' is inside something that expects
    arguments of function types (a MOCK_METHOD-style macro, or the template
    argument of std::function / std::mfunction opened on the previous
    line); a false value otherwise.
  """
  # The macro starts on this very line.
  same_line = Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(',
                    clean_lines.elided[linenum])
  if same_line:
    return same_line

  # The remaining checks look at the one or two preceding lines.
  if not linenum >= 2:
    return False

  one_back = clean_lines.elided[linenum - 1]
  two_back = clean_lines.elided[linenum - 2]
  return (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$',
                one_back) or
          Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$',
                two_back) or
          Search(r'\bstd::m?function\s*\<\s*$',
                 one_back))
5018
5019
# Each entry pairs a header with the template names that are looked up in
# source lines as a reason to include that header.  Order matters: the
# compiled pattern list below is built in this order.
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', (
        'unary_function', 'binary_function',
        'plus', 'minus', 'multiplies', 'divides', 'modulus', 'negate',
        'equal_to', 'not_equal_to', 'greater', 'less',
        'greater_equal', 'less_equal',
        'logical_and', 'logical_or', 'logical_not',
        'unary_negate', 'not1', 'binary_negate', 'not2',
        'bind1st', 'bind2nd',
        'pointer_to_unary_function', 'pointer_to_binary_function',
        'ptr_fun',
        'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
        'mem_fun_ref_t',
        'const_mem_fun_t', 'const_mem_fun1_t',
        'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
        'mem_fun_ref',
    )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
)

# string is handled on its own because it is not spelled as a template.
_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# (compiled pattern, template name, header) triples for <algorithm>.
# The patterns match max<type>(..., ...) and max(..., ...), but not
# foo->max, foo.max or type::max().
_re_pattern_algorithm_header = []
for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                  'transform'):
  _re_pattern_algorithm_header += [
      (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
       _template,
       '<algorithm>'),
  ]

# (compiled pattern, "name<>", header) triples, one per template name in
# _HEADERS_CONTAINING_TEMPLATES.
_re_pattern_templates = []
for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
  for _template in _templates:
    _re_pattern_templates += [
        (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
         _template + '<>',
         _header),
    ]
5076
5077
def FilesBelongToSameModule(filename_cc, filename_h):
  """Decides whether a .cc file and a .h file are part of one module.

  A 'module' here means the following: foo.h, foo-inl.h, foo.cc,
  foo_test.cc and foo_unittest.cc belong to the same 'module' if they
  are in the same directory.  some/path/public/xyzzy and
  some/path/internal/xyzzy are also considered to belong to the same
  module.

  The .cc path may be longer than the header path, for example
  '/absolute/path/to/base/sysinfo.cc' including 'base/sysinfo.h'.  In
  that case the leading part of the .cc path that the header path lacks
  is returned as well, so the caller can use it to open the header.  The
  real include paths are not available here, hence this guesswork.

  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
  according to this implementation. Because of this, this function gives
  some false positives. This should be sufficiently rare in practice.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.
  """
  # Only a '.cc' / '.h' pair can form a module.
  if not filename_cc.endswith('.cc') or not filename_h.endswith('.h'):
    return (False, '')

  # Reduce the source path to its module stem: drop the extension, at most
  # one test suffix, and the public/internal directory components.
  stem_cc = filename_cc[:-len('.cc')]
  for test_suffix in ('_unittest', '_test'):
    if stem_cc.endswith(test_suffix):
      stem_cc = stem_cc[:-len(test_suffix)]
      break
  stem_cc = stem_cc.replace('/public/', '/').replace('/internal/', '/')

  # Same reduction for the header, whose only optional suffix is '-inl'.
  stem_h = filename_h[:-len('.h')]
  if stem_h.endswith('-inl'):
    stem_h = stem_h[:-len('-inl')]
  stem_h = stem_h.replace('/public/', '/').replace('/internal/', '/')

  if not stem_cc.endswith(stem_h):
    return False, ''
  # Whatever precedes the shared tail is the prefix needed to reach the
  # header from where the source file lives.
  return True, stem_cc[:-len(stem_h)]
5131
5132
def UpdateIncludeState(filename, include_state, io=codecs):
  """Fill up the include_state with new includes found from the file.

  Every line of the header is stripped of comments and matched against
  _RE_PATTERN_INCLUDE; each include found is recorded in include_state
  unless that key is already present.

  Args:
    filename: the name of the header to read.
    include_state: an _IncludeState instance in which the headers are inserted.
    io: The io factory to use to read the file. Provided for testability.

  Returns:
    True if a header was successfully added. False otherwise (the file
    could not be opened).
  """
  try:
    headerfile = io.open(filename, 'r', 'utf8', 'replace')
  except IOError:
    return False
  try:
    for linenum, line in enumerate(headerfile, 1):
      clean_line = CleanseComments(line)
      match = _RE_PATTERN_INCLUDE.search(clean_line)
      if match:
        include = match.group(2)
        # The value formatting is cute, but not really used right now.
        # What matters here is that the key is in include_state.
        include_state.setdefault(include, '%s:%d' % (filename, linenum))
  finally:
    # Release the file handle.  An injected io factory may hand back a
    # plain iterable with no close(), so only call it when it exists.
    close = getattr(headerfile, 'close', None)
    if close is not None:
      close()
  return True
5160
5161
def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
                              io=codecs):
  """Reports for missing stl includes.

  This function will output warnings to make sure you are including the headers
  necessary for the stl containers and functions that you use. We only give one
  reason to include a header. For example, if you use both equal_to<> and
  less<> in a .h file, only one (the latter in the file) of these will be
  reported as a reason to include the <functional>.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    include_state: An _IncludeState instance.
    error: The function to call with any errors found.
    io: The IO factory to use to read the header file. Provided for unittest
        injection.
  """
  required = {}  # A map of header name to linenumber and the template entity.
                 # Example of required: { '<functional>': (1219, 'less<>') }

  for linenum in xrange(clean_lines.NumLines()):
    line = clean_lines.elided[linenum]
    if not line or line[0] == '#':
      continue

    # String is special -- it is a non-templatized type in STL.
    matched = _RE_PATTERN_STRING.search(line)
    if matched:
      # Don't warn about strings in non-STL namespaces:
      # (We check only the first match per line; good enough.)
      prefix = line[:matched.start()]
      if prefix.endswith('std::') or not prefix.endswith('::'):
        required['<string>'] = (linenum, 'string')

    for pattern, template, header in _re_pattern_algorithm_header:
      if pattern.search(line):
        required[header] = (linenum, template)

    # The following check is just a speed up, no semantics are changed:
    # every template pattern needs a '<', so lines without one are skipped.
    if '<' not in line:
      continue

    for pattern, template, header in _re_pattern_templates:
      if pattern.search(line):
        required[header] = (linenum, template)

  # The policy is that if you #include something in foo.h you don't need to
  # include it again in foo.cc. Here, we will look at possible includes.
  # Let's copy the include_state so it is only messed up within this function.
  include_state = include_state.copy()

  # Did we find the header for this file (if any) and successfully load it?
  header_found = False

  # Use the absolute path so that matching works properly.
  abs_filename = FileInfo(filename).FullName()

  # For Emacs's flymake.
  # If cpplint is invoked from Emacs's flymake, a temporary file is generated
  # by flymake and that file name might end with '_flymake.cc'. In that case,
  # restore original file name here so that the corresponding header file can be
  # found.
  # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
  # instead of 'foo_flymake.h'
  abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)

  # include_state is modified during iteration (UpdateIncludeState inserts
  # keys), so we iterate over a snapshot of the keys.  The explicit list()
  # matters where keys() returns a live view rather than a list: mutating
  # the dict while iterating such a view raises RuntimeError.
  header_keys = list(include_state.keys())
  for header in header_keys:
    (same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
    fullpath = common_path + header
    if same_module and UpdateIncludeState(fullpath, include_state, io):
      header_found = True

  # If we can't find the header file for a .cc, assume it's because we don't
  # know where to look. In that case we'll give up as we're not sure they
  # didn't include it in the .h file.
  # TODO(unknown): Do a better job of finding .h files so we are confident that
  # not having the .h file means there isn't one.
  if filename.endswith('.cc') and not header_found:
    return

  # All the lines have been processed, report the errors found.
  for required_header_unstripped in required:
    template = required[required_header_unstripped][1]
    if required_header_unstripped.strip('<>"') not in include_state:
      error(filename, required[required_header_unstripped][0],
            'build/include_what_you_use', 4,
            'Add #include ' + required_header_unstripped + ' for ' + template)
5253
5254
# Matches make_pair followed by an explicit template argument list.
_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')


def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
  """Flags make_pair calls that spell out their template arguments.

  G++ 4.6 in C++11 mode fails badly if make_pair's template arguments are
  specified explicitly, and such use isn't intended in any case.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  text = clean_lines.elided[linenum]
  if _RE_PATTERN_EXPLICIT_MAKEPAIR.search(text) is None:
    return

  high_confidence = 4
  error(filename, linenum, 'build/explicit_make_pair', high_confidence,
        'For C++11-compatibility, omit template arguments from make_pair'
        ' OR use pair directly OR if appropriate, construct a pair directly')
def CheckDefaultLambdaCaptures(filename, clean_lines, linenum, error):
  """Flags lambdas that use a default capture.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # A lambda introducer specifies a default capture if it starts with "[="
  # or if it starts with "[&" _not_ followed by an identifier.
  introducer = Match(r'^(.*)\[\s*(?:=|&[^\w])', clean_lines.elided[linenum])
  if not introducer:
    return

  # Found a candidate; find where the '[' that follows group 1 is closed.
  bracket_pos = len(introducer.group(1))
  closing_text, _, after_bracket = CloseExpression(
      clean_lines, linenum, bracket_pos)
  if after_bracket < 0:
    return

  # Only a real lambda continues with an open parenthesis (for
  # lambda-declarator) or an open brace (for compound-statement).
  if Match(r'^\s*[{(]', closing_text[after_bracket:]):
    error(filename, linenum, 'build/c++11',
          4,  # 4 = high confidence
          'Default lambda captures are an unapproved C++ feature.')
5300
5301
5302
5303
def ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions=None):
  """Processes a single line in the file.

  Runs the per-line checks in a fixed order.  Nothing after the nesting
  state update is run for a line that is inside an asm block.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    clean_lines: An array of strings, each representing a line of the file,
                 with comments stripped.
    line: Number of line being processed.
    include_state: An _IncludeState instance in which the headers are inserted.
    function_state: A _FunctionState instance which counts function lines, etc.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, confidence level, and message
    extra_check_functions: An optional sequence of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error.  None (the default) or an empty sequence
                           means no extra checks.
  """
  raw_lines = clean_lines.raw_lines
  ParseNolintSuppressions(filename, raw_lines[line], line, error)
  nesting_state.Update(filename, clean_lines, line, error)
  if nesting_state.InAsmBlock(): return
  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
  CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error)
  CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                nesting_state, error)
  CheckForNonConstReference(filename, clean_lines, line, nesting_state, error)
  CheckForNonStandardConstructs(filename, clean_lines, line,
                                nesting_state, error)
  CheckVlogArguments(filename, clean_lines, line, error)
  CheckPosixThreading(filename, clean_lines, line, error)
  CheckInvalidIncrement(filename, clean_lines, line, error)
  CheckMakePairUsesDeduction(filename, clean_lines, line, error)
  # CheckDefaultLambdaCaptures(filename, clean_lines, line, error)
  # The default is None rather than a list literal so that no mutable
  # object is shared between calls.
  if extra_check_functions:
    for check_fn in extra_check_functions:
      check_fn(filename, clean_lines, line, error)
5344
def FlagCxx11Features(filename, clean_lines, linenum, error):
  """Flag those c++11 features that we only allow in certain places.

  Preprocessor lines other than macro definitions are skipped.  Every other
  line is searched for a fixed list of std:: names, and each hit is reported
  under the 'build/c++11' category.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # NOTE: the check for unapproved C++11 headers (cfenv, condition_variable,
  # fenv.h, ratio, regex, system_error) is disabled in this copy, so #include
  # lines are not inspected here and no include regex is evaluated.

  # The only place where we need to worry about C++11 keywords and library
  # features in preprocessor directives is in macro definitions.
  if Match(r'\s*#', line) and not Match(r'\s*#\s*define\b', line): return

  # These are classes and free functions.  The classes are always
  # mentioned as std::*, but we only catch the free functions if
  # they're not found by ADL.  They're alphabetical by header.
  for top_name in (
      # type_traits
      'alignment_of',
      'aligned_union',

      # utility
      'forward',
      ):
    if Search(r'\bstd::%s\b' % top_name, line):
      error(filename, linenum, 'build/c++11', 5,
            ('std::%s is an unapproved C++11 class or function.  Send c-style '
             'an example of where it would make your code more readable, and '
             'they may let you use it.') % top_name)
5388
5389
def ProcessFileData(filename, file_extension, lines, error,
                    extra_check_functions=None):
  """Performs lint checks and reports any errors to the given error function.

  Whole-file checks (copyright, header guard, include-what-you-use, bad
  characters, newline at EOF) run around a per-line pass that calls
  ProcessLine and FlagCxx11Features for every cleansed line.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    lines: An array of strings, each representing a line of the file, with the
           last element being empty if the file is terminated with a newline.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, error category, confidence level, and
           message.
    extra_check_functions: An optional array of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error.  None (the default) means no extra checks.
  """
  # Normalize here so that a fresh list, never None and never a shared
  # default object, is what gets handed down to ProcessLine.
  if extra_check_functions is None:
    extra_check_functions = []

  lines = (['// marker so line numbers and indices both start at 1'] + lines +
           ['// marker so line numbers end in a known way'])

  include_state = _IncludeState()
  function_state = _FunctionState()
  nesting_state = NestingState()

  ResetNolintSuppressions()

  CheckForCopyright(filename, lines, error)

  if file_extension == 'h':
    CheckForHeaderGuard(filename, lines, error)

  RemoveMultiLineComments(filename, lines, error)
  clean_lines = CleansedLines(lines)
  for line in xrange(clean_lines.NumLines()):
    ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions)
    FlagCxx11Features(filename, clean_lines, line, error)
  nesting_state.CheckCompletedBlocks(filename, error)

  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

  # We check here rather than inside ProcessLine so that we see raw
  # lines rather than "cleaned" lines.
  CheckForBadCharacters(filename, lines, error)

  CheckForNewlineAtEOF(filename, lines, error)
5435
5436
def ProcessFile(filename, vlevel, extra_check_functions=None):
  """Does google-lint on a single file.

  The file (or stdin, when filename is '-') is read as UTF-8 with
  replacement of undecodable bytes.  If it cannot be opened, a message is
  written to stderr and the function returns without linting.  Files whose
  extension is not in _valid_extensions are reported on stderr and ignored.

  Args:
    filename: The name of the file to parse.

    vlevel: The level of errors to report.  Every error of confidence
    >= verbose_level will be reported.  0 is a good default.

    extra_check_functions: An optional array of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error.  None (the default) means no extra checks.
  """
  # Normalize so a list, never None and never a shared default object, is
  # what gets passed on to ProcessFileData.
  if extra_check_functions is None:
    extra_check_functions = []

  _SetVerboseLevel(vlevel)

  lf_lines = []
  crlf_lines = []
  try:
    # Support the UNIX convention of using "-" for stdin.  Note that
    # we are not opening the file with universal newline support
    # (which codecs doesn't support anyway), so the resulting lines do
    # contain trailing '\r' characters if we are reading a file that
    # has CRLF endings.
    # If after the split a trailing '\r' is present, it is removed
    # below.
    if filename == '-':
      lines = codecs.StreamReaderWriter(sys.stdin,
                                        codecs.getreader('utf8'),
                                        codecs.getwriter('utf8'),
                                        'replace').read().split('\n')
    else:
      # Close the handle explicitly instead of leaving it to garbage
      # collection, so linting many files does not accumulate open files.
      source = codecs.open(filename, 'r', 'utf8', 'replace')
      try:
        lines = source.read().split('\n')
      finally:
        source.close()

    # Remove trailing '\r'.
    # The last element is skipped: it is the extra trailing blank line we
    # get from split().  Recorded line numbers are 1-based.
    for index, text in enumerate(lines[:-1]):
      if text.endswith('\r'):
        lines[index] = text.rstrip('\r')
        crlf_lines.append(index + 1)
      else:
        lf_lines.append(index + 1)

  except IOError:
    sys.stderr.write(
        "Skipping input '%s': Can't open for reading\n" % filename)
    return

  # Note, if no dot is found, this will give the entire filename as the ext.
  file_extension = filename[filename.rfind('.') + 1:]

  # When reading from stdin, the extension is unknown, so no cpplint tests
  # should rely on the extension.
  if filename != '-' and file_extension not in _valid_extensions:
    sys.stderr.write('Ignoring %s; not a valid file name '
                     '(%s)\n' % (filename, ', '.join(_valid_extensions)))
  else:
    ProcessFileData(filename, file_extension, lines, Error,
                    extra_check_functions)

    # If end-of-line sequences are a mix of LF and CR-LF, issue
    # warnings on the lines with CR.
    #
    # Don't issue any warnings if all lines are uniformly LF or CR-LF,
    # since critique can handle these just fine, and the style guide
    # doesn't dictate a particular end of line sequence.
    #
    # We can't depend on os.linesep to determine what the desired
    # end-of-line sequence should be, since that will return the
    # server-side end-of-line sequence.
    if lf_lines and crlf_lines:
      # Warn on every line with CR.  An alternative approach might be to
      # check whether the file is mostly CRLF or just LF, and warn on the
      # minority, we bias toward LF here since most tools prefer LF.
      for linenum in crlf_lines:
        Error(filename, linenum, 'whitespace/newline', 1,
              'Unexpected \\r (^M) found; better to use only \\n')

  # sys.stderr.write('Done processing %s\n' % filename)
5516
5517
def PrintUsage(message):
  """Writes the usage text to stderr and terminates the program.

  Args:
    message: The optional error message.  When it is non-empty the process
             exits with it (prefixed by 'FATAL ERROR: '); otherwise the
             process exits with status 1.
  """
  sys.stderr.write(_USAGE)
  # sys.exit accepts either a message string or an integer status.
  exit_argument = ('\nFATAL ERROR: ' + message) if message else 1
  sys.exit(exit_argument)
5529
5530
def PrintCategories():
  """Writes every known error category to stderr, then exits with status 0.

  These are the categories used to filter messages via --filter.
  """
  # One indented category per output line.
  listing = ['  %s\n' % category for category in _ERROR_CATEGORIES]
  sys.stderr.write(''.join(listing))
  sys.exit(0)
5538
5539
def ParseArguments(args):
  """Parses the command line arguments.

  As side-effects this sets the output format, verbosity level, filters and
  counting style, and may overwrite the module globals _root, _line_length
  and _valid_extensions.  Invalid input ends the program through PrintUsage;
  an empty --filter value prints the category list and exits.

  Args:
    args: The command line arguments, without the program name.

  Returns:
    The list of filenames to lint.
  """
  # Declared once up front; the option loop below may rebind any of them.
  global _root
  global _line_length
  global _valid_extensions

  try:
    (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
                                                 'counting=',
                                                 'filter=',
                                                 'root=',
                                                 'linelength=',
                                                 'extensions='])
  except getopt.GetoptError:
    PrintUsage('Invalid arguments.')

  verbosity = _VerboseLevel()
  output_format = _OutputFormat()
  filters = ''
  counting_style = ''

  for (opt, val) in opts:
    if opt == '--help':
      PrintUsage(None)
    elif opt == '--output':
      if val not in ('emacs', 'vs7', 'eclipse'):
        PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.')
      output_format = val
    elif opt == '--verbose':
      # Report a usage error instead of dying with a ValueError traceback,
      # matching how --linelength is handled.
      try:
        verbosity = int(val)
      except ValueError:
        PrintUsage('Verbosity must be an integer.')
    elif opt == '--filter':
      filters = val
      if not filters:
        PrintCategories()
    elif opt == '--counting':
      if val not in ('total', 'toplevel', 'detailed'):
        PrintUsage('Valid counting options are total, toplevel, and detailed')
      counting_style = val
    elif opt == '--root':
      _root = val
    elif opt == '--linelength':
      try:
        _line_length = int(val)
      except ValueError:
        PrintUsage('Line length must be digits.')
    elif opt == '--extensions':
      # Splitting a string on ',' cannot fail, so no error handling is
      # needed for this option.
      _valid_extensions = set(val.split(','))

  if not filenames:
    PrintUsage('No files were specified.')

  _SetOutputFormat(output_format)
  _SetVerboseLevel(verbosity)
  _SetFilters(filters)
  _SetCountingStyle(counting_style)

  return filenames
5608
5609
def main():
  """Lints every file named on the command line and exits.

  The exit status is truthy (failure) when at least one error was counted.
  """
  targets = ParseArguments(sys.argv[1:])

  # Wrap stderr so that output containing non-ASCII characters is written
  # with replacement characters instead of killing the run.
  utf8_reader = codecs.getreader('utf8')
  utf8_writer = codecs.getwriter('utf8')
  sys.stderr = codecs.StreamReaderWriter(sys.stderr, utf8_reader,
                                         utf8_writer, 'replace')

  _cpplint_state.ResetErrorCounts()
  for target in targets:
    ProcessFile(target, _cpplint_state.verbose_level)
  _cpplint_state.PrintErrorCounts()

  found_errors = _cpplint_state.error_count > 0
  sys.exit(found_errors)
5626
5627
# Run the linter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
  main()
5630