1#!/usr/bin/env python
2# Copyright (c) 2016 Google Inc.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#     http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15"""Checks for copyright notices in all the files that need them under the
16current directory.  Optionally insert them.  When inserting, replaces
17an MIT or Khronos free use license with Apache 2.
18"""
19
20import argparse
21import fileinput
22import fnmatch
23import inspect
24import os
25import re
26import sys
27
# Designated copyright owners.  An existing copyright line is only
# recognized when it names one of these owners, and --update only accepts
# an author from this list.
AUTHORS = ['The Khronos Group Inc.',
           'LunarG Inc.',
           'Google Inc.',
           'Google LLC',
           'Pierre Moreau',
           'Samsung Inc']
# Year written into newly inserted copyright lines.
CURRENT_YEAR = '2019'

# Years and year ranges accepted in an existing copyright line.
YEARS = '(2014-2016|2015-2016|2016|2016-2017|2017|2018|2019)'
# Raw string so that `\(` is a regex escape rather than an (invalid) string
# escape.  Author names are passed through re.escape so that the '.' in
# names such as 'Google Inc.' only matches a literal period.
COPYRIGHT_RE = re.compile(
    r'Copyright \(c\) {} ({})'.format(
        YEARS, '|'.join(re.escape(author) for author in AUTHORS)))

# First and last lines of the MIT / Khronos free use license text.
MIT_BEGIN_RE = re.compile(r'Permission is hereby granted, '
                          r'free of charge, to any person obtaining a')
MIT_END_RE = re.compile(r'MATERIALS OR THE USE OR OTHER DEALINGS IN '
                        r'THE MATERIALS\.')
# First and last lines of the Apache 2 license notice.
APACHE2_BEGIN_RE = re.compile(r'Licensed under the Apache License, '
                              r'Version 2\.0 \(the "License"\);')
APACHE2_END_RE = re.compile(r'limitations under the License\.')

LICENSED = """Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License."""
# Offset, in lines, from the first line of LICENSED to its last line (one
# less than the number of lines).  Derived from the text so the two cannot
# drift apart.
LICENSED_LEN = LICENSED.count('\n')
61
62
def find(top, filename_glob, skip_glob_dir_list, skip_glob_files_list):
    """Returns files in the tree rooted at top matching filename_glob but not
    in directories matching skip_glob_dir_list nor files matching
    skip_glob_files_list.

    Args:
        top: Directory at which the walk starts.
        filename_glob: fnmatch-style pattern applied to each file's base name.
        skip_glob_dir_list: fnmatch-style patterns applied to directory base
            names; matching directories are not descended into.
        skip_glob_files_list: Paths or fnmatch-style patterns applied to each
            file's full path (top joined with the relative location).  An
            entry excludes a file when it equals the path exactly or matches
            it as a pattern.

    Returns:
        A list of paths, each formed by joining the walked directory with the
        file name.
    """

    def is_skipped_file(full_file):
        # The exact comparison is kept so that entries containing glob
        # metacharacters that are meant literally still work.
        if full_file in skip_glob_files_list:
            return True
        return any(fnmatch.fnmatch(full_file, pattern)
                   for pattern in skip_glob_files_list)

    file_list = []
    for path, dirs, files in os.walk(top):
        # Prune in place: os.walk only skips a directory if it is removed
        # from this very list object.
        dirs[:] = [d for d in dirs
                   if not any(fnmatch.fnmatch(d, pattern)
                              for pattern in skip_glob_dir_list)]
        for filename in fnmatch.filter(files, filename_glob):
            full_file = os.path.join(path, filename)
            if not is_skipped_file(full_file):
                file_list.append(full_file)
    return file_list
78
79
def filtered_descendants(glob):
    """Lists files under the current directory whose names match glob.

    Directories and files that are not relevant to the check are left out of
    the result.
    """
    ignored_dirs = ['third_party', 'external', 'CompilerIdCXX', 'build*',
                    'out*']
    ignored_files = ['./utils/clang-format-diff.py']
    return find('.', glob, ignored_dirs, ignored_files)
85
86
def skip(line):
    """Tells whether line is blank (whitespace only) or a shebang line."""
    content = line.lstrip()
    if content == '':
        return True
    return content.startswith('#!')
91
92
def comment(text, prefix):
    """Turns text into a comment block.

    Every line of text gets prefix and a single space put in front of it.
    Whitespace left at the end of a resulting line is removed, so an empty
    line of text becomes just the prefix.
    """
    commented = []
    for raw_line in text.split('\n'):
        prefixed = '{} {}'.format(prefix, raw_line)
        commented.append(prefixed.rstrip())
    return '\n'.join(commented)
101
102
def insert_copyright(author, glob, comment_prefix):
    """Finds all glob-matching files under the current directory and inserts the
    copyright message, and license notice.  An MIT license or Khronos free
    use license (modified MIT) is replaced with an Apache 2 license.

    The copyright message goes into the first non-whitespace, non-shebang line
    in a file.  The license notice follows it.  Both are prefixed on each line
    by comment_prefix and a space.

    Files are rewritten in place.  A file that consists only of blank and
    shebang lines is left unchanged, because nothing triggers the insertion.

    Args:
        author: Copyright owner to name in an inserted copyright line.
        glob: fnmatch-style pattern selecting the file names to process.
        comment_prefix: Line-comment marker for the selected files, e.g. '//'
            or '#'.
    """

    copyright_notice = comment(
        'Copyright (c) {} {}'.format(CURRENT_YEAR, author),
        comment_prefix) + '\n\n'
    licensed = comment(LICENSED, comment_prefix) + '\n\n'

    # Parsing states.
    initial = 0         # Have not seen a copyright declaration.
    seen_copyright = 1  # Seen a copyright line and no other interesting lines.
    in_mit_license = 2  # In the middle of an MIT or Khronos free use license.
    done = 9            # Exited any of the above.

    for path in filtered_descendants(glob):
        state = initial
        try:
            # With inplace=True, fileinput redirects sys.stdout into the file
            # being read, so everything written below becomes the new file
            # content.
            for line in fileinput.input(path, inplace=True):
                emit = True
                # States are compared with ==, not `is`: integer identity is
                # an implementation detail.
                if state == initial:
                    if COPYRIGHT_RE.search(line):
                        state = seen_copyright
                    elif not skip(line):
                        # Didn't see a copyright. Inject copyright and
                        # license ahead of this first real line.
                        sys.stdout.write(copyright_notice)
                        sys.stdout.write(licensed)
                        # Assume there isn't a previous license notice.
                        state = seen_copyright
                elif state == seen_copyright:
                    if MIT_BEGIN_RE.search(line):
                        state = in_mit_license
                        emit = False
                    elif APACHE2_BEGIN_RE.search(line):
                        # Assume an Apache license is preceded by a copyright
                        # notice.  So just emit it like the rest of the file.
                        state = done
                elif state == in_mit_license:
                    # Replace the MIT license with Apache 2: drop its lines
                    # and write the Apache notice once its last line is seen.
                    emit = False
                    if MIT_END_RE.search(line):
                        state = done
                        sys.stdout.write(licensed)
                if emit:
                    sys.stdout.write(line)
        finally:
            # Always release fileinput's global state and restore sys.stdout,
            # even if processing a line raised.
            fileinput.close()
153
154
def alert_if_no_copyright(glob, comment_prefix):
    """Prints names of all files missing either a copyright or Apache 2 license.

    Finds all glob-matching files under the current directory and checks if they
    contain the copyright message and license notice.  Prints the names of all the
    files that don't meet both criteria.

    The license notice counts as present when its last line is found exactly
    LICENSED_LEN lines after its first line.

    Args:
        glob: fnmatch-style pattern selecting the file names to check.
        comment_prefix: Not used by the check; accepted so that the same
            (glob, prefix) pairs can be passed here and to insert_copyright.

    Returns the total number of file names printed.
    """
    printed_count = 0
    for path in filtered_descendants(glob):
        has_copyright = False
        has_apache2 = False
        # Line number on which the Apache notice is expected to end; 0 means
        # no notice start has been seen yet (line numbers start at 1).
        apache_expected_end = 0
        with open(path) as contents:
            for line_num, line in enumerate(contents, 1):
                if COPYRIGHT_RE.search(line):
                    has_copyright = True
                if APACHE2_BEGIN_RE.search(line):
                    apache_expected_end = line_num + LICENSED_LEN
                # Compare by value: an identity test on ints only happens to
                # work for small numbers, so notices ending far into a file
                # would otherwise go undetected.
                if (line_num == apache_expected_end
                        and APACHE2_END_RE.search(line)):
                    has_apache2 = True
        if not (has_copyright and has_apache2):
            message = path
            if not has_copyright:
                message += ' has no copyright'
            if not has_apache2:
                message += ' has no Apache 2 license notice'
            print(message)
            printed_count += 1
    return printed_count
188
189
class ArgParser(argparse.ArgumentParser):
    """Command-line parser for this script.

    The module docstring serves as the program description, and a single
    optional --update argument is stored as `author`.
    """

    def __init__(self):
        module_doc = inspect.getdoc(sys.modules[__name__])
        super(ArgParser, self).__init__(description=module_doc)

        update_help = ('For files missing a copyright notice, insert one for '
                       'the given author, and add a license notice.  The '
                       'author must be in the AUTHORS list in the script.')
        self.add_argument('--update', dest='author', action='store',
                          help=update_help)
199
200
def main():
    """Script entry point: checks for, or with --update inserts, notices.

    Without --update, reports every file missing a copyright or license
    notice and exits with a true status if any was reported.  With --update,
    validates the author, inserts the notices, and exits with status 0.
    """
    # File name pattern paired with the line-comment marker for that kind
    # of file.
    glob_comment_pairs = [
        ('*.h', '//'),
        ('*.hpp', '//'),
        ('*.sh', '#'),
        ('*.py', '#'),
        ('*.cpp', '//'),
        ('CMakeLists.txt', '#'),
    ]
    args = ArgParser().parse_args()

    if not args.author:
        per_glob_counts = [alert_if_no_copyright(glob, prefix)
                           for glob, prefix in glob_comment_pairs]
        count = sum(per_glob_counts)
        sys.exit(count > 0)
    else:
        if args.author not in AUTHORS:
            print('error: --update argument must be in the AUTHORS list in '
                  'check_copyright.py: {}'.format(AUTHORS))
            sys.exit(1)
        for glob, prefix in glob_comment_pairs:
            insert_copyright(args.author, glob, prefix)
        sys.exit(0)
219
220
# Run the checker only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
223