1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3# Licensed to the Apache Software Foundation (ASF) under one
4# or more contributor license agreements.  See the NOTICE file
5# distributed with this work for additional information
6# regarding copyright ownership.  The ASF licenses this file
7# to you under the Apache License, Version 2.0 (the
8# "License"); you may not use this file except in compliance
9# with the License.  You may obtain a copy of the License at
10#
11#   http://www.apache.org/licenses/LICENSE-2.0
12#
13# Unless required by applicable law or agreed to in writing,
14# software distributed under the License is distributed on an
15# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16# KIND, either express or implied.  See the License for the
17# specific language governing permissions and limitations
18# under the License.
19
20"""Add or check license header
21
Usage:
23
24- add the default license header to source files that do not contain a valid
25  license:
26
  license_header.py add FILE [FILE ...]
28
- check if every file has a license header
30
31  license_header.py check
32"""
33
34import re
35import os
36import argparse
37from itertools import chain
38import logging
39import sys
40import subprocess
41
# The default Apache 2.0 license text. _get_license() splits it on newlines
# and prefixes every line with the comment mark of the target language, so the
# text itself must stay free of comment characters.
_LICENSE = """Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License."""
59
# A file is considered Apache-licensed as soon as one of its lines contains
# any of these marker strings.
_APACHE_LICENSE_PATTERNS = [
    'Licensed to the Apache Software Foundation',
]

# Marker strings of other license texts; together with an Apache marker they
# identify a file that carries more than one license.
_OTHER_LICENSE_PATTERNS = [
    'THE SOFTWARE IS PROVIDED "AS IS"',
    'THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS',
]

# Repository-wide license file; it is opened relative to the current working
# directory.
TOP_LEVEL_LICENSE_FILE = 'LICENSE'
65
# Folders and files that are exempt from the license check. Every entry is
# matched as a plain substring of the file path (see should_have_license), so
# a folder entry also covers everything below it.
_WHITE_LIST = [
               # Git submodules under different licenses
               '3rdparty/ctc_include/contrib/moderngpu',
               '3rdparty/dlpack',
               '3rdparty/dmlc-core',
               '3rdparty/googletest',
               '3rdparty/mkldnn',
               '3rdparty/nvidia_cub',
               '3rdparty/onnx-tensorrt',
               '3rdparty/openmp',
               '3rdparty/ps-lite',
               '3rdparty/tvm',

               # 3rdparty header files under different licenses
               'include/mkldnn',

               # Docs Sphinx themes under different licenses
               'docs/python_docs/themes',

               # Docs Jekyll website under different licenses
               'docs/static_site',

               # Code shared with project by author - see file for details
               'src/operator/special_functions-inl.h',

               # Code generated by scala-package, checked in, and verified
               'scala-package/init-native/src/main/native/org_apache_mxnet_init_native_c_api.h',
               'scala-package/native/src/main/native/org_apache_mxnet_native_c_api.h',

               # Licensed under Caffe header
               'src/operator/nn/pool.h',
               'src/operator/nn/pool.cuh',
               'src/operator/contrib/psroi_pooling-inl.h',
               'src/operator/contrib/nn/deformable_im2col.h',
               'src/operator/contrib/nn/deformable_im2col.cuh',
               'src/operator/contrib/nn/modulated_deformable_im2col.h',
               'src/operator/contrib/nn/modulated_deformable_im2col.cuh',
               'src/operator/nn/im2col.h',
               'src/operator/nn/im2col.cuh',

               # Licenses in headers
               # (the docs/static_site entry below is already covered by the
               # 'docs/static_site' folder entry above)
               'src/operator/contrib/erfinv-inl.h',
               'docs/_static/searchtools_custom.js',
               'docs/_static/js/clipboard.js',
               'docs/_static/js/clipboard.min.js',
               'docs/static_site/src/assets/js/clipboard.js',
               'cmake/upstream/FindCUDAToolkit.cmake',
               'cmake/upstream/select_compute_arch.cmake',

               # Licensed under 2-Clause BSD in header
               'example/ssd/dataset/pycocotools/coco.py',

               # Licensed under the Boost Software License, Version 1.0
               'cmake/Modules/FindJeMalloc.cmake',

               # Licensed under MIT license and contributed by Microsoft
               # (the two modulated_deformable_im2col entries also appear in
               # the Caffe section above)
               'src/operator/contrib/nn/modulated_deformable_im2col.cuh',
               'src/operator/contrib/nn/modulated_deformable_im2col.h',
               'src/operator/contrib/modulated_deformable_convolution-inl.h',
               'src/operator/contrib/modulated_deformable_convolution.cc',
               'src/operator/contrib/modulated_deformable_convolution.cu',

               # Licensed under Apache 2.0 license and contributed by Microsoft
               'src/operator/contrib/deformable_psroi_pooling.cu',
               'src/operator/contrib/deformable_convolution.cu',
               'src/operator/contrib/deformable_convolution-inl.h',
               'src/operator/contrib/psroi_pooling.cc',
               'src/operator/contrib/multi_proposal.cu',
               'src/operator/contrib/deformable_psroi_pooling-inl.h',
               'src/operator/contrib/deformable_psroi_pooling.cc',
               'src/operator/contrib/deformable_convolution.cc',
               'src/operator/contrib/psroi_pooling.cu',
               'src/operator/contrib/multi_proposal.cc',
               'src/operator/contrib/multi_proposal-inl.h',

               # Julia package metadata, generated by Pkg3.jl
               'julia/Project.toml',

               # Licensed under Apache 2.0 license
               'example/image-classification/predict-cpp/image-classification-predict.cc',
               'src/operator/nn/mkldnn/mkldnn_base-inl.h',

               # This file
               'tools/license_header.py',

               # Dual-Licensed under Apache 2.0 and Nvidia BSD-3
               'python/mxnet/onnx/mx2onnx/_export_onnx.py',
               'python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset12.py',
               'python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset13.py',

               # GitHub issue and pull-request templates
               '.github/ISSUE_TEMPLATE/bug_report.md',
               '.github/ISSUE_TEMPLATE/feature_request.md',
               '.github/ISSUE_TEMPLATE/flaky_test.md',
               '.github/PULL_REQUEST_TEMPLATE.md'
               ]
163
# Maps a file extension to the comment mark used to render the license header
# for that language. '*' selects a C-style /* ... */ block comment and '<!---'
# an HTML comment that is closed on every line (see _get_license); any other
# mark is simply put in front of each license line. Files whose extension is
# not listed here are skipped by should_have_license.
# Each extension is listed exactly once: a repeated key in a dict literal
# silently overrides the earlier entry.
_LANGS = {
    '.cc': '*', '.h': '*', '.cu': '*', '.cuh': '*', '.py': '#',
    '.pm': '#', '.scala': '*', '.sh': '#', '.cmake': '#',
    '.java': '*', '.cpp': '*', '.hpp': '*', '.c': '*',
    '.bat': 'rem', '.pl': '#', '.m': '%', '.R': '#', '.mk': '#', '.cfg': '#',
    '.t': '#', '.ps1': '#', '.jl': '#', '.clj': ';;', '.pyx': '#', '.js': '*',
    '.md': '<!---',
}
171
# Matches a line of the legacy "Copyright ... by Contributors" notice. Such
# lines are dropped when a new license header is written.
_OLD_LICENSE = re.compile(r'.*Copyright.*by Contributors')
174
175
def get_mxnet_root():
    """Locate the MXNet source root.

    Starts at the directory containing this script and walks towards the
    filesystem root until a directory holding a ``.mxnet_root`` marker is
    found.

    :returns: absolute path of the directory that contains ``.mxnet_root``
    :raises RuntimeError: if the filesystem root is reached without finding
        the marker
    """
    candidate = os.path.abspath(os.path.dirname(__file__))
    while True:
        if os.path.exists(os.path.join(candidate, ".mxnet_root")):
            return candidate
        upper = os.path.abspath(os.path.join(candidate, os.pardir))
        # The parent of the filesystem root is the root itself.
        if upper == candidate:
            raise RuntimeError("Got to the root and couldn't find a parent folder with .mxnet_root")
        candidate = upper
186
187
def _lines_have_old_license(lines):
    """Return True if any of ``lines`` matches the legacy copyright notice."""
    return any(_OLD_LICENSE.match(text) for text in lines)
193
194
def _lines_have_multiple_license(lines):
    """Return True if ``lines`` carry both an Apache and a non-Apache license.

    A line counts for a license family when it contains any of that family's
    marker strings; the two markers may sit on different lines.
    """
    apache_seen = other_seen = False
    for text in lines:
        apache_seen = apache_seen or any(
            marker in text for marker in _APACHE_LICENSE_PATTERNS)
        other_seen = other_seen or any(
            marker in text for marker in _OTHER_LICENSE_PATTERNS)
    return apache_seen and other_seen
204
205
def _lines_have_apache_license(lines):
    """Return True if any of ``lines`` contains an Apache license marker."""
    for text in lines:
        for marker in _APACHE_LICENSE_PATTERNS:
            if marker in text:
                return True
    return False
208
209
def _file_listed_in_top_level_license(fname):
    """Return True if the top-level license file mentions ``fname``.

    A file counts as listed when some line of ``TOP_LEVEL_LICENSE_FILE``
    contains either the file path itself or ``<directory of fname>/LICENSE``.
    Both are plain substring matches.

    :param fname: path of the file being checked, as it would appear in the
        top-level license file
    :returns: True if a matching line exists, False otherwise
    """
    needles = [fname]
    folder = os.path.split(fname)[0]
    # Only look for the directory's LICENSE when there is a directory part.
    # For a bare file name the needle would be '/LICENSE', which matches
    # unrelated text such as the 'licenses/LICENSE-2.0' URL of the Apache
    # license and would wrongly report every top-level file as listed.
    if folder:
        needles.append(folder + '/LICENSE')
    with open(TOP_LEVEL_LICENSE_FILE, 'r', encoding="utf-8") as f:
        return any(needle in line for line in f for needle in needles)
215
216
def file_have_valid_license(fname):
    """Check whether ``fname`` is properly licensed.

    The file is read as UTF-8 text. It is valid when it is empty, when it
    carries the Apache license and no other license, or when it is listed in
    the top-level license file. Failures are reported through
    ``logging.error``.

    :param fname: path of the file to inspect
    :returns: True if the license is valid, False otherwise
    """
    with open(fname, 'r', encoding="utf-8") as source:
        content = source.readlines()

    # An empty file has nothing to license.
    if not content:
        return True

    if _lines_have_multiple_license(content):
        logging.error("File %s has multiple licenses", fname)
        return False

    if _lines_have_apache_license(content):
        return True

    # No license in the file itself: fall back to the top-level license file.
    if _file_listed_in_top_level_license(fname):
        return True

    logging.error("File %s doesn't have a valid license", fname)
    return False
233
234
def _get_license(comment_mark):
    """Render the default license as a comment header.

    :param comment_mark: comment mark of the target language. ``'*'`` produces
        a ``/* ... */`` block, ``'<!---'`` closes every line with ``-->``, any
        other mark is put in front of each license line.
    :returns: the header text, terminated by one blank line
    """
    block_comment = comment_mark == '*'
    prefix = ' ' + comment_mark if block_comment else comment_mark
    suffix = ' -->' if comment_mark == '<!---' else ''

    rendered = ['/*'] if block_comment else []
    for text in _LICENSE.split('\n'):
        # Blank license lines get the bare mark, without a trailing space.
        rendered.append(prefix + (' ' + text if text else '') + suffix)
    if block_comment:
        rendered.append(' */')

    # Every line ends with a newline and one empty line follows the header.
    return '\n'.join(rendered) + '\n\n'
254
255
def should_have_license(fname):
    """Decide whether ``fname`` is subject to the license check.

    :param fname: path of the file
    :returns: False if the path contains a white-list entry or has an
        extension without a known comment mark, True otherwise
    """
    for entry in _WHITE_LIST:
        if entry in fname:
            logging.debug('skip ' + fname + ', it matches the white list')
            return False
    if os.path.splitext(fname)[1] in _LANGS:
        return True
    logging.debug('skip ' + fname + ', unknown file extension')
    return False
265
266
def file_has_license(fname):
    """Return True if ``fname`` passes the license check.

    Files that are not subject to the check (white-listed or of unknown type)
    pass, and so do files that cannot be decoded as UTF-8 text: they are
    treated as binary and skipped on purpose.

    :param fname: path of the file to check
    :returns: True if the file is exempt, undecodable, or validly licensed;
        False otherwise
    """
    if not should_have_license(fname):
        return True
    try:
        return file_have_valid_license(fname)
    except UnicodeError:
        # Best effort: a file that is not UTF-8 text cannot carry a textual
        # header, so it is skipped rather than reported.
        logging.debug('skip %s, it cannot be decoded as UTF-8', fname)
        return True
275
276
def file_add_license(fname):
    """Prepend the default license header to ``fname`` if it needs one.

    Nothing happens when the file is exempt from the check, already validly
    licensed, or not decodable as UTF-8 text. Otherwise the file is rewritten
    in place: a leading shebang line stays first, the header follows, lines
    matching the legacy copyright notice are dropped and trailing whitespace
    is stripped from every remaining line.

    :param fname: path of the file to update
    """
    if not should_have_license(fname):
        return
    try:
        if file_have_valid_license(fname):
            return
    except UnicodeError:
        # Same policy as file_has_license: undecodable files are skipped
        # instead of aborting the whole run.
        logging.debug('skip %s, it cannot be decoded as UTF-8', fname)
        return

    with open(fname, 'r', encoding="utf-8") as f:
        lines = f.readlines()
    _, ext = os.path.splitext(fname)

    # Build the complete new content before reopening the file for writing,
    # so a failure while rendering cannot leave a truncated file behind.
    output = []
    if lines and lines[0].startswith('#!'):
        # The shebang must remain the very first line of the file.
        output.append(lines[0].rstrip() + '\n\n')
        lines = lines[1:]
    output.append(_get_license(_LANGS[ext]))
    output.extend(l.rstrip() + '\n' for l in lines if not _OLD_LICENSE.match(l))

    with open(fname, 'w', encoding="utf-8") as f:
        f.writelines(output)
    logging.info('added license header to ' + fname)
297
298
def under_git():
    """Return True if the current directory is inside a git work tree with a HEAD commit.

    :returns: True when ``git rev-parse HEAD`` exits with status 0; False when
        it fails or when the ``git`` executable cannot be started at all
    """
    try:
        proc = subprocess.run(['git', 'rev-parse', 'HEAD'],
                              stdout=subprocess.DEVNULL,
                              stderr=subprocess.DEVNULL)
    except OSError:
        # git is not installed or not executable: report "not under git" so
        # callers can fall back to walking the source tree.
        return False
    return proc.returncode == 0
302
303
def git_files():
    """List the files tracked in the HEAD commit.

    Runs ``git ls-tree -r HEAD --name-only -z`` and decodes each path with
    ``os.fsdecode``.

    :returns: list of path strings as printed by git
    :raises subprocess.CalledProcessError: if git exits with a non-zero status
    """
    raw = subprocess.check_output(
        ['git', 'ls-tree', '-r', 'HEAD', '--name-only', '-z'])
    # With -z every path is terminated by NUL, so splitting leaves an empty
    # trailing chunk; drop it so it is not counted or checked as a file.
    return [os.fsdecode(name) for name in raw.split(b'\0') if name]
307
308
def file_generator(path: str):
    """Yield the absolute path of every file found below ``path``.

    :param path: directory to walk recursively with ``os.walk``
    """
    for folder, _subdirs, names in os.walk(path):
        yield from (os.path.abspath(os.path.join(folder, name)) for name in names)
313
314
def foreach(fn, iterable):
    """Call ``fn`` on every item of ``iterable`` in order, discarding the results."""
    for _ in map(fn, iterable):
        pass
318
319
def script_name():
    """:returns: the final path component of ``sys.argv[0]``, i.e. the script name without leading directories"""
    program = sys.argv[0]
    return os.path.basename(program)
323
324
def main():
    """Command-line entry point.

    ``check`` verifies the given files, or, when none are given, every file
    under version control (inside a git work tree) or every file below the
    MXNet source root. ``add`` inserts the license header into the given
    files only.

    The log level is taken from the ``LOGLEVEL`` environment variable and
    defaults to ``INFO``.

    :returns: process exit status: 1 if ``check`` found a file without a
        valid license, 0 otherwise
    """
    logging.basicConfig(
        format='{}: %(levelname)s %(message)s'.format(script_name()),
        level=os.environ.get("LOGLEVEL", "INFO"))

    parser = argparse.ArgumentParser(
        description='Add or check source license header')

    parser.add_argument(
        'action', nargs=1, type=str,
        choices=['add', 'check'], default='add',
        help='add or check')

    parser.add_argument(
        'file', nargs='*', type=str, action='append',
        help='Files to add license header to')

    args = parser.parse_args()
    action = args.action[0]
    # 'append' combined with nargs='*' yields a list of lists; flatten it and
    # tolerate the attribute being left at its None default.
    files = list(chain.from_iterable(args.file or []))

    if action == 'check':
        if not files:
            if under_git():
                logging.info("Git detected: Using files under version control")
                files = git_files()
            else:
                logging.info("Using files under mxnet sources root")
                # Materialize the generator: len() is taken below.
                files = list(file_generator(get_mxnet_root()))

        logging.info("Start to check %d files", len(files))
        # Check every file (no short-circuit) so that all offenders are logged.
        results = [file_has_license(fname) for fname in files]
        if not all(results):
            return 1
        logging.info("All known and whitelisted files have license")
        return 0

    # argparse restricts the action to 'add' or 'check', so this is 'add'.
    if not files:
        logging.warning("No files given, nothing to add a license header to")
    foreach(file_add_license, files)
    return 0
364
365
# Run only when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
368