#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

"""Add or check license header

Usage:

- add the default license header to source files that do not contain a valid
  license:

  license_header.py add

- check if every file has a license header

  license_header.py check
"""

import re
import os
import argparse
from itertools import chain
import logging
import sys
import subprocess

# the default apache license
_LICENSE = """Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License."""

# if a file contains any str in the list, then consider it has been licensed
_APACHE_LICENSE_PATTERNS = ['Licensed to the Apache Software Foundation']
_OTHER_LICENSE_PATTERNS = ['THE SOFTWARE IS PROVIDED "AS IS"',
                           'THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS']
TOP_LEVEL_LICENSE_FILE = 'LICENSE'

# the folders or files that will be ignored
_WHITE_LIST = [
    # Git submodules under different licenses
    '3rdparty/ctc_include/contrib/moderngpu',
    '3rdparty/dlpack',
    '3rdparty/dmlc-core',
    '3rdparty/googletest',
    '3rdparty/mkldnn',
    '3rdparty/nvidia_cub',
    '3rdparty/onnx-tensorrt',
    '3rdparty/openmp',
    '3rdparty/ps-lite',
    '3rdparty/tvm',

    # 3rdparty headerfiles under different licenses
    'include/mkldnn',

    # Docs Sphinx themes under different licenses
    'docs/python_docs/themes',

    # Docs Jekyll website under different licenses
    'docs/static_site',

    # Code shared with project by author - see file for details
    'src/operator/special_functions-inl.h',

    # Code generated by scala-package, checked in, and verified
    'scala-package/init-native/src/main/native/org_apache_mxnet_init_native_c_api.h',
    'scala-package/native/src/main/native/org_apache_mxnet_native_c_api.h',

    # Licensed under Caffe header
    'src/operator/nn/pool.h',
    'src/operator/nn/pool.cuh',
    'src/operator/contrib/psroi_pooling-inl.h',
    'src/operator/contrib/nn/deformable_im2col.h',
    'src/operator/contrib/nn/deformable_im2col.cuh',
    'src/operator/contrib/nn/modulated_deformable_im2col.h',
    'src/operator/contrib/nn/modulated_deformable_im2col.cuh',
    'src/operator/nn/im2col.h',
    'src/operator/nn/im2col.cuh',

    # Licenses in headers
    'src/operator/contrib/erfinv-inl.h',
    'docs/_static/searchtools_custom.js',
    'docs/_static/js/clipboard.js',
    'docs/_static/js/clipboard.min.js',
    'docs/static_site/src/assets/js/clipboard.js',
    'cmake/upstream/FindCUDAToolkit.cmake',
    'cmake/upstream/select_compute_arch.cmake',

    # Licensed under 2-Clause BSD in header
    'example/ssd/dataset/pycocotools/coco.py',

    # Licensed under the Boost Software License, Version 1.0
    'cmake/Modules/FindJeMalloc.cmake',

    # Licensed under MIT license and contributed by Microsoft
    'src/operator/contrib/nn/modulated_deformable_im2col.cuh',
    'src/operator/contrib/nn/modulated_deformable_im2col.h',
    'src/operator/contrib/modulated_deformable_convolution-inl.h',
    'src/operator/contrib/modulated_deformable_convolution.cc',
    'src/operator/contrib/modulated_deformable_convolution.cu',

    # Licensed under Apache 2.0 license and contributed by Microsoft
    'src/operator/contrib/deformable_psroi_pooling.cu',
    'src/operator/contrib/deformable_convolution.cu',
    'src/operator/contrib/deformable_convolution-inl.h',
    'src/operator/contrib/psroi_pooling.cc',
    'src/operator/contrib/multi_proposal.cu',
    'src/operator/contrib/deformable_psroi_pooling-inl.h',
    'src/operator/contrib/deformable_psroi_pooling.cc',
    'src/operator/contrib/deformable_convolution.cc',
    'src/operator/contrib/psroi_pooling.cu',
    'src/operator/contrib/multi_proposal.cc',
    'src/operator/contrib/multi_proposal-inl.h',

    # Julia package metadata, generated by Pkg3.jl
    'julia/Project.toml',

    # Licensed under Apache 2.0 license
    'example/image-classification/predict-cpp/image-classification-predict.cc',
    'src/operator/nn/mkldnn/mkldnn_base-inl.h',

    # This file
    'tools/license_header.py',

    # Dual-Licensed under Apache 2.0 and Nvidia BSD-3
    'python/mxnet/onnx/mx2onnx/_export_onnx.py',
    'python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset12.py',
    'python/mxnet/onnx/mx2onnx/_op_translations/_op_translations_opset13.py',

    # Github template
    '.github/ISSUE_TEMPLATE/bug_report.md',
    '.github/ISSUE_TEMPLATE/feature_request.md',
    '.github/ISSUE_TEMPLATE/flaky_test.md',
    '.github/PULL_REQUEST_TEMPLATE.md'
    ]

# language extensions and the according comment mark
_LANGS = {'.cc':'*', '.h':'*', '.cu':'*', '.cuh':'*', '.py':'#',
          '.pm':'#', '.scala':'*', '.sh':'#', '.cmake':'#',
          '.java':'*', '.cpp':'*', '.hpp':'*', '.c':'*',
          '.bat':'rem', '.pl':'#', '.m':'%', '.R':'#', '.mk':'#', '.cfg':'#',
          '.t':'#', '.ps1':'#', '.jl':'#', '.clj':';;', '.pyx':'#', '.js':'*',
          '.md':'<!---'}

# Previous license header, which will be removed
_OLD_LICENSE = re.compile('.*Copyright.*by Contributors')


def get_mxnet_root():
    """Return the closest ancestor of this script's directory holding ``.mxnet_root``.

    The search starts at the directory containing this file and walks upwards.

    :raises RuntimeError: if the filesystem root is reached without finding
        a directory that contains a ``.mxnet_root`` entry
    """
    curpath = os.path.abspath(os.path.dirname(__file__))

    def is_mxnet_root(path: str) -> bool:
        return os.path.exists(os.path.join(path, ".mxnet_root"))

    while not is_mxnet_root(curpath):
        parent = os.path.abspath(os.path.join(curpath, os.pardir))
        # the parent of the filesystem root is the root itself
        if parent == curpath:
            raise RuntimeError("Got to the root and couldn't find a parent folder with .mxnet_root")
        curpath = parent
    return curpath


def _lines_have_old_license(lines):
    """Return True if any line matches the previous (``_OLD_LICENSE``) header."""
    return any(_OLD_LICENSE.match(line) for line in lines)


def _lines_have_multiple_license(lines):
    """Return True if the lines carry both an Apache and a non-Apache license marker."""
    has_other_license = any(
        pattern in line for line in lines for pattern in _OTHER_LICENSE_PATTERNS)
    return _lines_have_apache_license(lines) and has_other_license


def _lines_have_apache_license(lines):
    """Return True if any line contains one of ``_APACHE_LICENSE_PATTERNS``."""
    return any(
        pattern in line for line in lines for pattern in _APACHE_LICENSE_PATTERNS)


def _file_listed_in_top_level_license(fname):
    """Return True if ``fname`` is mentioned in the top-level license file.

    A file counts as listed when either its own path or ``<its directory>/LICENSE``
    appears as a substring of some line of ``TOP_LEVEL_LICENSE_FILE``.  That file
    is opened relative to the current working directory.
    """
    module = os.path.split(fname)[0] + '/LICENSE'
    with open(TOP_LEVEL_LICENSE_FILE, 'r', encoding="utf-8") as f:
        return any(fname in line or module in line for line in f)


def file_have_valid_license(fname):
    """Return True if ``fname`` is considered properly licensed.

    A file is valid when it is empty, when it contains the Apache marker and no
    other license marker, or when it is listed in the top-level license file.
    Every rejection is reported through ``logging.error``.

    :raises UnicodeError: if the file is not valid UTF-8
    """
    with open(fname, 'r', encoding="utf-8") as f:
        lines = f.readlines()
    if not lines:
        return True
    # "multiple" implies the Apache marker is present, so test it first
    if _lines_have_multiple_license(lines):
        logging.error("File %s has multiple licenses", fname)
        return False
    if _lines_have_apache_license(lines):
        return True
    if _file_listed_in_top_level_license(fname):
        return True
    logging.error("File %s doesn't have a valid license", fname)
    return False


def _get_license(comment_mark):
    """Render ``_LICENSE`` as a comment block for the given comment mark.

    ``'*'`` produces a ``/* ... */`` block, ``'<!---'`` closes every line with
    ``' -->'``, and any other mark is simply prefixed to every line.  The result
    always ends with one blank line.
    """
    is_block_comment = comment_mark == '*'
    prefix = ' ' + comment_mark if is_block_comment else comment_mark
    suffix = ' -->' if comment_mark == '<!---' else ''

    parts = ['/*\n'] if is_block_comment else []
    for text in _LICENSE.split('\n'):
        # blank license lines get the bare mark, without a trailing space
        parts.append(prefix + (' ' + text if text else '') + suffix + '\n')
    if is_block_comment:
        parts.append(' */\n')
    parts.append('\n')
    return ''.join(parts)


def should_have_license(fname):
    """Return True if ``fname`` is subject to the license check.

    Files whose path contains a ``_WHITE_LIST`` entry, or whose extension is
    not a key of ``_LANGS``, are skipped.
    """
    if any(entry in fname for entry in _WHITE_LIST):
        logging.debug('skip %s, it matches the white list', fname)
        return False
    _, ext = os.path.splitext(fname)
    if ext not in _LANGS:
        logging.debug('skip %s, unknown file extension', fname)
        return False
    return True


def file_has_license(fname):
    """Return False only for a checked file that lacks a valid license.

    Files that are exempt from the check, and files that cannot be decoded as
    UTF-8, are reported as fine.
    """
    if not should_have_license(fname):
        return True
    try:
        return file_have_valid_license(fname)
    except UnicodeError:
        # best effort: content that is not UTF-8 text cannot be inspected
        return True


def file_add_license(fname):
    """Prepend the default license header to ``fname`` if it needs one.

    A leading shebang line stays first, lines matching the previous license
    header are dropped, and trailing whitespace is stripped from every line.
    Nothing is written when the file is exempt or already validly licensed.
    """
    if not should_have_license(fname):
        return
    if file_have_valid_license(fname):
        return
    with open(fname, 'r', encoding="utf-8") as f:
        lines = f.readlines()
    _, ext = os.path.splitext(fname)
    with open(fname, 'w', encoding="utf-8") as f:
        # shebang line (lines is non-empty: an empty file counts as valid above)
        if lines[0].startswith('#!'):
            f.write(lines[0].rstrip() + '\n\n')
            del lines[0]
        f.write(_get_license(_LANGS[ext]))
        for line in lines:
            if _OLD_LICENSE.match(line):
                continue
            f.write(line.rstrip() + '\n')
    logging.info('added license header to %s', fname)


def under_git():
    """Return True if ``git rev-parse HEAD`` succeeds in the current directory.

    Returns False as well when the ``git`` executable cannot be started.
    """
    try:
        result = subprocess.run(['git', 'rev-parse', 'HEAD'],
                                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except OSError:
        return False
    return result.returncode == 0


def git_files():
    """Return the paths of all files recorded in the ``HEAD`` tree."""
    output = subprocess.check_output(
        ['git', 'ls-tree', '-r', 'HEAD', '--name-only', '-z'])
    # -z terminates every name with NUL, so the final split element is empty
    return [os.fsdecode(name) for name in output.split(b'\0') if name]


def file_generator(path: str):
    """Yield the absolute path of every file found below ``path``."""
    for dirpath, _dirnames, files in os.walk(path):
        for file in files:
            yield os.path.abspath(os.path.join(dirpath, file))


def foreach(fn, iterable):
    """Call ``fn`` on every element of ``iterable``, discarding the results."""
    for x in iterable:
        fn(x)


def script_name():
    """:returns: script name with leading paths removed"""
    return os.path.basename(sys.argv[0])


def main():
    """Command line entry point.

    :returns: 1 if ``check`` found a file without a valid license, else 0
    """
    logging.basicConfig(
        format='{}: %(levelname)s %(message)s'.format(script_name()),
        level=os.environ.get("LOGLEVEL", "INFO"))

    parser = argparse.ArgumentParser(
        description='Add or check source license header')

    parser.add_argument(
        'action', nargs=1, type=str,
        choices=['add', 'check'],
        help='add or check')

    parser.add_argument(
        'file', nargs='*', type=str, action='append',
        help='Files to add license header to')

    args = parser.parse_args()
    action = args.action[0]
    files = list(chain.from_iterable(args.file))
    if not files and action == 'check':
        if under_git():
            logging.info("Git detected: Using files under version control")
            files = git_files()
        else:
            logging.info("Using files under mxnet sources root")
            # materialize the generator: the file count is logged below
            files = list(file_generator(get_mxnet_root()))

    if action == 'check':
        logging.info("Start to check %d files", len(files))
        # check every file (no short-circuit) so that all offenders get logged
        results = [file_has_license(fname) for fname in files]
        if not all(results):
            return 1
        logging.info("All known and whitelisted files have license")
        return 0

    # argparse restricts action to 'add' or 'check'
    foreach(file_add_license, files)
    return 0


if __name__ == '__main__':
    sys.exit(main())