1#!/usr/bin/python3
2'''
3This entire file is licensed under MIT.
4
5Copyright 2020 William Wold
6
7Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
8
9The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
10
11THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
12'''
13
# This script checks that every source file in the project has an appropriate license
15
16import logging
17import re
18import os
19from os import path
20import subprocess
21import re
22
# Module-level logger. The root logger is configured at WARNING, so the
# logger.info() progress messages below are hidden unless the level is lowered.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.WARNING)

# Directories (relative to the project root) that are scanned for files to check
toplevel_dirs = ['include', 'src', 'gtk-priv', 'examples', 'test']
# Project-relative path of an extra ignore file, read in addition to
# .gitignore and .git/info/exclude
ignore_patterns_file = 'test/license-ignore.txt'
28
# Reference body of the MIT license (without the copyright line). main()
# canonicalizes it with canonify_str() and reports a file as MIT-licensed when
# the canonicalized text occurs in the canonicalized file contents.
MIT_EXAMPLE = '''
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
'''
36
# Reference LGPLv3 notice. main() canonicalizes it with canonify_str() and
# reports a file as LGPLv3-licensed when the canonicalized text occurs in the
# canonicalized file contents, so line breaks and comment markers in the
# checked file do not matter.
LGPL3_EXAMPLE = '''
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
'''
52
def canonify_str(s):
    """Return s with comment punctuation and all whitespace removed.

    Every '/', '#', "'" and '*' character and every whitespace character is
    dropped, so the same license text compares equal no matter how it is
    wrapped or which comment markers surround it.
    """
    noise = re.compile(r'[/#\'\n\s\*]')
    return noise.sub('', s)
55
def get_project_root():
    """Return the project root: the parent of the directory holding this script.

    The script path is resolved with realpath() first, so the result is an
    absolute path with symlinks resolved.
    """
    script_file = path.realpath(__file__)
    script_dir = path.dirname(script_file)
    return path.dirname(script_dir)
58
# Cache for get_ignore_patterns(); None means "not loaded yet".
ignore_patterns = None
def get_ignore_patterns():
    """Return the list of ignore patterns, loading them on the first call.

    Patterns are read from .gitignore, .git/info/exclude and the file named by
    ignore_patterns_file, all relative to the project root. On each line,
    everything from the first '#' onward is dropped (wherever it occurs on the
    line), the remainder is stripped, and empty results are skipped. A source
    file that does not exist is reported with a warning and otherwise ignored.

    The result is cached in the module-level ignore_patterns variable, so the
    files are only read once per process.

    Returns:
        A list of pattern strings, in the order they were read.
    """
    global ignore_patterns
    if ignore_patterns is not None:
        return ignore_patterns
    root = get_project_root()
    # Collect into a local list and publish it only once loading has succeeded,
    # so a read error cannot leave a half-filled cache behind for later calls.
    patterns = []
    for f in ['.gitignore', '.git/info/exclude', ignore_patterns_file]:
        p = root + '/' + f
        if not path.isfile(p):
            logger.warning(p + ' not found, it will not be used to exclude paths from the license check')
            continue
        logger.info('Excluding paths in ' + p + ' from the license check')
        # Context manager guarantees the file handle is closed.
        with open(p, 'r') as pattern_file:
            raw_lines = pattern_file.read().splitlines()
        for raw_line in raw_lines:
            # Keep only the text before the first '#'
            line = raw_line.partition('#')[0].strip()
            if line:
                patterns.append(line)
    ignore_patterns = patterns
    return ignore_patterns
76
def path_matches(base_path, original):
    """Return True if base_path matches the ignore pattern `original`.

    The pattern is turned into a regular expression: it is escaped, every '*'
    becomes '.*', and every '/' becomes an alternative that accepts the start
    of the string, the end of the string, or a literal '/'. The expression is
    applied with re.match(), i.e. anchored at the start of base_path only, so
    a pattern also matches any path it is a prefix of.

    Raises:
        RuntimeError: if applying the generated expression fails.
    """
    escaped = re.escape(original)
    with_wildcards = re.sub(r'\\\*', r'.*', escaped)
    # The optional backslash covers re.escape() variants that escape '/'
    regex = re.sub(r'\\?/', r'(^|$|/)', with_wildcards)
    try:
        hit = re.match(regex, base_path)
    except Exception as e:
        raise RuntimeError('Failed to match pattern ' + regex + ' (original: ' + original + ') against ' + base_path + ': ' + str(e))
    return hit is not None
85
def get_files(prefix, search_path):
    """Recursively collect the files under prefix/search_path that need a license.

    Args:
        prefix: directory that search_path is relative to.
        search_path: file or directory path relative to prefix.

    Returns:
        A list of paths relative to prefix. The list is empty when search_path
        is a build directory (it contains a build.ninja file), matches one of
        the ignore patterns, or is neither a regular file nor a directory.
        Directory entries are visited in sorted order, so the result is
        deterministic.

    Raises:
        AssertionError: if prefix/search_path does not exist.
    """
    full_path = path.join(prefix, search_path)
    # Raised explicitly instead of using `assert` so the check is not stripped
    # when Python runs with -O; the exception type is unchanged.
    if not path.exists(full_path):
        raise AssertionError(full_path + ' does not exist')
    if path.exists(path.join(full_path, 'build.ninja')):
        logger.info(search_path + ' ignored because it is a build directory')
        return []
    for pattern in get_ignore_patterns():
        if path_matches(search_path, pattern):
            logger.info(search_path + ' ignored because it matches ' + pattern)
            return []
    if path.isfile(full_path):
        logger.info('Found ' + search_path)
        return [search_path]
    if path.isdir(full_path):
        logger.info('Scanning ' + search_path)
        result = []
        # os.listdir() returns entries in arbitrary order; sort them so the
        # report is reproducible across runs and filesystems.
        for item in sorted(os.listdir(full_path)):
            result += get_files(prefix, path.join(search_path, item))
        return result
    return []
107
def get_important_files():
    """Return every file to check, gathered from all directories in toplevel_dirs.

    Paths in the returned list are relative to the project root.
    """
    collected = []
    for directory in toplevel_dirs:
        logger.info('Scanning toplevel directory ' + directory)
        collected.extend(get_files(get_project_root(), directory))
    return collected
114
def print_list(name, files):
    """Print the files that fall under the license category `name`.

    Prints a heading followed by one indented line per file, or a single
    "No files" line when `files` is empty. A blank line follows either way.
    """
    if not files:
        print('No files are licensed under ' + name)
    else:
        print('The following files are licensed under ' + name + ':')
        for entry in files:
            print('  ' + entry)
    print()
123
def load_file(p):
    """Read the text file at p and return its contents passed through canonify_str().

    Raises:
        RuntimeError: if the file cannot be opened, read or canonicalized. The
            message names the file relative to the project root and points at
            the ignore file, for files that should not be checked at all.
    """
    try:
        with open(p, 'r') as handle:
            return canonify_str(handle.read())
    except Exception:
        shown_path = path.relpath(p, get_project_root())
        message = (
            'Failed to read ' + shown_path +
            '. If this file should have been ignored, add it to ' + ignore_patterns_file)
        raise RuntimeError(message)
134
def main():
    """Check every important file for a known license and exit with the result.

    Collects the files under the top-level directories, canonicalizes each one
    and looks for the canonicalized MIT and LGPLv3 reference texts in it. The
    files are then printed in four groups: MIT, LGPLv3, no license, and
    multiple licenses.

    This function does not return: it exits with status 1 if any file has no
    license or more than one license, and with status 0 otherwise.

    Raises:
        AssertionError: if 10 or fewer files were found, which indicates the
            scan itself went wrong.
        RuntimeError: propagated from load_file() when a file cannot be read.
    """
    # Imported locally because the module does not import sys at file level.
    # sys.exit is used instead of the exit() helper, which is injected by the
    # site module and is not guaranteed to exist (e.g. under `python -S`).
    import sys

    root = get_project_root()
    logger.info('Project root: ' + root)
    logger.info('Ignore paths: \n  ' + '\n  '.join(get_ignore_patterns()))
    all_files = get_important_files()
    logger.info('Found ' + str(len(all_files)) + ' files')
    # Sanity check, raised explicitly so it is not stripped under `python -O`.
    if len(all_files) <= 10:
        raise AssertionError('There are ' + str(len(all_files)) + ' files (which is not as many as there should be)')
    mit_files = []
    lgpl3_files = []
    none_files = []
    multiples_files = []
    mit_example = canonify_str(MIT_EXAMPLE)
    lgpl3_example = canonify_str(LGPL3_EXAMPLE)
    for p in all_files:
        contents = load_file(path.join(root, p))
        found = 0
        if mit_example in contents:
            mit_files.append(p)
            found += 1
        if lgpl3_example in contents:
            lgpl3_files.append(p)
            found += 1
        # A file with both licenses is listed under each license and also
        # under "multiple licenses".
        if found > 1:
            multiples_files.append(p)
        elif found < 1:
            none_files.append(p)
    print()
    print_list('MIT', mit_files)
    print_list('LGPLv3', lgpl3_files)
    print_list('no license', none_files)
    print_list('multiple licenses', multiples_files)
    if none_files or multiples_files:
        print('If some files should be excluded from the license check, add them to ' + ignore_patterns_file)
        print('Failed license check')
        sys.exit(1)
    print('Passed license check')
    sys.exit(0)
172
# Run the license check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
175