#!/usr/bin/env python

# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Checks that source files begin with the boilerplate header stored in
# "boilerplate.<extension>.txt" reference files and prints the path of every
# file whose header does not match.

from __future__ import print_function

import argparse
import difflib
import glob
import json
import mmap
import os
import re
import sys
from datetime import date

parser = argparse.ArgumentParser()
parser.add_argument(
    "filenames",
    help="list of files to check, all files if unspecified",
    nargs='*')

rootdir = os.path.dirname(__file__) + "/../../"
rootdir = os.path.abspath(rootdir)
parser.add_argument(
    "--rootdir", default=rootdir, help="root directory to examine")

default_boilerplate_dir = os.path.join(rootdir, "hack/boilerplate")
parser.add_argument(
    "--boilerplate-dir", default=default_boilerplate_dir)

parser.add_argument(
    "-v", "--verbose",
    help="give verbose output regarding why a file does not pass",
    action="store_true")

# Sink for diagnostics when --verbose is off. os.devnull is used instead of
# a hard-coded "/dev/null" so the script also works on non-POSIX platforms.
_devnull = open(os.devnull, "w")

# Module-level configuration read by the functions below. Only the parser
# defaults are applied at import time; the real command line is parsed in
# main(), so importing this module never consumes (or chokes on) the argv of
# the importing process.
args = parser.parse_args([])
verbose_out = _devnull


def _configure(argv=None):
    """Parse *argv* and refresh the module-level ``args``/``verbose_out``.

    Args:
        argv: list of command-line arguments, or None to use sys.argv[1:].

    Returns:
        The parsed argparse namespace (also stored in the global ``args``).
    """
    global args, verbose_out
    args = parser.parse_args(argv)
    verbose_out = sys.stderr if args.verbose else _devnull
    return args


def get_refs():
    """Load the reference headers from the boilerplate directory.

    Returns:
        A dict mapping the middle component of each "boilerplate.<key>.txt"
        file name to that file's content as a list of lines.
    """
    refs = {}

    pattern = os.path.join(args.boilerplate_dir, "boilerplate.*.txt")
    for path in glob.glob(pattern):
        extension = os.path.basename(path).split(".")[1]

        with open(path, 'r') as ref_file:
            refs[extension] = ref_file.read().splitlines()

    return refs


def file_passes(filename, refs, regexs):
    """Return True if *filename* starts with its reference boilerplate.

    Args:
        filename: path of the file to check.
        refs: reference headers as returned by get_refs(); looked up by the
            file's lower-cased extension, or by its basename when the file
            has no extension.
        regexs: compiled patterns as returned by get_regexs().

    Returns:
        True when the leading lines of the file equal the reference after
        year normalisation, False otherwise (including when the file cannot
        be opened or read). The reason is written to ``verbose_out``.

    Raises:
        KeyError: if *refs* has no entry for the file's extension/basename.
    """
    # The read is inside the try block so that a file that opens but cannot
    # be read or decoded is reported as failing instead of aborting the run.
    # The broad except is a deliberate best-effort: any such file simply
    # does not pass.
    try:
        with open(filename, 'r') as f:
            data = f.read()
    except Exception as exc:
        print("Unable to open %s: %s" % (filename, exc), file=verbose_out)
        return False

    basename = os.path.basename(filename)
    extension = file_extension(filename)
    if extension != "":
        ref = refs[extension]
    else:
        ref = refs[basename]

    # remove build tags from the top of Go files
    if extension == "go":
        p = regexs["go_build_constraints"]
        (data, found) = p.subn("", data, 1)

    # remove shebang from the top of shell files
    if extension == "sh":
        p = regexs["shebang"]
        (data, found) = p.subn("", data, 1)

    data = data.splitlines()

    # if our test file is smaller than the reference it surely fails!
    if len(ref) > len(data):
        print('File %s smaller than reference (%d < %d)' %
              (filename, len(data), len(ref)),
              file=verbose_out)
        return False

    # trim our file to the same number of lines as the reference file
    data = data[:len(ref)]

    # The literal placeholder "YEAR" belongs in the reference only; a real
    # file must carry an actual year.
    p = regexs["year"]
    for d in data:
        if p.search(d):
            print('File %s is missing the year' % filename, file=verbose_out)
            return False

    # Replace the year(s) on the first line that contains one with "YEAR" so
    # the line can be compared with the reference; later lines are untouched.
    p = regexs["date"]
    for i, d in enumerate(data):
        (data[i], found) = p.subn('YEAR', d)
        if found != 0:
            break

    # if we don't match the reference at this point, fail
    if ref != data:
        print("Header in %s does not match reference, diff:" %
              filename, file=verbose_out)
        if args.verbose:
            print(file=verbose_out)
            for line in difflib.unified_diff(
                    ref, data, 'reference', filename, lineterm=''):
                print(line, file=verbose_out)
            print(file=verbose_out)
        return False

    return True


def file_extension(filename):
    """Return the lower-cased extension of *filename* without the dot.

    Returns an empty string when the name has no extension.
    """
    return os.path.splitext(filename)[1].split(".")[-1].lower()


# Paths that are never checked. get_files() prunes directories with exactly
# these names while walking; normalize_files() drops every path that
# contains one of these entries as a plain substring.
skipped_dirs = [
    '.git',
    "vendor",
    "test/e2e/framework/framework.go",
    "images"
]


def normalize_files(files):
    """Drop skipped paths and make the remaining ones absolute.

    Args:
        files: iterable of path names.

    Returns:
        A new list without any path containing an entry of ``skipped_dirs``
        as a substring; relative paths are joined onto ``args.rootdir``.
    """
    kept = [pathname for pathname in files
            if not any(x in pathname for x in skipped_dirs)]
    return [pathname if os.path.isabs(pathname)
            else os.path.join(args.rootdir, pathname)
            for pathname in kept]


def get_files(extensions):
    """Collect the files whose header should be checked.

    Args:
        extensions: container of reference keys (extensions or basenames).

    Returns:
        The normalised paths of the files named on the command line, or of
        all files below ``args.rootdir`` when none were given, restricted to
        those whose extension or basename is in *extensions*.
    """
    if args.filenames:
        files = args.filenames
    else:
        files = []
        for root, dirs, walkfiles in os.walk(args.rootdir):
            # don't visit certain dirs. This is just a performance improvement
            # as we would prune these later in normalize_files(). But doing it
            # cuts down the amount of filesystem walking we do and cuts down
            # the size of the file list. The list must be changed in place
            # for os.walk() to honour the pruning.
            dirs[:] = [d for d in dirs if d not in skipped_dirs]
            files.extend(os.path.join(root, name) for name in walkfiles)

    files = normalize_files(files)
    return [pathname for pathname in files
            if file_extension(pathname) in extensions
            or os.path.basename(pathname) in extensions]


def get_regexs():
    """Build the compiled regular expressions used by file_passes().

    Returns:
        A dict with the keys "year", "date", "go_build_constraints" and
        "shebang".
    """
    regexs = {}
    # Search for "YEAR" which exists in the boilerplate, but shouldn't in the real thing
    regexs["year"] = re.compile('YEAR')
    # dates can be 2014, 2015, 2016, ..., CURRENT_YEAR, company holder names can be anything
    years = range(2014, date.today().year + 1)
    regexs["date"] = re.compile('(%s)' % "|".join(map(str, years)))
    # strip // +build \n\n build constraints
    regexs["go_build_constraints"] = re.compile(
        r"^(// \+build.*\n)+\n", re.MULTILINE)
    # strip #!.* from shell scripts
    regexs["shebang"] = re.compile(r"^(#!.*\n)\n*", re.MULTILINE)
    return regexs


def main(argv=None):
    """Check all selected files and print the ones that fail to stdout.

    Args:
        argv: optional list of command-line arguments; sys.argv[1:] is used
            when omitted.

    Returns:
        Always 0; failures are reported only through the printed file names.
    """
    _configure(argv)

    regexs = get_regexs()
    refs = get_refs()
    filenames = get_files(refs.keys())

    for filename in filenames:
        if not file_passes(filename, refs, regexs):
            print(filename, file=sys.stdout)

    return 0


if __name__ == "__main__":
    sys.exit(main())