1#!/usr/bin/env python 2 3# Copyright 2015 The Kubernetes Authors. 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16 17from __future__ import print_function 18 19import argparse 20import datetime 21import difflib 22import glob 23import os 24import re 25import sys 26 27parser = argparse.ArgumentParser() 28parser.add_argument( 29 "filenames", 30 help="list of files to check, all files if unspecified", 31 nargs='*') 32 33rootdir = os.path.dirname(__file__) + "/../../" 34rootdir = os.path.abspath(rootdir) 35parser.add_argument( 36 "--rootdir", default=rootdir, help="root directory to examine") 37 38default_boilerplate_dir = os.path.join(rootdir, "hack/boilerplate") 39parser.add_argument( 40 "--boilerplate-dir", default=default_boilerplate_dir) 41 42parser.add_argument( 43 "-v", "--verbose", 44 help="give verbose output regarding why a file does not pass", 45 action="store_true") 46 47args = parser.parse_args() 48 49verbose_out = sys.stderr if args.verbose else open("/dev/null", "w") 50 51 52def get_refs(): 53 refs = {} 54 55 for path in glob.glob(os.path.join(args.boilerplate_dir, "boilerplate.*.txt")): 56 extension = os.path.basename(path).split(".")[1] 57 58 ref_file = open(path, 'r') 59 ref = ref_file.read().splitlines() 60 ref_file.close() 61 refs[extension] = ref 62 63 return refs 64 65 66def is_generated_file(filename, data, regexs): 67 for d in skipped_ungenerated_files: 68 if d in filename: 69 return False 70 71 p = regexs["generated"] 72 return p.search(data) 73 74 75def file_passes(filename, refs, regexs): 76 try: 77 f = open(filename, 'r') 78 except Exception as exc: 79 print("Unable to open %s: %s" % (filename, exc), file=verbose_out) 80 return False 81 82 data = f.read() 83 f.close() 84 85 # determine if the file is automatically generated 86 generated = is_generated_file(filename, data, regexs) 87 88 basename = os.path.basename(filename) 89 extension = file_extension(filename) 90 if generated: 91 if extension == "go": 92 extension = "generatego" 93 elif extension == "bzl": 94 extension = "generatebzl" 95 96 if extension != "": 97 ref = refs[extension] 98 else: 99 ref = refs[basename] 100 101 # remove extra content from the top of files 102 if extension == "go" or extension == "generatego": 103 p = regexs["go_build_constraints"] 104 (data, found) = p.subn("", data, 1) 105 elif extension in ["sh", "py"]: 106 p = regexs["shebang"] 107 (data, found) = p.subn("", data, 1) 108 109 data = data.splitlines() 110 111 # if our test file is smaller than the reference it surely fails! 112 if len(ref) > len(data): 113 print('File %s smaller than reference (%d < %d)' % 114 (filename, len(data), len(ref)), 115 file=verbose_out) 116 return False 117 118 # trim our file to the same number of lines as the reference file 119 data = data[:len(ref)] 120 121 p = regexs["year"] 122 for d in data: 123 if p.search(d): 124 if generated: 125 print('File %s has the YEAR field, but it should not be in generated file' % 126 filename, file=verbose_out) 127 else: 128 print('File %s has the YEAR field, but missing the year of date' % 129 filename, file=verbose_out) 130 return False 131 132 if not generated: 133 # Replace all occurrences of the regex "2014|2015|2016|2017|2018" with "YEAR" 134 p = regexs["date"] 135 for i, d in enumerate(data): 136 (data[i], found) = p.subn('YEAR', d) 137 if found != 0: 138 break 139 140 # if we don't match the reference at this point, fail 141 if ref != data: 142 print("Header in %s does not match reference, diff:" % 143 filename, file=verbose_out) 144 if args.verbose: 145 print(file=verbose_out) 146 for line in difflib.unified_diff(ref, data, 'reference', filename, lineterm=''): 147 print(line, file=verbose_out) 148 print(file=verbose_out) 149 return False 150 151 return True 152 153 154def file_extension(filename): 155 return os.path.splitext(filename)[1].split(".")[-1].lower() 156 157 158skipped_dirs = ['third_party', '_gopath', '_output', '.git', 'cluster/env.sh', 159 "vendor", "test/e2e/generated/bindata.go", "hack/boilerplate/test", 160 "staging/src/k8s.io/kubectl/pkg/generated/bindata.go"] 161 162# list all the files contain 'DO NOT EDIT', but are not generated 163skipped_ungenerated_files = [ 164 'hack/lib/swagger.sh', 'hack/boilerplate/boilerplate.py'] 165 166 167def normalize_files(files): 168 newfiles = [] 169 for pathname in files: 170 if any(x in pathname for x in skipped_dirs): 171 continue 172 newfiles.append(pathname) 173 for i, pathname in enumerate(newfiles): 174 if not os.path.isabs(pathname): 175 newfiles[i] = os.path.join(args.rootdir, pathname) 176 return newfiles 177 178 179def get_files(extensions): 180 files = [] 181 if len(args.filenames) > 0: 182 files = args.filenames 183 else: 184 for root, dirs, walkfiles in os.walk(args.rootdir): 185 # don't visit certain dirs. This is just a performance improvement 186 # as we would prune these later in normalize_files(). But doing it 187 # cuts down the amount of filesystem walking we do and cuts down 188 # the size of the file list 189 for d in skipped_dirs: 190 if d in dirs: 191 dirs.remove(d) 192 193 for name in walkfiles: 194 pathname = os.path.join(root, name) 195 files.append(pathname) 196 197 files = normalize_files(files) 198 outfiles = [] 199 for pathname in files: 200 basename = os.path.basename(pathname) 201 extension = file_extension(pathname) 202 if extension in extensions or basename in extensions: 203 outfiles.append(pathname) 204 return outfiles 205 206 207def get_dates(): 208 years = datetime.datetime.now().year 209 return '(%s)' % '|'.join((str(year) for year in range(2014, years+1))) 210 211 212def get_regexs(): 213 regexs = {} 214 # Search for "YEAR" which exists in the boilerplate, but shouldn't in the real thing 215 regexs["year"] = re.compile('YEAR') 216 # get_dates return 2014, 2015, 2016, 2017, or 2018 until the current year as a regex like: "(2014|2015|2016|2017|2018)"; 217 # company holder names can be anything 218 regexs["date"] = re.compile(get_dates()) 219 # strip // +build \n\n build constraints 220 regexs["go_build_constraints"] = re.compile( 221 r"^(// \+build.*\n)+\n", re.MULTILINE) 222 # strip #!.* from scripts 223 regexs["shebang"] = re.compile(r"^(#!.*\n)\n*", re.MULTILINE) 224 # Search for generated files 225 regexs["generated"] = re.compile('DO NOT EDIT') 226 return regexs 227 228 229def main(): 230 regexs = get_regexs() 231 refs = get_refs() 232 filenames = get_files(refs.keys()) 233 234 for filename in filenames: 235 if not file_passes(filename, refs, regexs): 236 print(filename, file=sys.stdout) 237 238 return 0 239 240 241if __name__ == "__main__": 242 sys.exit(main()) 243