#!/usr/bin/env python3

# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This script checks the current state of the source code for minor issues,
including incorrect file permissions, presence of tabs, non-Unix line endings,
trailing whitespace, and presence of UTF-8 BOM.
Note: requires python 3, must be run from Mbed TLS root.
"""

import os
import argparse
import logging
import codecs
import re
import subprocess
import sys


class FileIssueTracker:
    """Base class for file-wide issue tracking.

    To implement a checker that processes a file as a whole, inherit from
    this class and implement `check_file_for_issue` and define ``heading``.

    ``suffix_exemptions``: files whose name ends with a string in this set
     will not be checked.

    ``path_exemptions``: files whose path (relative to the root of the source
    tree) matches this regular expression will not be checked. This can be
    ``None`` to match no path. Paths are normalized and converted to ``/``
    separators before matching.

    ``heading``: human-readable description of the issue

    ``files_with_issues``: instance dictionary filled in by the checks. It
    maps a file path either to a list of line numbers (see `record_issue`)
    or to ``None`` when the issue concerns the file as a whole.
    """

    suffix_exemptions = frozenset()
    path_exemptions = None
    # heading must be defined in derived classes.
    # pylint: disable=no-member

    def __init__(self):
        self.files_with_issues = {}

    @staticmethod
    def _use_forward_slashes(filepath, separators):
        """Return ``filepath`` with each character of ``separators``
        replaced by ``/``."""
        for separator in separators:
            filepath = filepath.replace(separator, '/')
        return filepath

    @staticmethod
    def normalize_path(filepath):
        """Normalize ``filepath`` with / as the directory separator."""
        filepath = os.path.normpath(filepath)
        # On Windows, we may have backslashes to separate directories.
        # We need slashes to match exemption lists.
        seps = os.path.sep
        if os.path.altsep is not None:
            seps += os.path.altsep
        # Each separator character is replaced on its own: str.split(seps)
        # would treat a two-character ``seps`` as a single delimiter and
        # leave the path untouched.
        return FileIssueTracker._use_forward_slashes(filepath, seps)

    def should_check_file(self, filepath):
        """Whether the given file name should be checked.

        Files whose name ends with a string listed in ``self.suffix_exemptions``
        or whose path matches ``self.path_exemptions`` will not be checked.
        """
        # str.endswith accepts a tuple of candidates; an empty tuple never
        # matches, so trackers without suffix exemptions fall through.
        if filepath.endswith(tuple(self.suffix_exemptions)):
            return False
        if self.path_exemptions and \
                re.match(self.path_exemptions, self.normalize_path(filepath)):
            return False
        return True

    def check_file_for_issue(self, filepath):
        """Check the specified file for the issue that this class is for.

        Subclasses must implement this method.
        """
        raise NotImplementedError

    def record_issue(self, filepath, line_number):
        """Record that an issue was found at the specified location."""
        self.files_with_issues.setdefault(filepath, []).append(line_number)

    def output_file_issues(self, logger):
        """Log all the locations where the issue was found.

        Nothing is logged if no file has the issue. Otherwise the heading is
        logged first, then one entry per file in sorted order (with the line
        numbers when some were recorded), then an empty line.
        """
        if not self.files_with_issues:
            return
        logger.info(self.heading)
        for filename, lines in sorted(self.files_with_issues.items()):
            if lines:
                logger.info("{}: {}".format(
                    filename, ", ".join(str(x) for x in lines)
                ))
            else:
                logger.info(filename)
        logger.info("")


# Regular expressions (matched from the start of the normalized path) for
# files that the text-oriented checks skip as binary.
BINARY_FILE_PATH_RE_LIST = [
    r'docs/.*\.pdf\Z',
    r'programs/fuzz/corpuses/[^.]+\Z',
    r'tests/data_files/[^.]+\Z',
    r'tests/data_files/.*\.(crt|csr|db|der|key|pubkey)\Z',
    r'tests/data_files/.*\.req\.[^/]+\Z',
    r'tests/data_files/.*malformed[^/]+\Z',
    r'tests/data_files/format_pkcs12\.fmt\Z',
]
BINARY_FILE_PATH_RE = re.compile('|'.join(BINARY_FILE_PATH_RE_LIST))


class LineIssueTracker(FileIssueTracker):
    """Base class for line-by-line issue tracking.

    To implement a checker that processes files line by line, inherit from
    this class and implement `issue_with_line`.
    """

    # Exclude binary files.
    path_exemptions = BINARY_FILE_PATH_RE

    def issue_with_line(self, line, filepath):
        """Check the specified line for the issue that this class is for.

        ``line`` is a bytes object including its line terminator, if any.
        Return a true value if the line has the issue.

        Subclasses must implement this method.
        """
        raise NotImplementedError

    def check_file_line(self, filepath, line, line_number):
        """Record ``line_number`` of ``filepath`` if ``line`` has the issue."""
        if self.issue_with_line(line, filepath):
            self.record_issue(filepath, line_number)

    def check_file_for_issue(self, filepath):
        """Check the lines of the specified file.

        Subclasses must implement the ``issue_with_line`` method.
        """
        # Read in binary mode so that line endings reach the checkers
        # untranslated. Line numbers start at 1.
        with open(filepath, "rb") as f:
            for line_number, line in enumerate(f, 1):
                self.check_file_line(filepath, line, line_number)


def is_windows_file(filepath):
    """Whether ``filepath`` has one of the extensions that this script
    treats as Windows files (which are expected to use CRLF line endings)."""
    _root, ext = os.path.splitext(filepath)
    return ext in ('.bat', '.dsp', '.dsw', '.sln', '.vcxproj')


class PermissionIssueTracker(FileIssueTracker):
    """Track files with bad permissions.

    Files that are not executable scripts must not be executable."""

    heading = "Incorrect permissions:"

    def check_file_for_issue(self, filepath):
        """Flag ``filepath`` if its executable bit disagrees with its name.

        Only files ending in ``.sh``, ``.pl`` or ``.py`` are expected to be
        executable.
        """
        is_executable = os.access(filepath, os.X_OK)
        should_be_executable = filepath.endswith((".sh", ".pl", ".py"))
        if is_executable != should_be_executable:
            self.files_with_issues[filepath] = None


class EndOfFileNewlineIssueTracker(FileIssueTracker):
    """Track files that end with an incomplete line
    (no newline character at the end of the last line)."""

    heading = "Missing newline at end of file:"

    path_exemptions = BINARY_FILE_PATH_RE

    def check_file_for_issue(self, filepath):
        """Flag ``filepath`` if its last byte is not a line feed.

        Empty files are not flagged.
        """
        with open(filepath, "rb") as f:
            try:
                f.seek(-1, os.SEEK_END)
            except OSError:
                # This script only works on regular files. If we can't seek
                # 1 before the end, it means that this position is before
                # the beginning of the file, i.e. that the file is empty.
                return
            if f.read(1) != b"\n":
                self.files_with_issues[filepath] = None


class Utf8BomIssueTracker(FileIssueTracker):
    """Track files that start with a UTF-8 BOM.
    Files should be ASCII or UTF-8. Valid UTF-8 does not start with a BOM."""

    heading = "UTF-8 BOM present:"

    suffix_exemptions = frozenset([".vcxproj", ".sln"])
    path_exemptions = BINARY_FILE_PATH_RE

    def check_file_for_issue(self, filepath):
        """Flag ``filepath`` if it begins with the UTF-8 byte order mark."""
        with open(filepath, "rb") as f:
            # Only the first few bytes matter, so don't read the whole file.
            if f.read(len(codecs.BOM_UTF8)) == codecs.BOM_UTF8:
                self.files_with_issues[filepath] = None


class UnixLineEndingIssueTracker(LineIssueTracker):
    """Track files with non-Unix line endings (i.e. files with CR)."""

    heading = "Non-Unix line endings:"

    def should_check_file(self, filepath):
        """Check every non-exempt file that is not a Windows file."""
        if not super().should_check_file(filepath):
            return False
        return not is_windows_file(filepath)

    def issue_with_line(self, line, _filepath):
        """Whether ``line`` contains a carriage return."""
        return b"\r" in line


class WindowsLineEndingIssueTracker(LineIssueTracker):
    """Track files with non-Windows line endings (i.e. CR or LF not in CRLF)."""

    heading = "Non-Windows line endings:"

    def should_check_file(self, filepath):
        """Check only non-exempt files that are Windows files."""
        if not super().should_check_file(filepath):
            return False
        return is_windows_file(filepath)

    def issue_with_line(self, line, _filepath):
        """Whether ``line`` fails to end in CRLF or has a CR elsewhere."""
        return not line.endswith(b"\r\n") or b"\r" in line[:-2]


class TrailingWhitespaceIssueTracker(LineIssueTracker):
    """Track lines with trailing whitespace."""

    heading = "Trailing whitespace:"
    suffix_exemptions = frozenset([".dsp", ".md"])

    def issue_with_line(self, line, _filepath):
        """Whether ``line`` has whitespace before its line terminator."""
        # Stripping all trailing whitespace removes more than stripping only
        # the line terminator exactly when there is trailing whitespace.
        return line.rstrip(b"\r\n") != line.rstrip()


class TabIssueTracker(LineIssueTracker):
    """Track lines with tabs."""

    heading = "Tabs present:"
    suffix_exemptions = frozenset([
        ".pem", # some openssl dumps have tabs
        ".sln",
        "/Makefile",
        "/Makefile.inc",
        "/generate_visualc_files.pl",
    ])

    def issue_with_line(self, line, _filepath):
        """Whether ``line`` contains a tab character."""
        return b"\t" in line


class MergeArtifactIssueTracker(LineIssueTracker):
    """Track lines with merge artifacts.
    These are leftovers from a ``git merge`` that wasn't fully edited."""

    heading = "Merge artifact:"

    def issue_with_line(self, line, _filepath):
        """Whether ``line`` looks like a leftover git conflict marker."""
        # Detect leftover git conflict markers.
        # '||||||| ' comes from merge.conflictStyle=diff3.
        if line.startswith((b'<<<<<<< ', b'>>>>>>> ', b'||||||| ')):
            return True
        # A line consisting of '=======' alone is not reported in .md files.
        if line.rstrip(b'\r\n') == b'=======' and \
                not _filepath.endswith('.md'):
            return True
        return False


class IntegrityChecker:
    """Sanity-check files under the current directory."""

    def __init__(self, log_file):
        """Instantiate the sanity checker.
        Check files under the current directory.
        Write a report of issues to log_file."""
        self.check_repo_path()
        self.logger = None
        self.setup_logger(log_file)
        self.issues_to_check = [
            PermissionIssueTracker(),
            EndOfFileNewlineIssueTracker(),
            Utf8BomIssueTracker(),
            UnixLineEndingIssueTracker(),
            WindowsLineEndingIssueTracker(),
            TrailingWhitespaceIssueTracker(),
            TabIssueTracker(),
            MergeArtifactIssueTracker(),
        ]

    @staticmethod
    def check_repo_path():
        """Raise an exception unless the current directory contains the
        ``include``, ``library`` and ``tests`` directories."""
        if not all(os.path.isdir(d) for d in ["include", "library", "tests"]):
            raise Exception("Must be run from Mbed TLS root")

    def setup_logger(self, log_file, level=logging.INFO):
        """Set up ``self.logger`` to write to ``log_file``.

        If ``log_file`` is empty or ``None``, log through a
        ``logging.StreamHandler`` instead of to a file.
        """
        self.logger = logging.getLogger()
        self.logger.setLevel(level)
        if log_file:
            handler = logging.FileHandler(log_file)
            self.logger.addHandler(handler)
        else:
            console = logging.StreamHandler()
            self.logger.addHandler(console)

    @staticmethod
    def collect_files():
        """Return the list of files reported by ``git ls-files``.

        File names are decoded as ASCII; a name that is not ASCII makes the
        decoding raise ``UnicodeDecodeError``.
        """
        bytes_output = subprocess.check_output(['git', 'ls-files', '-z'])
        # The output is NUL-terminated, so the last split element is empty.
        bytes_filepaths = bytes_output.split(b'\0')[:-1]
        ascii_filepaths = [fp.decode('ascii') for fp in bytes_filepaths]
        # Prepend './' to files in the top-level directory so that
        # something like `'/Makefile' in fp` matches in the top-level
        # directory as well as in subdirectories.
        return [fp if os.path.dirname(fp) else os.path.join(os.curdir, fp)
                for fp in ascii_filepaths]

    def check_files(self):
        """Run every issue tracker on every file it applies to."""
        # The file list is the same for all trackers: query git only once.
        filepaths = self.collect_files()
        for issue_to_check in self.issues_to_check:
            for filepath in filepaths:
                if issue_to_check.should_check_file(filepath):
                    issue_to_check.check_file_for_issue(filepath)

    def output_issues(self):
        """Log the issues found by all trackers.

        Return 1 if any tracker recorded an issue and 0 otherwise.
        """
        integrity_return_code = 0
        for issue_to_check in self.issues_to_check:
            if issue_to_check.files_with_issues:
                integrity_return_code = 1
            issue_to_check.output_file_issues(self.logger)
        return integrity_return_code


def run_main():
    """Parse the command line, run all checks and exit.

    The exit status is the value returned by
    ``IntegrityChecker.output_issues``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-l", "--log_file", type=str, help="path to optional output log",
    )
    check_args = parser.parse_args()
    integrity_check = IntegrityChecker(check_args.log_file)
    integrity_check.check_files()
    return_code = integrity_check.output_issues()
    sys.exit(return_code)


if __name__ == "__main__":
    run_main()