#!/usr/bin/env python3
#
# Copyright 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.
#
#
"""Common and low-level stuff for include server."""

__author__ = 'Nils Klarlund'

import glob
import os.path
import resource
import signal
import shutil
import sys
import tempfile


# MANAGEMENT OF TEMPORARY LOCATIONS FOR GENERATIONS OF COMPRESSED FILES


class ClientRootKeeper(object):
  """Determine the tmp directory to use for compressed files.

  Use the RAM disk-like /dev/shm as default place to store compressed files if
  available.  The protocol between the include server and distcc client
  stipulates that the top three directories constitute the prefix prepended to
  absolute file paths.

  Instance vars:
    client_tmp: a path, the place for creation of temporary directories.
    client_root: a path, the current such temporary directory
    number_missing_levels: how many '/padding' components must be appended to
      a fresh temporary directory to reach three directory components
    _client_root_before_padding: a path kept for testing purposes

  A typical client root looks like:

   - /tmp/tmpBDoZQV.include_server-6642-13/padding, or
   - /dev/shm/tmpBDoZQV.include_server-6642-19

  Note that each path has exactly three directory components to it.  This is an
  invariant.  Some client roots are padded with '/padding' to satisfy the
  invariant.
  """

  # This constant is embedded in names of client root directories.
  INCLUDE_SERVER_NAME = 'include_server'

  def __init__(self):
    """Choose client_tmp from DISTCC_CLIENT_TMP, /dev/shm or /tmp.

    Exits the process (sys.exit) if the chosen directory is not an absolute
    path or has more than two directory levels.
    """
    if 'DISTCC_CLIENT_TMP' in os.environ:
      self.client_tmp = os.environ['DISTCC_CLIENT_TMP']
    elif os.path.isdir('/dev/shm') and os.access(
        '/dev/shm', os.X_OK | os.W_OK | os.R_OK):
      self.client_tmp = '/dev/shm'
    else:
      self.client_tmp = '/tmp'
    if not self.client_tmp or self.client_tmp[0] != '/':
      sys.exit("""DISTCC_CLIENT_TMP must start with '/'.""")
    self.client_tmp = self.client_tmp.rstrip('/')
    # To have room to make a temp directory, we'll need to have less than two
    # levels at this point.  Note: '/a/b'.split('/') == ['', 'a', 'b'].
    levels = len(self.client_tmp.split('/'))
    if levels > 3:
      sys.exit('DISTCC_CLIENT_TMP must have at most two directory levels.')
    self.number_missing_levels = 3 - levels
    self.client_root = None

  def Glob(self, pid_expr):
    """Glob unpadded client roots whose pid is matched by pid expression.

    Arguments:
      pid_expr: a string, a glob expression for the pid part of the name
    Returns:
      a list of paths
    """
    pattern = '%s/*.%s-%s-*' % (self.client_tmp, self.INCLUDE_SERVER_NAME,
                                pid_expr)
    return glob.glob(pattern)

  def ClientRootMakedir(self, generation):
    """Make a new client directory for a generation of compressed files.

    Sets self.client_root and self._client_root_before_padding.  Exits the
    process (sys.exit) if the directory cannot be created.

    Arguments:
      generation: a natural number, usually 1 or slightly bigger; this number,
        minus 1, indicates how many times a reset of the caches has taken place.
    """
    # Create a unique identifier that will never repeat.  Use pid as suffix for
    # cleanout mechanism that wipes files not associated with a running pid.
    suffix = '.%s-%s-%d' % (self.INCLUDE_SERVER_NAME, os.getpid(), generation)
    # What we are trying to create, for the error message.  Until mkdtemp
    # succeeds only the shape of the name is known; self.client_root would
    # still be None or name the previous generation's directory.
    attempted = '%s/*%s' % (self.client_tmp, suffix)
    try:
      # client_tmp is '' when DISTCC_CLIENT_TMP is '/'; mkdtemp would treat an
      # empty dir as relative to the current working directory.
      self._client_root_before_padding = tempfile.mkdtemp(
          suffix, dir=self.client_tmp or '/')
      attempted = (self._client_root_before_padding
                   + '/padding' * self.number_missing_levels)
      self.client_root = attempted
      if not os.path.isdir(attempted):
        os.makedirs(attempted)
    except (IOError, OSError) as why:
      sys.exit('Could not create client root directory %s: %s' %
               (attempted, why))

  def CleanOutClientRoots(self, pid=None):
    """Delete client root directories pertaining to this process.

    Args:
      pid: None (which means 'pid of current process') or an integer
    """
    if not pid:
      pid = os.getpid()
    for client_root in self.Glob(str(pid)):
      shutil.rmtree(client_root, ignore_errors=True)

  def CleanOutOthers(self):
    """Search for left-overs from include servers that have passed away."""
    # Find all client root subdirectories whether abandoned or not.
    for directory in self.Glob('*'):
      # Fish out pid from end of directory name: '...-<pid>-<generation>'.
      pieces = directory.rsplit('-', 2)
      if len(pieces) != 3:
        continue  # Cannot happen for names matched by Glob.
      try:
        pid = int(pieces[1])
      except ValueError:
        continue  # Happens only if a spoofer is around.
      try:
        # Got a pid; does it still exist?
        os.getpgid(pid)
      except OverflowError:
        continue  # Not representable as a pid; again a spoofer.
      except OSError:
        # Process pid does not exist.  Nuke its associated files.  This will
        # of course only succeed if the files belong the current uid of
        # this process.
        if not os.access(directory, os.W_OK):
          continue  # no access, not ours
        # Pass the directory as a parameter: Debug applies '%' itself, so a
        # pre-formatted path containing '%' would make it fail.
        Debug(DEBUG_TRACE,
              "Cleaning out '%s' after defunct include server.",
              directory)
        self.CleanOutClientRoots(pid)


# COMPILATION SERVER

# An upper bound on the number of directory components in the temporary
# directory on the distccd server that corresponds to the root directory
# on the client machine.  Normally the server root is /tmp/distccd_XXXXXX.
# But it could be something different if TMPDIR is set when distccd is
# started.  See dcc_get_tmp_top() in ../src/tempfile.c.
# It turns out that our own test infrastructure (test/testdistcc.py)
# sets TMPDIR before invoking distccd, so this needs to be reasonably
# high, otherwise 'make distcheck' will fail.
MAX_COMPONENTS_IN_SERVER_ROOT = 20

# EMAILS

# For automated emails, see also src/emaillog.h.
DCC_EMAILLOG_WHOM_TO_BLAME = os.getenv('DISTCC_EMAILLOG_WHOM_TO_BLAME',
                                       'distcc-pump-errors')
EMAIL_SUBJECT = 'distcc-pump include server email'
CANT_SEND_MESSAGE = """Please notify %s that the distcc-pump include server
tried to send them email but failed.""" % DCC_EMAILLOG_WHOM_TO_BLAME
MAX_EMAILS_TO_SEND = 3

# TIME QUOTAS (SOLVING THE HALTING PROBLEM)

# The maximum user time the include server is allowed handling one request. This
# is a critical parameter because all caches are reset if this time is
# exceeded. And if all caches are reset, then the next request may take much
# longer time, possibly again exceeding the quota.  The parameter is also of
# importance to builds that involve compilations that distcc-pump does not grok:
# an amount of time roughly equal to this quota is wasted before CPP is invoked
# instead.
USER_TIME_QUOTA = 3.8  # seconds

# How often the following question is answered: has too much user time been
# spent in the include handler servicing the current request?
#
# FIXME(klarlund): SIGALRM should not be raised in code that has I/O. Fix
# include server so that this is guaranteed not to happen. Until then, we are
# careful to wait a full 4 s before issuing SIGALRM.
USER_TIME_QUOTA_CHECK_INTERVAL_TIME = 4  # seconds, an integer

# ALGORITHMS

SIMPLE = 0     # not implemented
MEMOIZING = 1  # only one currently implemented
ALGORITHMS = [SIMPLE, MEMOIZING]

# PYTHON TUNING

# The default for the first parameter of gc.set_threshold is 700; see
# http://www.isi.edu/~chiang/python.html for a discussion of why this parameter
# can be bumped up considerably for speed-up.  The new default of 10000 was
# tested on a very large application, where include server CPU time drops
# from 151s to 118s (best times out of 10 runs). There were no apparent changes
# to memory usage.  Trying with 100,000 did not speed up the application
# further.
GC_THRESHOLD = 10000


# FLAGS FOR COMMAND LINE OPTIONS

opt_algorithm = MEMOIZING  # currently, only choice
opt_debug_pattern = 1  # see DEBUG below
opt_email_bound = MAX_EMAILS_TO_SEND
opt_exact_analysis = False  # use CPP instead of include analyzer
opt_print_times = False
opt_path_observation_re = None
opt_send_email = False
opt_simple_algorithm = False
opt_stat_reset_triggers = {}
opt_statistics = False
opt_unsafe_absolute_includes = False
opt_no_force_dirs = False
opt_verify = False  # whether to compare calculated include closure to that
                    # produced by compiler
opt_write_include_closure = False  # write include closures to file

# HELPER FUNCTION FOR STAT_RESET_TRIGGERS


def Stamp(path):
  """Return a stamp characterizing a file and its modification time.

  Args:
    path: a string, the file to stat
  Returns:
    a triple (mtime, inode, device), or None if the file cannot be stat'ed
  """
  try:
    st_inf = os.stat(path)
  except OSError:
    return None
  # The inode and device identify a file uniquely.
  return (st_inf.st_mtime, st_inf.st_ino, st_inf.st_dev)


# LANGUAGES AND FILE EXTENSIONS

# The languages that we recognize.
LANGUAGES = {'c', 'c++', 'objective-c', 'objective-c++'}

# The suffixes, following last period, used for source files and
# preprocessed files, each with their corresponding source language.
TRANSLATION_UNIT_MAP = {
    # C
    'c': 'c', 'i': 'c',
    # C++
    'cc': 'c++', 'cpp': 'c++', 'cxx': 'c++', 'C': 'c++', 'CXX': 'c++',
    'ii': 'c++',
    # Objective C
    'm': 'objective-c', 'mi': 'objective-c',
    # Objective C++
    'mm': 'objective-c++', 'M': 'objective-c++', 'mii': 'objective-c++',
}

# All languages are described by suffixes.
assert set(TRANSLATION_UNIT_MAP.values()) == LANGUAGES


# DEBUG

# Debugging is controlled by the 5 least significant bits of
# opt_debug_pattern.
DEBUG_WARNING = 1  # For warnings
DEBUG_TRACE = 2    # For tracing functions (upper level)
DEBUG_TRACE1 = 4   # For tracing functions (medium level)
DEBUG_TRACE2 = 8   # For tracing functions (lower level)
DEBUG_DATA = 16    # For printing data
DEBUG_NUM_BITS = 5  # The cardinality of {1,2,4,8,16}

# The prefix printed for each debug bit, in ascending bit order.
_DEBUG_LABELS = (
    (DEBUG_WARNING, 'WARNING include server:'),
    (DEBUG_TRACE, 'TRACE:'),
    (DEBUG_TRACE1, 'TRACE1:'),
    (DEBUG_TRACE2, 'TRACE2:'),
    (DEBUG_DATA, 'DATA:'),
)


def Debug(trigger_pattern, message, *params):
  """Print message to stderr depending on trigger pattern.

  One line is printed for every debug bit that is set both in
  trigger_pattern and in the module-level opt_debug_pattern.

  Args:
    trigger_pattern: a bit vector (as an integer)
    message: a format string
    params: arguments to message
  """
  # TODO(klarlund): use Python's logging module.
  triggered = opt_debug_pattern & trigger_pattern
  if not triggered:
    return
  for bit, label in _DEBUG_LABELS:
    if bit & triggered:
      print(label, message % params, file=sys.stderr)
  sys.stderr.flush()


# EXCEPTIONS


class Error(Exception):
  """For include server errors."""
  pass


class NotCoveredError(Error):
  """Exception for included file not covered by include processing."""

  def __init__(self, message,
               source_file=None,
               line_number=None,
               send_email=True):
    """Constructor.

    Arguments:
      message: text of error message
      source_file: name of source_file if known
      line_number: an integer, if known
      send_email: a Boolean, if False then never send email

    These arguments are all stored in the exception. However, the source_file
    and line_number are appended, in a syntax defined here, to the message
    before it is stored as self.args[0] through invocation of the Error
    constructor.
    """
    # A line number is meaningless without the file it refers to.
    assert not line_number or source_file
    self.source_file = None
    self.line_number = None
    self.send_email = send_email
    if source_file:
      # Mark this exception as mentioning the source_file.
      self.source_file = source_file
      # Line numbers are not currently used.
      if line_number:
        self.line_number = line_number
        message = ("File: '%s', line: %s: %s"
                   % (source_file, line_number, message))
      else:
        message = "File: '%s': %s" % (source_file, message)
    # Message, a string, becomes self.args[0]
    Error.__init__(self, message)


class NotCoveredTimeOutError(NotCoveredError):
  """Raised when spending too much time analyzing dependencies."""
  pass


class IncludeAnalyzerTimer(object):
  """Start a timer limiting CPU time for servicing a single request.

  We use user time so that a network hiccup will not entail a cache reset if,
  say, we are using NFS.

  An object of this class must be instantiated so that, no matter what, the
  Cancel method is eventually called. This reinstates the original timer (if
  present).
  """

  def __init__(self):
    """Record the current user time and arm SIGALRM with our handler."""
    self.start_utime = resource.getrusage(resource.RUSAGE_SELF).ru_utime
    # Remember the previous handler so that Cancel can put it back.
    self.old = signal.signal(signal.SIGALRM, self._TimeIsUp)
    signal.alarm(USER_TIME_QUOTA_CHECK_INTERVAL_TIME)

  def _TimeIsUp(self, unused_sig_number, unused_frame):
    """Check CPU time spent and raise exception or reschedule."""
    if (resource.getrusage(resource.RUSAGE_SELF).ru_utime
        > self.start_utime + USER_TIME_QUOTA):
      raise NotCoveredTimeOutError(('Bailing out because include server '
                                    + 'spent more than %3.1fs user time '
                                    + 'handling request') %
                                   USER_TIME_QUOTA)
    else:
      # Reschedule ourselves.
      signal.alarm(USER_TIME_QUOTA_CHECK_INTERVAL_TIME)

  def Stop(self):
    """Disarm the pending alarm; the handler stays installed."""
    signal.alarm(0)

  def Start(self):
    """Arm the alarm again for one check interval."""
    signal.alarm(USER_TIME_QUOTA_CHECK_INTERVAL_TIME)

  def Cancel(self):
    """Must be called eventually. See class documentation."""
    sys.stdout.flush()
    signal.alarm(0)
    signal.signal(signal.SIGALRM, self.old)


class SignalSIGTERM(Error):
  """Raised by RaiseSignalSIGTERM."""
  pass


def RaiseSignalSIGTERM(*unused_args):
  """Raise SignalSIGTERM.

  Use signal.signal for binding this function to SIGTERM.
  """
  raise SignalSIGTERM


# COMMON FUNCTIONS


def SafeNormPath(path):
  """Safe, but limited, version of os.path.normpath.

  Args:
    path: a string

  Returns:
    a string

  Python's os.path.normpath is an unsafe operation; the result may not point to
  the same file as the argument. Instead, this function just removes
  initial './'s and any final '/'s if present.
  """
  if path == '.':
    return ''
  while path.startswith('./'):
    path = path[2:]
  return path.rstrip('/')