#! /usr/bin/env python3

# Copyright 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.


"""Divination of built-in system directories used by compiler installation.

It is undesirable for the distcc-pump to send header files that reside
under the built-in search path.  In a correct compiler installation,
these files must already be present on the server.  This module lets
the distcc-pump run the compiler in a special mode that allows the
built-in system directories to be revealed.

The current code is tested only for gcc 4.1.1.

TODO(klarlund) Find out what other versions this code works for.
TODO(klarlund) The include server halts if the built-in system
directories cannot be determined.  Should this be improved upon?
"""

__author__ = "Nils Klarlund"


import os
import re
import sys
import basics
import shutil
import subprocess

Debug = basics.Debug
DEBUG_TRACE = basics.DEBUG_TRACE
DEBUG_DATA = basics.DEBUG_DATA
NotCoveredError = basics.NotCoveredError


def _RealPrefixWithinClientRoot(client_root, path):
  """Determine longest directory prefix of PATH and whether PATH contains a symlink.

  Given an absolute path CLIENT_ROOT and an absolute path PATH that is
  interpreted as relative to CLIENT_ROOT, figure out the longest prefix
  of PATH such that every component of the prefix is a directory -- not
  a file or symlink -- when interpreted relative to CLIENT_ROOT.

  Args:
    client_root: an absolute path; PATH is looked up underneath it
    path: a string starting with '/'
  Returns:
    a pair consisting of
    - the prefix
    - a bool, which is True iff PATH contained a symlink.
  """
  prefix = "/"
  parts = path.split('/')
  # Grow the prefix one component at a time, stopping at the first
  # component that is a symlink or is not a directory under client_root.
  while prefix != path:
    part = parts.pop(0)
    last_prefix = prefix
    prefix = os.path.join(prefix, part)
    if os.path.islink(client_root + prefix):
      return last_prefix, True
    if not os.path.isdir(client_root + prefix):
      return last_prefix, False
  return path, False


def _MakeLinkFromMirrorToRealLocation(system_dir, client_root, system_links):
  """Create a link under client root that will resolve to system dir on server.

  See comments for CompilerDefaults class for rationale.

  Args:
    system_dir: a path such as /usr/include or
                /usr/lib/gcc/i486-linux-gnu/4.0.3/include
    client_root: a path such as /dev/shm/tmpX.include_server-X-1
    system_links: a list of paths under client_root; each denotes a symlink

  Raises:
    ValueError: if system_dir is not an absolute path
    NotCoveredError: if system_dir is not its own realpath

  The link is created only if necessary. So,
    /usr/include/gcc/i486-linux-gnu/4.0.3/include
  is not created if
    /usr/include
  is already in place, since it's a prefix of the longer path.

  If a link is created, the symlink name will be appended to system_links.

  For example, if system_dir is '/usr/include' and client_root is
  '/dev/shm/tmpX.include_server-X-1', then this function will create a
  symlink in /dev/shm/tmpX.include_server-X-1/usr/include which points
  to ../../../../../../../../../../../../usr/include, and it will append
  '/dev/shm/tmpX.include_server-X-1/usr/include' to system_links.
  """
  if not system_dir.startswith('/'):
    raise ValueError("Expected absolute path, but got '%s'." % system_dir)
  if os.path.realpath(system_dir) != system_dir:
    raise NotCoveredError(
        "Default compiler search path '%s' must be a realpath." % system_dir)
  # Typical values for rooted_system_dir:
  #  /dev/shm/tmpX.include_server-X-1/usr/include
  real_prefix, is_link = _RealPrefixWithinClientRoot(client_root, system_dir)
  parent = os.path.dirname(system_dir)
  rooted_system_dir = client_root + system_dir
  rooted_parent = client_root + parent
  if real_prefix == system_dir:
    # rooted_system_dir already exists as a real (non-symlink) path.
    # Make rooted_system_dir a link.
    #
    # For example, this could happen if /usr/include/c++/4.0 and
    # /usr/include are both default system directories.
    # First we'd call this function with /usr/include/c++/4.0,
    # and it would call os.makedirs() to create
    # /dev/shm/tmpX.include_server-X-1/usr/include/c++,
    # and then it would create a symlink named 4.0 within that.
    # Then we'd call this function again with /usr/include.
    # In this case, we can replace the whole subtree with a single symlink
    # at /dev/shm/tmpX.include_server-X-1/usr/include.
    shutil.rmtree(rooted_system_dir)
    # Forget only the links that lived inside the removed subtree.  Matching
    # on the directory plus a trailing separator keeps siblings that merely
    # share a string prefix (e.g. '.../include-fixed' vs. '.../include').
    # os.path.join(x, '') appends a separator only if x lacks one.
    subtree_prefix = os.path.join(rooted_system_dir, '')
    system_links[:] = [link for link in system_links
                       if link != rooted_system_dir
                       and not link.startswith(subtree_prefix)]
  elif real_prefix == parent:
    # The really constructed path does not extend beyond the parent directory,
    # so we're all set to create the link if it's not already there.
    if os.path.exists(rooted_system_dir):
      assert os.path.islink(rooted_system_dir)
      return
  elif not is_link:
    os.makedirs(rooted_parent)
  else:
    # A link above real_prefix has already been created with this routine.
    return
  assert (_RealPrefixWithinClientRoot(client_root, parent)
          == (parent, False)), (client_root, parent)
  depth = system_dir.count('/')
  # The more directories on the path system_dir, the more '../' need to
  # be appended. We add enough '../' to get to the root directory. It's OK
  # if we have too many, since '..' in the root directory points back to
  # the root directory.
  # TODO(klarlund,fergus): do this in a more principled way.
  # This probably requires changing the protocol.
  os.symlink('../' * (basics.MAX_COMPONENTS_IN_SERVER_ROOT + depth)
             + system_dir[1:],  # remove leading '/'
             rooted_system_dir)
  system_links.append(rooted_system_dir)


def _SystemSearchdirsGCC(compiler, sysroot, language, canonical_lookup):
  """Run gcc on empty file; parse output to figure out default paths.

  This function works only for gcc, and only some versions at that.

  Arguments:
    compiler: a filepath (the first argument on the distcc command line)
    sysroot: the --sysroot passed to the compiler ("" to disable)
    language: 'c' or 'c++' or other item in basics.LANGUAGES
    canonical_lookup: a function that maps strings to their realpaths
  Returns:
    list of system search dirs for this compiler and language
  Raises:
    NotCoveredError: if the compiler cannot be executed, exits with a
      non-zero status, or prints output that cannot be parsed

  """

  # We are trying to wring the following kind of text out of the
  # compiler:
  #--------------------
  # blah. blah.
  # ...
  # blah. blah.
  # #include "..." search starts here:
  # #include <...> search starts here:
  #  /usr/local/include
  #  /usr/lib/gcc/i486-linux-gnu/4.0.3/include
  #  /usr/include
  # End of search list.
  # blah. blah.
  #------------

  command = [compiler]
  if sysroot:
    command += ["--sysroot=" + sysroot]
  command += ["-x", language, "-v", "-c", "/dev/null", "-o", "/dev/null"]
  Debug(DEBUG_DATA, "system search dirs command: %s" % command)

  # We clear the environment, because otherwise, directories
  # declared by CPATH, for example, will be incorporated into the
  # result. (See the CPP manual for the meaning of CPATH.)  The only
  # thing we keep is PATH, so we can be sure to find the compiler.
  # NOTE: having the full PATH can be tricky: what if there's a gcc
  # -> distcc symlink somewhere on the PATH, before the real gcc?
  # We think the right thing will happen here, but it's complicated.
  # TODO(csilvers): it's possible we could need to pass in some
  # other environment vars, like LD_LIBRARY_PATH.  Instead of adding
  # in more env-vars by hand, consider just removing from os.environ
  # all the env-vars that are meaningful to gcc, such as CPATH.  See
  # http://docs.freebsd.org/info/gcc/gcc.info.Environment_Variables.html,
  # or the "Environment Variables Affecting GCC" section of the gcc
  # info page.
  if 'PATH' in os.environ:
    trimmed_env = {'PATH': os.environ['PATH']}
  else:
    trimmed_env = {}

  try:
    p = subprocess.Popen(command,
                         shell=False,
                         stdin=None,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT,
                         env=trimmed_env,
                         universal_newlines=True)
    out = p.communicate()[0]
  except (IOError, OSError) as why:
    raise NotCoveredError(
        ("Couldn't determine default system include directories\n"
         "for compiler '%s', language '%s':\n"
         "error executing '%s': %s.")
        % (compiler, language, command, why)) from why

  if p.returncode != 0:
    raise NotCoveredError(
        ("Couldn't determine default system include directories\n"
         "for compiler '%s', language '%s':\n"
         "command '%s' exited with status '%d'.\n Command output:\n%s")
        % (compiler, language, command, p.returncode, out))

  match_obj = re.search(
      "%s\n(.*?)\n%s"  # don't ask
      % ("#include <...> search starts here:", "End of search list"),
      out,
      re.MULTILINE | re.DOTALL)
  if match_obj is None:
    raise NotCoveredError(
        ("Couldn't determine default system include directories\n"
         "for compiler '%s', language '%s':\n"
         "couldn't parse output of '%s'.\nReceived:\n%s")
        % (compiler, language, command, out))

  # TODO: Rather than just ignoring framework directories, we
  # should handle them properly, fully emulating the search
  # algorithm used by Apple's modified GCC.
  # The search algorithm used for framework directories is not very
  # well documented, as far as I can tell, but the source code is in
  # gcc/config/darwin-c.c in the Apple GCC sources.
  # From a quick glance, I think it looks like this:
  # - For each #include of the form Foo/bar.h,
  #        For each framework directory Baz,
  #            Look in Baz/Foo.framework/Headers/bar.h
  #            and in Baz/Foo.framework/PrivateHeaders/bar.h
  # - If the regular search fails, look for subframeworks.
  #     For each #include of the form Foo/bar.h
  #         from Baz/Quux.framework/Headers/whatever.h
  #             Look in Baz/Quux.framework/Frameworks/Foo/Headers/bar.h.
  return [canonical_lookup(directory)
          for line in match_obj.group(1).split("\n")
          for directory in line.split()
          # Ignore Apple-modified MacOS gcc's "framework" directories.
          if not line.endswith(" (framework directory)")]


class CompilerDefaults(object):
  """Records and caches the default search dirs and creates symlink farm.

  This function works only for gcc, and only some versions at that,
  because we parse the output from gcc to determine the default search dirs.

  The 'default' searchdirs are those on the search-path that are built in, that
  is known to the preprocessor, as opposed to being set on the commandline via
  -I et al.

  When we pass an option such as -I/foo/bar to the server,
  the server will rewrite it to say -I/server/path/root/foo/bar,
  where /server/path/root is the temporary directory on the server
  that corresponds to root on the client (e.g. typically /dev/shm/distccd_nnn).
  This causes problems in this case of -I options such as -I/usr/include/foo,
  where the path contains a 'default' search directory (in this case
  /usr/include) as a prefix.
  Header files under the system default directories are assumed to exist
  on the server, and it would be expensive to send them to the server
  unnecessarily (we measured it, and it slowed down the build of Samba by 20%).
  So for -I options like -I/usr/include/foo, we want the server
  to use /usr/include/foo on the server, not /server/path/root/usr/include/foo.

  Because the server unconditionally rewrites include search
  paths on the command line to be relative to the server root, we must take
  corrective action when identifying default system dirs: references to files
  under these relocated system directories must be redirected to the absolute
  location where they're actually found.

  To do so, we create a symlink forest under client_root.
  This will contain symlinks of the form

    usr/include -> ../../../../../../../../../../../../usr/include

  After being sent to the server, the server will rewrite them as

    /server/path/root/usr/include ->
       /server/path/root/../../../../../../../../../../../../usr/include

  which will make

    /server/path/root/usr/include

  become a symlink to

    /usr/include

  Consequently, an include search directory such as -I /usr/include/foo will
  work on the server, even after it has been rewritten to:

    -I /server/path/root/usr/include/foo
  """

  def __init__(self, canonical_lookup, client_root):
    """Constructor.

    Arguments:
      canonical_lookup: a function that maps strings to their realpaths
      client_root: a path such as /dev/shm/tmpX.include_server-X-1

    Instance variables:
      canonical_lookup: the canonical_lookup argument
      system_dirs_default: a dictionary such that
        system_dirs_default[c][sysroot][lang] is a list of directory paths
        (strings) for compiler c, sysroot sysroot and language lang
      system_dirs_default_all: a set of all such strings, for all c,
        sysroot and lang
      client_root: a path such as /dev/shm/tmpX.include_server-X-1
      system_links: locations under client_root representing system default dirs
    """
    self.canonical_lookup = canonical_lookup
    self.system_dirs_default_all = set()
    self.system_dirs_default = {}
    self.system_links = []
    self.client_root = client_root

  def SetSystemDirsDefaults(self, compiler, sysroot, language, timer=None):
    """Set instance variables according to compiler, and make symlink farm.

    Arguments:
      compiler: a filepath (the first argument on the distcc command line)
      sysroot: the --sysroot passed to the compiler ("" to disable)
      language: 'c' or 'c++' or other item in basics.LANGUAGES
      timer: a basics.IncludeAnalyzerTimer or None

    The timer will be disabled during this routine because the select involved
    in Popen calls does not handle SIGALRM.

    See also the class documentation for this class.
    """
    assert isinstance(compiler, str)
    assert isinstance(language, str)
    Debug(DEBUG_TRACE,
          "SetSystemDirsDefaults with CC, SYSROOT, LANG: %s, %s, %s" %
          (compiler, sysroot, language))
    # Return early if this (compiler, sysroot, language) triple is already
    # cached; otherwise make sure the nested dictionaries exist.
    if compiler in self.system_dirs_default:
      if sysroot in self.system_dirs_default[compiler]:
        if language in self.system_dirs_default[compiler][sysroot]:
          return
      else:
        self.system_dirs_default[compiler][sysroot] = {}
    else:
      self.system_dirs_default[compiler] = {sysroot: {}}
    try:
      if timer:
        # We have to disable the timer because the select system call that is
        # executed when calling the compiler through Popen gives up if presented
        # with a SIGALRM.
        timer.Stop()
      search_dirs = _SystemSearchdirsGCC(compiler, sysroot, language,
                                         self.canonical_lookup)
      self.system_dirs_default[compiler][sysroot][language] = search_dirs
      Debug(DEBUG_DATA,
            "system_dirs_default[%s][%s][%s]: %s" %
            (compiler, sysroot, language, search_dirs))
      # Now summarize what we know and add to system_dirs_default_all.
      self.system_dirs_default_all |= set(search_dirs)
      # Construct the symlink farm for the compiler default dirs.
      for system_dir in search_dirs:
        _MakeLinkFromMirrorToRealLocation(system_dir, self.client_root,
                                          self.system_links)
    finally:
      if timer:
        timer.Start()