1#! /usr/bin/env python3
2
3# Copyright 2007 Google Inc.
4#
5# This program is free software; you can redistribute it and/or
6# modify it under the terms of the GNU General Public License
7# as published by the Free Software Foundation; either version 2
8# of the License, or (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13# GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program; if not, write to the Free Software
17# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
18# USA.
19
20
21"""Divination of built-in system directories used by compiler installation.
22
23It is undesirable for the distcc-pump to send header files that reside
24under the built-in search path.  In a correct compiler installation,
25these files must already be present on the server. This module lets
26the distcc-pump run the compiler in a special mode that allows the
27built-in system directories to be revealed.
28
29The current code is tested only for gcc 4.1.1.
30
31TODO(klarlund) Find out what other versions this code works for.
32TODO(klarlund) The include server halts if the built-in system
33directories cannot be determined. Should this be improved upon?
34"""
35
36__author__ = "Nils Klarlund"
37
38
39import os
40import re
41import sys
42import basics
43import shutil
44import subprocess
45
# Module-level aliases for names defined in the basics module, so that the
# code below can refer to them without the 'basics.' qualifier.
Debug = basics.Debug
DEBUG_TRACE = basics.DEBUG_TRACE
DEBUG_DATA = basics.DEBUG_DATA
NotCoveredError = basics.NotCoveredError
50
51
def _RealPrefixWithinClientRoot(client_root, path):
  """Find the deepest real-directory prefix of PATH beneath CLIENT_ROOT.

  PATH is an absolute path that is interpreted relative to the absolute
  path CLIENT_ROOT.  Starting from '/', the components of PATH are
  appended one at a time for as long as the result, when placed under
  CLIENT_ROOT, is a genuine directory -- neither a symlink nor anything
  that is not a directory.

  Args:
    client_root: an absolute path; it is prepended to every prefix tested
    path: a string starting with '/'
  Returns:
    a pair consisting of
    - the longest prefix of PATH consisting of real directories only
    - a bool, which is True iff the walk was stopped by a symlink
  """
  components = path.split('/')
  confirmed = "/"
  index = 0
  while confirmed != path:
    # Extend the confirmed prefix by the next component of PATH.
    candidate = os.path.join(confirmed, components[index])
    index += 1
    rooted_candidate = client_root + candidate
    stopped_by_link = os.path.islink(rooted_candidate)
    if stopped_by_link or not os.path.isdir(rooted_candidate):
      # The candidate is a symlink, a non-directory, or missing: the
      # prefix confirmed so far is the answer.
      return confirmed, stopped_by_link
    confirmed = candidate
  return path, False
78
79
def _MakeLinkFromMirrorToRealLocation(system_dir, client_root, system_links):
  """Create a link under client root that will resolve to system dir on server.

  See comments for CompilerDefaults class for rationale.

  Args:
    system_dir: a path such as /usr/include or
                /usr/lib/gcc/i486-linux-gnu/4.0.3/include
    client_root: a path such as /dev/shm/tmpX.include_server-X-1
    system_links: a list of paths under client_root; each denotes a symlink

  Raises:
    ValueError: if system_dir is not an absolute path.
    NotCoveredError: if system_dir is not identical to its own realpath.

  The link is created only if necessary. So,
    /usr/include/gcc/i486-linux-gnu/4.0.3/include
  is not created if
    /usr/include
  is already in place, since it's a prefix of the longer path.

  If a link is created, the symlink name will be appended to system_links.

  For example, if system_dir is '/usr/include' and client_root is
  '/dev/shm/tmpX.include_server-X-1', then this function will create a
  symlink in /dev/shm/tmpX.include_server-X-1/usr/include which points
  to ../../../../../../../../../../../../usr/include, and it will append
  '/dev/shm/tmpX.include_server-X-1/usr/include' to system_links.
  """
  if not system_dir.startswith('/'):
    raise ValueError("Expected absolute path, but got '%s'." % system_dir)
  if os.path.realpath(system_dir) != system_dir:
    raise NotCoveredError(
        "Default compiler search path '%s' must be a realpath." % system_dir)
  real_prefix, is_link = _RealPrefixWithinClientRoot(client_root, system_dir)
  parent = os.path.dirname(system_dir)
  # Typical values for rooted_system_dir:
  #  /dev/shm/tmpX.include_server-X-1/usr/include
  rooted_system_dir = client_root + system_dir
  rooted_parent = client_root + parent
  if real_prefix == system_dir:
    # rooted_system_dir already exists as a real (non-symlink) path.
    # Make rooted_system_dir a link.
    #
    # For example, this could happen if /usr/include/c++/4.0 and
    # /usr/include are both default system directories.
    # First we'd call this function with /usr/include/c++/4.0,
    # and it would call os.makedirs() to create
    # /dev/shm/tmpX.include_server-X-1/usr/include/c++,
    # and then it would create a symlink named 4.0 within that.
    # Then we'd call this function again with /usr/include.
    # In this case, we can replace the whole subtree with a single symlink
    # at /dev/shm/tmpX.include_server-X-1/usr/include.
    shutil.rmtree(rooted_system_dir)
    # Forget only the links that lived inside the removed subtree.  The
    # comparison is made on whole path components, so that a sibling such
    # as .../usr/include-fixed, which is still on disk, is not dropped
    # merely because its name shares a string prefix with .../usr/include.
    subtree_prefix = rooted_system_dir.rstrip('/') + '/'
    system_links[:] = [link for link in system_links
                       if link != rooted_system_dir
                       and not link.startswith(subtree_prefix)]
  elif real_prefix == parent:
    # The really constructed path does not extend beyond the parent directory,
    # so we're all set to create the link if it's not already there.
    # lexists() is used so that a link that is present but dangling is
    # also recognized as being in place.
    if os.path.lexists(rooted_system_dir):
      assert os.path.islink(rooted_system_dir)
      return
  elif not is_link:
    # Some directories between real_prefix and parent are missing.
    os.makedirs(rooted_parent)
  else:
    # A link above real_prefix has already been created with this routine.
    return
  assert _RealPrefixWithinClientRoot(client_root, parent) == (parent, False), (
      client_root, parent)
  depth = system_dir.count('/')
  # The more directories on the path system_dir, the more '../' need to
  # appended. We add enough '../' to get to the root directory. It's OK
  # if we have too many, since '..' in the root directory points back to
  # the root directory.
  # TODO(klarlund,fergus): do this in a more principled way.
  # This probably requires changing the protocol.
  os.symlink('../' * (basics.MAX_COMPONENTS_IN_SERVER_ROOT + depth)
             + system_dir[1:],  # remove leading '/'
             rooted_system_dir)
  system_links.append(rooted_system_dir)
156
157
def _SystemSearchdirsGCC(compiler, sysroot, language, canonical_lookup):
  """Run gcc on empty file; parse output to figure out default paths.

  This function works only for gcc, and only some versions at that.

  Arguments:
    compiler: a filepath (the first argument on the distcc command line)
    sysroot: the --sysroot passed to the compiler ("" to disable)
    language: 'c' or 'c++' or other item in basics.LANGUAGES
    canonical_lookup: a function that maps strings to their realpaths
  Returns:
    list of system search dirs for this compiler and language, each one
    passed through canonical_lookup, in the order reported by the compiler
  Raises:
    NotCoveredError: if the compiler cannot be executed, exits with a
      non-zero status, or produces output without a search list.
  """

  # We are trying to wring the following kind of text out of the
  # compiler:
  #--------------------
  # blah. blah.
  # ...
  # blah. blah.
  # #include "..." search starts here:
  # #include <...> search starts here:
  #  /usr/local/include
  #  /usr/lib/gcc/i486-linux-gnu/4.0.3/include
  #  /usr/include
  # End of search list.
  # blah. blah.
  #------------

  command = [compiler]
  if sysroot:
    command += ["--sysroot=" + sysroot]
  command += ["-x", language, "-v", "-c", "/dev/null", "-o", "/dev/null"]
  Debug(DEBUG_DATA, "system search dirs command: %s" % command)

  # We clear the environment, because otherwise, directories
  # declared by CPATH, for example, will be incorporated into the
  # result. (See the CPP manual for the meaning of CPATH.)  The only
  # thing we keep is PATH, so we can be sure to find the compiler.
  # NOTE: having the full PATH can be tricky: what if there's a gcc
  # -> distcc symlink somewhere on the PATH, before the real gcc?
  # We think the right thing will happen here, but it's complicated.
  # TODO(csilvers): it's possible we could need to pass in some
  # other environment vars, like LD_LIBRARY_PATH.  Instead of adding
  # in more env-vars by hand, consider just removing from os.environ
  # all the env-vars that are meaningful to gcc, such as CPATH.  See
  # http://docs.freebsd.org/info/gcc/gcc.info.Environment_Variables.html,
  # or the "Environment Variables Affecting GCC" section of the gcc
  # info page.
  if 'PATH' in os.environ:
    trimmed_env = {'PATH': os.environ['PATH']}
  else:
    trimmed_env = {}

  try:
    # stderr is merged into stdout because the search list is part of the
    # compiler's verbose ('-v') diagnostics.
    p = subprocess.Popen(command,
                         shell=False,
                         stdin=None,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT,
                         env=trimmed_env,
                         universal_newlines=True)
    out = p.communicate()[0]
  except (IOError, OSError) as why:
    raise NotCoveredError(
        ("Couldn't determine default system include directories\n"
         "for compiler '%s', language '%s':\n"
         "error executing '%s': %s.")
        % (compiler, language, command, why)) from why

  if p.returncode != 0:
    raise NotCoveredError(
        ("Couldn't determine default system include directories\n"
         "for compiler '%s', language '%s':\n"
         "command '%s' exited with status '%d'.\n Command output:\n%s")
        % (compiler, language, command, p.returncode, out))

  # The two marker lines are matched literally (hence re.escape); DOTALL
  # lets the captured group span all the directory lines between them.
  match_obj = re.search(
      "%s\n(.*?)\n%s"
      % (re.escape("#include <...> search starts here:"),
         re.escape("End of search list")),
      out,
      re.DOTALL)
  if match_obj is None:
    raise NotCoveredError(
        ("Couldn't determine default system include directories\n"
         "for compiler '%s', language '%s':\n"
         "couldn't parse output of '%s'.\nReceived:\n%s")
        % (compiler, language, command, out))

  # TODO: Rather than just ignoring framework directories, we
  # should handle them properly, fully emulating the search
  # algorithm used by Apple's modified GCC.
  # The search algorithm used for framework directories is not very
  # well documented, as far as I can tell, but the source code is in
  # gcc/config/darwin-c.c in the Apple GCC sources.
  # From a quick glance, I think it looks like this:
  # - For each #include of the form Foo/bar.h,
  #        For each framework directory Baz,
  #            Look in Baz/Foo.framework/Headers/bar.h
  #            and in Baz/Foo.framework/PrivateHeaders/bar.h
  # - If the regular search fails, look for subframeworks.
  #     For each #include of the form Foo/bar.h
  #       from Baz/Quux.framework/Headers/whatever.h
  #            Look in Baz/Quux.framework/Frameworks/Foo/Headers/bar.h.
  search_dirs = []
  for line in match_obj.group(1).split("\n"):
    # Ignore Apple-modified MacOS gcc's "framework" directories.
    if line.endswith(" (framework directory)"):
      continue
    # Each line holds exactly one directory.  Strip the surrounding
    # whitespace instead of splitting on it, so that a directory whose
    # name contains spaces is kept in one piece.
    directory = line.strip()
    if directory:
      search_dirs.append(canonical_lookup(directory))
  return search_dirs
266
class CompilerDefaults(object):
  """Caches compiler default search dirs and maintains the symlink farm.

  This class works only for gcc, and only some versions at that, because
  the default search dirs are obtained by parsing gcc's output.

  The 'default' search dirs are the ones built into the preprocessor's
  search path, as opposed to those put there on the command line via -I
  et al.

  Why a symlink farm is needed: when an option such as -I/foo/bar is passed
  to the server, the server rewrites it to -I/server/path/root/foo/bar,
  where /server/path/root is the temporary directory on the server that
  corresponds to root on the client (e.g. typically /dev/shm/distccd_nnn).
  That is a problem for options such as -I/usr/include/foo, whose path has
  a 'default' search directory (here /usr/include) as a prefix.  Header
  files under the system default directories are assumed to exist on the
  server, and sending them there unnecessarily would be expensive (we
  measured it, and it slowed down the build of Samba by 20%).  So for
  -I/usr/include/foo we want the server to use /usr/include/foo on the
  server, not /server/path/root/usr/include/foo.

  Since the server unconditionally rewrites include search paths on the
  command line to be relative to the server root, references to files
  under these relocated system directories must be redirected to the
  absolute location where they're actually found.

  This is achieved with a symlink forest under client_root, containing
  symlinks of the form

    usr/include -> ../../../../../../../../../../../../usr/include

  After being sent to the server, the server will rewrite them as

    /server/path/root/usr/include ->
       /server/path/root/../../../../../../../../../../../../usr/include

  which will make

     /server/path/root/usr/include

  become a symlink to

     /usr/include

  Consequently, an include search directory such as -I /usr/include/foo
  will work on the server, even after it has been rewritten to:

    -I /server/path/root/usr/include/foo
  """

  def __init__(self, canonical_lookup, client_root):
    """Constructor.

    Arguments:
      canonical_lookup: the function handed to _SystemSearchdirsGCC to
        canonicalize each directory it finds
      client_root: a path such as /dev/shm/tmpX.include_server-X-1

    Instance variables:
      canonical_lookup: the canonical_lookup argument
      client_root: the client_root argument
      system_dirs_default: a nested dictionary such that
        system_dirs_default[c][sysroot][lang] is the list of default
        search dirs (strings) for compiler c, that sysroot, and
        language lang
      system_dirs_default_all: the set of all such strings, for all c,
        sysroot, and lang seen so far
      system_links: locations under client_root representing system
        default dirs
    """
    self.client_root = client_root
    self.canonical_lookup = canonical_lookup
    self.system_links = []
    self.system_dirs_default = {}
    self.system_dirs_default_all = set()

  def SetSystemDirsDefaults(self, compiler, sysroot, language, timer=None):
    """Record the default dirs for a compiler setup and extend the symlink farm.

    Nothing is done if the combination of compiler, sysroot, and language
    has already been recorded.

    Arguments:
      compiler: a filepath (the first argument on the distcc command line)
      sysroot: the --sysroot passed to the compiler ("" to disable)
      language: 'c' or 'c++' or other item in basics.LANGUAGES
      timer: a basics.IncludeAnalyzerTimer or None

    The timer will be disabled during this routine because the select
    involved in Popen calls does not handle SIGALRM.

    See also the class documentation for this class.
    """
    assert isinstance(compiler, str)
    assert isinstance(language, str)
    Debug(DEBUG_TRACE,
          "SetSystemDirsDefaults with CC, SYSROOT, LANG: %s, %s, %s" %
          (compiler, sysroot, language))
    # Fetch the per-language table for this compiler and sysroot, creating
    # the intermediate dictionaries on first use.
    dirs_by_language = (
        self.system_dirs_default.setdefault(compiler, {})
        .setdefault(sysroot, {}))
    if language in dirs_by_language:
      return
    try:
      if timer:
        # We have to disable the timer because the select system call that is
        # executed when calling the compiler through Popen gives up if
        # presented with a SIGALRM.
        timer.Stop()
      search_dirs = _SystemSearchdirsGCC(
          compiler, sysroot, language, self.canonical_lookup)
      dirs_by_language[language] = search_dirs
      Debug(DEBUG_DATA,
            "system_dirs_default[%s][%s][%s]: %s" %
            (compiler, sysroot, language, search_dirs))
      # Now summarize what we know and add to system_dirs_default_all.
      self.system_dirs_default_all |= set(search_dirs)
      # Construct the symlink farm for the compiler default dirs.
      for system_dir in search_dirs:
        _MakeLinkFromMirrorToRealLocation(system_dir, self.client_root,
                                          self.system_links)
    finally:
      if timer:
        timer.Start()
388