pylib/gyp/common.py

#!/usr/bin/python

# Copyright (c) 2009 Google Inc. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import errno
import filecmp
import os.path
import re
import tempfile
import sys

def ExceptionAppend(e, msg):
  """Append |msg| to the message carried by exception |e|, in place.

  If |e| has no arguments, |msg| becomes its only argument.  Otherwise the
  first argument is converted to a string and |msg| is appended to it after
  a single space; any further arguments are left untouched.
  """
  if e.args:
    # Slicing past the first element yields an empty tuple for a
    # single-argument exception, so one expression covers every arity.
    extended = str(e.args[0]) + ' ' + msg
    e.args = (extended,) + e.args[1:]
  else:
    e.args = (msg,)


def ParseQualifiedTarget(target):
  """Split a qualified target into [build_file, target_name, toolset].

  The text before the last ':' is the build file and the text after the last
  '#' of the remainder is the toolset.  A component whose separator is absent
  is returned as None.
  """
  # Partition on the LAST ':' so that a Windows drive letter separator stays
  # part of the build file.
  build_file, colon, remainder = target.rpartition(':')
  if colon:
    target = remainder
  else:
    build_file = None

  name, hash_mark, toolset = target.rpartition('#')
  if hash_mark:
    target = name
  else:
    toolset = None

  return [build_file, target, toolset]


def ResolveTarget(build_file, target, toolset):
  """Resolve a qualified target into [build_file, target_name, toolset].

  Arguments:
    build_file: the file relative to which |target| is defined.
    target: the qualified target.
    toolset: the default toolset for that target.
  Returns:
    A list of the build file (absolute, or relative to the current
    directory), the bare target name, and the toolset.  A build file or
    toolset named inside |target| takes precedence over the corresponding
    argument.
  """
  parsed_build_file, target, parsed_toolset = ParseQualifiedTarget(target)

  if parsed_build_file and build_file:
    # A relative parsed_build_file is relative to the directory containing
    # build_file, so it is not usable as-is when build_file lives outside
    # the current directory.  Joining it onto that directory fixes this; an
    # absolute parsed_build_file is returned unchanged by os.path.join.
    containing_dir = os.path.dirname(build_file)
    build_file = os.path.normpath(
        os.path.join(containing_dir, parsed_build_file))
  elif parsed_build_file:
    build_file = parsed_build_file

  if parsed_toolset:
    toolset = parsed_toolset

  return [build_file, target, toolset]


def BuildFile(fully_qualified_target):
  """Return the build file component of |fully_qualified_target|."""
  build_file, _name, _toolset = ParseQualifiedTarget(fully_qualified_target)
  return build_file


def QualifiedTarget(build_file, target, toolset):
  """Build the fully qualified name of a target.

  The result is the build file and the target name separated by a colon,
  followed by '#' and the toolset name when |toolset| is non-empty:
  /path/to/file.gyp:target_name#toolset
  """
  if toolset:
    toolset_suffix = '#' + toolset
  else:
    toolset_suffix = ''
  return build_file + ':' + target + toolset_suffix


def RelativePath(path, relative_to):
  """Return |path| expressed relative to |relative_to|.

  Both arguments are taken to be relative to the current directory.  The
  result is the empty string when both name the same location.
  """
  # Absolute paths are also normalized, so the components compare cleanly.
  target_parts = os.path.abspath(path).split(os.path.sep)
  base_parts = os.path.abspath(relative_to).split(os.path.sep)

  # Count the leading components that the two paths have in common.
  shared = 0
  for target_part, base_part in zip(target_parts, base_parts):
    if target_part != base_part:
      break
    shared += 1

  # Climb out of relative_to up to the common ancestor, then descend along
  # whatever remains of path.
  climb = [os.path.pardir] * (len(base_parts) - shared)
  pieces = climb + target_parts[shared:]

  if not pieces:
    # Nothing to climb and nothing to descend: the paths were the same.
    return ''

  return os.path.join(*pieces)


def FixIfRelativePath(path, relative_to):
  """Like RelativePath, but return |path| unchanged if it is absolute."""
  if not os.path.isabs(path):
    return RelativePath(path, relative_to)
  return path


def UnrelativePath(path, relative_to):
  """Return |path| expressed relative to the current directory.

  |relative_to| is taken to be relative to the current directory, and |path|
  to be relative to the directory containing |relative_to|.
  """
  joined = os.path.join(os.path.dirname(relative_to), path)
  return os.path.normpath(joined)


# Compiled patterns used by EncodePOSIXShellArgument.  See IEEE 1003.1
# XCU.2.2 at
# http://www.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_02
# and the documentation for various shells.

# _quote matches any argument that EncodePOSIXShellArgument must wrap in
# double-quotes.  It matches when any of these characters appears anywhere in
# the argument:
#   \t, \n, space  parameter separators
#   #              comments
#   $              expansions (quoted to always expand within one argument)
#   %              called out by IEEE 1003.1 XCU.2.2
#   &              job control
#   '              quoting
#   (, )           subshell execution
#   *, ?, [        pathname expansion
#   ;              command delimiter
#   <, >, |        redirection
#   =              assignment
#   {, }           brace expansion (bash)
#   ~              tilde expansion
# It also matches the empty string, because "" (or '') is the only way to
# write an empty string literal argument for a POSIX shell.
#
# The characters covered by _escape are deliberately absent here: they need a
# backslash whether or not they sit inside a double-quoted string.
_quote = re.compile('[\t\n #$%&\'()*;<=>?[{|}~]|^$')

# _escape matches every character that must be backslash-escaped, regardless
# of whether the argument matched _quote.  It is used with a substitution that
# puts a backslash in front of the first match group, hence the (parentheses)
# in the regular expression.
#
# _escape matches the following characters appearing anywhere in an argument:
#   "  to prevent POSIX shells from interpreting this character for quoting
#   \  to prevent POSIX shells from interpreting this character for escaping
#   `  to prevent POSIX shells from interpreting this character for command
#      substitution
# $ is intentionally not in this list, because EncodePOSIXShellArgument is
# meant to permit parameter (variable) expansion.
#
# ! is not in this list either.  bash interprets it as the history expansion
# character when history is enabled, but bash does not enable history by
# default in non-interactive shells, so this is not thought to be a problem.
# It was left out because bash treats "\!" as a literal string that includes
# the backslash (avoiding history expansion but retaining the backslash),
# which would not be correct for argument encoding.  Handling it properly
# would also be problematic because bash lets the history character be changed
# through the histchars shell variable.  Since history is not enabled in
# non-interactive shells, and EncodePOSIXShellArgument is only expected to
# encode for non-interactive shells, ignoring ! leaves no room for error.
_escape = re.compile(r'(["\\`])')

def EncodePOSIXShellArgument(argument):
  """Return |argument| encoded for consumption by POSIX shells.

  The value is quoted and escaped as needed so that a POSIX shell reads the
  result as a single literal argument.  A $ is never escaped, so parameter
  (variable) references in the argument are still expanded by the shell.
  Arguments that are not str instances are converted with str() first.
  """
  if isinstance(argument, str):
    text = argument
  else:
    text = str(argument)

  # Backslash-escaping applies whether or not the argument ends up quoted.
  escaped = _escape.sub(r'\\\1', text)

  if _quote.search(text) is None:
    return escaped
  return '"' + escaped + '"'


def EncodePOSIXShellList(list):
  """Return the items of |list| encoded for consumption by POSIX shells.

  Each item is passed through EncodePOSIXShellArgument and the results are
  joined with a single space as the argument separator.
  """
  return ' '.join([EncodePOSIXShellArgument(item) for item in list])


def DeepDependencyTargets(target_dicts, roots):
  """Return the recursive list of dependencies of the targets in |roots|.

  Each target's 'dependencies' and 'dependencies_original' entries in
  |target_dicts| are followed transitively.  The targets in |roots| are not
  part of the result, and the order of the returned list is unspecified.
  """
  visited = set()
  pending = set(roots)
  while pending:
    current = pending.pop()
    if current not in visited:
      visited.add(current)
      spec = target_dicts[current]
      # Queue the children; ones already visited are skipped when popped.
      for key in ('dependencies', 'dependencies_original'):
        pending |= set(spec.get(key, []))
  return list(visited - set(roots))


def BuildFileTargets(target_list, build_file):
  """Return the targets in |target_list| that belong to |build_file|.

  A target belongs to |build_file| when BuildFile() of its qualified name
  equals |build_file|.  The order of |target_list| is preserved.
  """
  matching = []
  for qualified_target in target_list:
    if BuildFile(qualified_target) == build_file:
      matching.append(qualified_target)
  return matching


def AllTargets(target_list, target_dicts, build_file):
  """Return all targets (direct and dependencies) for |build_file|.

  The targets defined in |build_file| come first, followed by their
  recursive dependencies.
  """
  direct = BuildFileTargets(target_list, build_file)
  return direct + DeepDependencyTargets(target_dicts, direct)


def WriteOnDiff(filename):
  """Write to a file only if the new contents differ.

  Arguments:
    filename: name of the file to potentially write to.
  Returns:
    A file-like object which writes to a temporary file created in the same
    directory as |filename| and opened in binary mode.  When close() is
    called, |filename| is replaced by the temporary file only if their
    contents differ or |filename| does not exist; otherwise the temporary
    file is discarded.
  Raises:
    Errors from creating or opening the temporary file propagate.  close()
    propagates any error from the compare/replace steps after attempting to
    remove the temporary file.
  """

  class Writer:
    """Wrapper around file which only replaces the target if it differs."""
    def __init__(self):
      # Pick temporary file, named after the target and placed next to it.
      tmp_fd, self.tmp_path = tempfile.mkstemp(
          suffix='.tmp',
          prefix=os.path.split(filename)[1] + '.gyp.',
          dir=os.path.split(filename)[0])
      try:
        self.tmp_file = os.fdopen(tmp_fd, 'wb')
      except Exception:
        # Don't leave turds behind.
        os.unlink(self.tmp_path)
        raise

    def __getattr__(self, attrname):
      # Delegate everything else (write, flush, ...) to self.tmp_file.
      return getattr(self.tmp_file, attrname)

    def close(self):
      """Close the temporary file and replace the target if it differs."""
      try:
        # Close tmp file.
        self.tmp_file.close()
        # Determine if different.  A missing target counts as different.
        same = False
        try:
          same = filecmp.cmp(self.tmp_path, filename, False)
        except OSError as e:
          if e.errno != errno.ENOENT:
            raise

        if same:
          # The new file is identical to the old one, just get rid of the new
          # one.
          os.unlink(self.tmp_path)
        else:
          # The new file is different from the old one, or there is no old one.
          # Rename the new file to the permanent name.
          #
          # tempfile.mkstemp uses an overly restrictive mode, resulting in a
          # file that can only be read by the owner, regardless of the umask.
          # There's no reason to not respect the umask here, which means that
          # an extra hoop is required to fetch it and reset the new file's mode.
          #
          # No way to get the umask without setting a new one?  Set a safe one
          # and then set it back to the old value.
          #
          # The 0o octal prefix is valid on Python 2.6+ and Python 3, whereas
          # the bare leading-zero form is a syntax error on Python 3.
          umask = os.umask(0o77)
          os.umask(umask)
          os.chmod(self.tmp_path, 0o666 & ~umask)
          if sys.platform == 'win32' and os.path.exists(filename):
            # NOTE: on windows (but not cygwin) rename will not replace an
            # existing file, so it must be preceded with a remove. Sadly there
            # is no way to make the switch atomic.
            os.remove(filename)
          os.rename(self.tmp_path, filename)
      except Exception:
        # Don't leave turds behind.
        os.unlink(self.tmp_path)
        raise

  return Writer()


# From Alex Martelli,
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52560
# ASPN: Python Cookbook: Remove duplicates from a sequence
# First comment, dated 2001/10/13.
# (Also in the printed Python Cookbook.)

def uniquer(seq, idfun=None):
  """Return the items of |seq| with duplicates removed, keeping first seen.

  Two items are duplicates when |idfun| returns equal markers for them.
  When |idfun| is None the item itself is the marker.  Markers must be
  hashable.
  """
  if idfun is None:
    idfun = lambda item: item
  seen_markers = set()
  unique_items = []
  for item in seq:
    marker = idfun(item)
    if marker not in seen_markers:
      seen_markers.add(marker)
      unique_items.append(item)
  return unique_items