1# Copyright 2014 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Helper functions useful when writing scripts that integrate with GN.
6
7The main functions are ToGNString() and FromGNString(), to convert between
serialized GN variables and Python variables.
9
10To use in an arbitrary Python file in the build:
11
12  import os
13  import sys
14
15  sys.path.append(os.path.join(os.path.dirname(__file__),
16                               os.pardir, os.pardir, 'build'))
17  import gn_helpers
18
19Where the sequence of parameters to join is the relative path from your source
20file to the build directory.
21"""
22
23import json
24import os
25import re
26import sys
27
28
# Parent of the directory containing this file. Used as the default
# |checkout_root| of GNValueParser, i.e. the base directory against which
# import("//...") paths are resolved.
_CHROMIUM_ROOT = os.path.join(os.path.dirname(__file__), os.pardir)

# Name of the JSON file that ReadBuildVars() loads from an output directory.
BUILD_VARS_FILENAME = 'build_vars.json'
# Matches a GN import("//path") statement at the start of a string and
# captures the path that follows the leading "//".
IMPORT_RE = re.compile(r'^import\("//(\S+)"\)')
33
34
class GNError(Exception):
  """Error raised when a value cannot be printed to, or parsed from, GN."""
37
38
39# Computes ASCII code of an element of encoded Python 2 str / Python 3 bytes.
40_Ord = ord if sys.version_info.major < 3 else lambda c: c
41
42
43def _TranslateToGnChars(s):
44  for decoded_ch in s.encode('utf-8'):  # str in Python 2, bytes in Python 3.
45    code = _Ord(decoded_ch)  # int
46    if code in (34, 36, 92):  # For '"', '$', or '\\'.
47      yield '\\' + chr(code)
48    elif 32 <= code < 127:
49      yield chr(code)
50    else:
51      yield '$0x%02X' % code
52
53
def ToGNString(value, pretty=False):
  """Returns a stringified GN equivalent of a Python value.

  Strings, booleans, integers, lists and dicts are supported, and lists and
  dicts may be nested. A dict passed as |value| itself is rendered as a
  sequence of `key = value` assignments (one per line, keys sorted); a dict
  nested inside another value is rendered wrapped in `{` and `}`.

  Args:
    value: The Python value to convert.
    pretty: Whether to pretty print. If true, then non-empty lists are rendered
        recursively with one item per line, with indents. Otherwise lists are
        rendered without new line.
  Returns:
    The stringified GN equivalent to |value|. A trailing newline is appended
    when |value| is a dict or when the output spans several lines.

  Raises:
    GNError: |value| cannot be printed to GN (unsupported type, non-string
        dict key, or dict key that is not a valid GN identifier).
  """

  if sys.version_info.major < 3:
    basestring_compat = basestring
  else:
    basestring_compat = str

  # Emits all output tokens without intervening whitespaces.
  def GenerateTokens(v, level):
    if isinstance(v, basestring_compat):
      yield '"' + ''.join(_TranslateToGnChars(v)) + '"'

    elif isinstance(v, bool):  # Checked before int: bool is an int subclass.
      yield 'true' if v else 'false'

    elif isinstance(v, int):
      yield str(v)

    elif isinstance(v, list):
      yield '['
      for i, item in enumerate(v):
        if i > 0:
          yield ','
        for tok in GenerateTokens(item, level + 1):
          yield tok
      yield ']'

    elif isinstance(v, dict):
      # Only nested dicts get braces; the outermost one is a bare list of
      # assignments.
      if level > 0:
        yield '{'
      for key in sorted(v):
        if not isinstance(key, basestring_compat):
          raise GNError('Dictionary key is not a string.')
        # GN identifiers are ASCII letters, digits and '_', not starting with
        # a digit. str.isalnum() is not used because it accepts non-ASCII
        # characters and rejects keys made only of underscores.
        if not re.match(r'[A-Za-z_][A-Za-z0-9_]*\Z', key):
          raise GNError('Dictionary key is not a valid GN identifier.')
        yield key  # No quotations.
        yield '='
        for tok in GenerateTokens(v[key], level + 1):
          yield tok
      if level > 0:
        yield '}'

    else:  # Not supporting float: Add only when needed.
      raise GNError('Unsupported type when printing to GN.')

  # A token can start / end a dict item when it is not one of the listed
  # punctuation tokens. Two such tokens in a row mean that one `key = value`
  # item ended and the next one begins.
  can_start = lambda tok: tok and tok not in ',}]='
  can_end = lambda tok: tok and tok not in ',{[='

  # Adds whitespaces, trying to keep everything (except dicts) in 1 line.
  def PlainGlue(gen):
    prev_tok = None
    for i, tok in enumerate(gen):
      if i > 0:
        if can_end(prev_tok) and can_start(tok):
          yield '\n'  # New dict item.
        elif prev_tok == '[' and tok == ']':
          yield '  '  # Special case for [].
        elif tok != ',':
          yield ' '
      yield tok
      prev_tok = tok

  # Adds whitespaces so non-empty lists can span multiple lines, with indent.
  def PrettyGlue(gen):
    prev_tok = None
    level = 0
    for i, tok in enumerate(gen):
      if i > 0:
        if can_end(prev_tok) and can_start(tok):
          yield '\n' + '  ' * level  # New dict item.
        elif tok == '=' or prev_tok in '=':
          yield ' '  # Separator before and after '=', on same line.
      if tok in ']}':
        level -= 1
      # Break the line just inside a bracket, i.e. when exactly one side of
      # the token boundary is the bracket. Both sides being brackets is the
      # empty '[]' / '{}' case, which stays on one line.
      if (prev_tok == '[') != (tok == ']') or \
         (prev_tok == '{') != (tok == '}'):
        yield '\n' + '  ' * level
      yield tok
      if tok in '[{':
        level += 1
      if tok == ',':
        yield '\n' + '  ' * level
      prev_tok = tok

  token_gen = GenerateTokens(value, 0)
  ret = ''.join((PrettyGlue if pretty else PlainGlue)(token_gen))
  # Add terminating '\n' for dict |value| or multi-line output.
  if isinstance(value, dict) or '\n' in ret:
    return ret + '\n'
  return ret
158
159
def FromGNString(input_string):
  """Parses a GN serialized value and returns the equivalent Python value.

  The supported types are described on GNValueParser.Parse().

  Typical use: a GN script that does
    something = [ "file1", "file2" ]
    args = [ "--values=$something" ]
  produces a command line similar to
    --values="[ \"file1\", \"file2\" ]"
  which, once interpreted as a command line, gives the value
    [ "file1", "file2" ]

  That value can be turned into a Python list, following GN rules, with:
    input_values = FromGNString(options.values)
  The Python 'ast' module parses many forms of such input too, but it handles
  neither GN escaping nor GN booleans properly, so use this function instead.


  A NOTE ON STRING HANDLING:

  A string passed directly on the command line of your Python script, or a
  string variable used in string interpolation, is not quoted:
    str = "asdf"
    args = [ str, "--value=$str" ]
  yields the command line:
    asdf --value=asdf
  The unquoted asdf is not valid input to this function, which only accepts
  quoted strings as in GN scripts. In such cases just use the Python string
  directly.

  The main use case is other types, lists in particular. When string
  interpolation is applied to a list (as in the first example), the embedded
  strings are quoted and escaped according to GN rules, so the list can be
  re-parsed to obtain the same result.

  Args:
    input_string: The serialized GN value.

  Returns:
    The Python value parsed from |input_string|.

  Raises:
    GNError: Parse fails.
  """
  return GNValueParser(input_string).Parse()
199
200
def FromGNArgs(input_string):
  """Parses a string of gn arg assignments into a Python dict.

  The input is a whitespace-separated list of gn assignments of the form

    <ident> = (integer | string | boolean | <list of the former>)

  and the result maps each identifier to its parsed value, i.e.:

    FromGNArgs('foo=true\nbar=1\n') -> { 'foo': True, 'bar': 1 }.

  Only simple types and lists supported; variables, structs, calls
  and other, more complicated things are not.

  This routine is only meant for the simple sorts of values that arise when
  parsing --args. The work is delegated to GNValueParser.ParseArgs().

  Args:
    input_string: The text holding the assignments.

  Returns:
    A dict mapping each assigned identifier to its Python value.

  Raises:
    GNError: Parse fails.
  """
  return GNValueParser(input_string).ParseArgs()
220
221
def UnescapeGNString(value):
  """Given a string with GN escaping, returns the unescaped string.

  A backslash followed by a dollar sign, a double quote or another backslash
  is replaced by that following character. Any other backslash, including one
  that is the last character of |value|, is kept as a literal backslash.

  Be careful not to feed with input from a Python parsing function like
  'ast' because it will do Python unescaping, which will be incorrect when
  fed into the GN unescaper.

  Args:
    value: Input string to unescape.

  Returns:
    The unescaped string.
  """
  pieces = []  # Joined once at the end instead of repeated concatenation.
  length = len(value)
  i = 0
  while i < length:
    char = value[i]
    if (char == '\\' and i + 1 < length and
        value[i + 1] in ('$', '"', '\\')):
      # These are the escaped characters GN supports: emit the escaped
      # character and skip over both.
      pieces.append(value[i + 1])
      i += 2
    else:
      # Ordinary character, or a backslash that does not start a supported
      # escape (including a trailing one): keep it as is.
      pieces.append(char)
      i += 1
  return ''.join(pieces)
249
250
251def _IsDigitOrMinus(char):
252  return char in '-0123456789'
253
254
class GNValueParser(object):
  """Duplicates GN parsing of values and converts to Python types.

  Normally you would use the wrapper functions FromGNString() or FromGNArgs().

  If you expect input as a specific type, you can also call one of the Parse*
  functions directly. All functions throw GNError on invalid input.

  Attributes:
    input: The text being parsed.
    cur: Index into |input| of the next character to consume.
    checkout_root: Directory against which import("//...") paths are resolved.
  """

  def __init__(self, string, checkout_root=_CHROMIUM_ROOT):
    """Creates a parser positioned at the start of |string|.

    Args:
      string: The GN text to parse.
      checkout_root: Base directory used by ReplaceImports() to locate
          imported files.
    """
    self.input = string
    self.cur = 0
    self.checkout_root = checkout_root

  def IsDone(self):
    """Returns True when the whole input has been consumed."""
    return self.cur == len(self.input)

  def ReplaceImports(self):
    """Replaces import(...) lines with the contents of the imports.

    Recurses on itself until there are no imports remaining, in the case of
    nested imports.

    Raises:
      GNError: A line starts with 'import(' but is not of the form
          import("//path").
    """
    lines = self.input.splitlines()
    if not any(line.startswith('import(') for line in lines):
      return
    for line in lines:
      if not line.startswith('import('):
        continue
      regex_match = IMPORT_RE.match(line)
      if not regex_match:
        raise GNError('Not a valid import string: %s' % line)
      import_path = os.path.join(self.checkout_root, regex_match.group(1))
      with open(import_path) as f:
        imported_args = f.read()
      # Note: this substitutes every occurrence of the line's text.
      self.input = self.input.replace(line, imported_args)
    # Call ourselves again if we've just replaced an import() with additional
    # imports.
    self.ReplaceImports()

  def _ConsumeWhitespace(self):
    """Advances past any run of spaces, tabs and newlines."""
    while not self.IsDone() and self.input[self.cur] in ' \t\n':
      self.cur += 1

  def ConsumeCommentAndWhitespace(self):
    """Advances past whitespace and '#' comments up to the next token."""
    self._ConsumeWhitespace()

    # Consume each comment, line by line.
    while not self.IsDone() and self.input[self.cur] == '#':
      # Consume the rest of the comment, up until the end of the line.
      while not self.IsDone() and self.input[self.cur] != '\n':
        self.cur += 1
      # Move the cursor to the next line (if there is one).
      if not self.IsDone():
        self.cur += 1

      self._ConsumeWhitespace()

  def Parse(self):
    """Converts a string representing a printed GN value to the Python type.

    See additional usage notes on FromGNString() above.

    * GN booleans ('true', 'false') will be converted to Python booleans.

    * GN numbers ('123') will be converted to Python numbers.

    * GN strings (double-quoted as in '"asdf"') will be converted to Python
      strings with GN escaping rules. GN string interpolation (embedded
      variables preceded by $) are not supported and will be returned as
      literals.

    * GN lists ('[1, "asdf", 3]') will be converted to Python lists.

    * GN scopes ('{ a = 1 }') containing only `ident = value` assignments
      will be converted to Python dicts.

    Returns:
      The parsed Python value.

    Raises:
      GNError: Parse fails.
    """
    result = self._ParseAllowTrailing()
    self.ConsumeCommentAndWhitespace()
    if not self.IsDone():
      raise GNError("Trailing input after parsing:\n  " + self.input[self.cur:])
    return result

  def ParseArgs(self):
    """Converts a whitespace-separated list of ident=literals to a dict.

    See additional usage notes on FromGNArgs(), above. import(...) lines are
    expanded with ReplaceImports() before parsing.

    Returns:
      A dict mapping each identifier to its parsed value.

    Raises:
      GNError: Parse fails.
    """
    d = {}

    self.ReplaceImports()
    self.ConsumeCommentAndWhitespace()

    while not self.IsDone():
      ident, val = self._ParseAssignment()
      d[ident] = val

    return d

  def _ParseAssignment(self):
    """Parses one `ident = value` assignment at the current location.

    Trailing whitespace and comments after the value are consumed as well.

    Returns:
      An (identifier, value) tuple.

    Raises:
      GNError: The input is not a well-formed assignment, including when it
          ends before the '='.
    """
    ident = self._ParseIdent()
    self.ConsumeCommentAndWhitespace()
    # Input may end right after the identifier; report it instead of indexing
    # past the end of the string.
    if self.IsDone():
      raise GNError('Expected = after identifier but got nothing: ' + ident)
    if self.input[self.cur] != '=':
      raise GNError("Unexpected token: " + self.input[self.cur:])
    self.cur += 1
    self.ConsumeCommentAndWhitespace()
    val = self._ParseAllowTrailing()
    self.ConsumeCommentAndWhitespace()
    return ident, val

  def _ParseAllowTrailing(self):
    """Internal version of Parse() that doesn't check for trailing stuff."""
    self.ConsumeCommentAndWhitespace()
    if self.IsDone():
      raise GNError("Expected input to parse.")

    # The first character of the value decides which parser handles it.
    next_char = self.input[self.cur]
    if next_char == '[':
      return self.ParseList()
    elif next_char == '{':
      return self.ParseScope()
    elif _IsDigitOrMinus(next_char):
      return self.ParseNumber()
    elif next_char == '"':
      return self.ParseString()
    elif self._ConstantFollows('true'):
      return True
    elif self._ConstantFollows('false'):
      return False
    else:
      raise GNError("Unexpected token: " + self.input[self.cur:])

  def _ParseIdent(self):
    """Parses an identifier at the current location and returns it.

    An identifier starts with a letter or '_' and continues with letters,
    digits and '_'. The identifier may extend to the end of the input.

    Raises:
      GNError: No identifier starts at the current location.
    """
    if self.IsDone():
      raise GNError('Expected an identifier but got nothing.')

    next_char = self.input[self.cur]
    if not next_char.isalpha() and not next_char == '_':
      raise GNError("Expected an identifier: " + self.input[self.cur:])

    begin = self.cur
    self.cur += 1
    # Stop at the first non-identifier character or at the end of the input.
    while not self.IsDone():
      next_char = self.input[self.cur]
      if not (next_char.isalpha() or next_char.isdigit() or next_char == '_'):
        break
      self.cur += 1

    return self.input[begin:self.cur]

  def ParseNumber(self):
    """Parses an optionally negative integer and returns it as an int.

    Raises:
      GNError: No valid number starts at the current location.
    """
    self.ConsumeCommentAndWhitespace()
    if self.IsDone():
      raise GNError('Expected number but got nothing.')

    begin = self.cur

    # The first character can include a negative sign.
    if _IsDigitOrMinus(self.input[self.cur]):
      self.cur += 1
    while not self.IsDone() and self.input[self.cur].isdigit():
      self.cur += 1

    number_string = self.input[begin:self.cur]
    if not number_string or number_string == '-':
      raise GNError('Not a valid number.')
    return int(number_string)

  def ParseString(self):
    """Parses a double-quoted GN string and returns it unescaped.

    Raises:
      GNError: The input does not start with a quote or the string is not
          terminated.
    """
    self.ConsumeCommentAndWhitespace()
    if self.IsDone():
      raise GNError('Expected string but got nothing.')

    if self.input[self.cur] != '"':
      raise GNError('Expected string beginning in a " but got:\n  ' +
                    self.input[self.cur:])
    self.cur += 1  # Skip over quote.

    begin = self.cur
    while not self.IsDone() and self.input[self.cur] != '"':
      if self.input[self.cur] == '\\':
        # Skip over the backslash; the character after it is skipped below so
        # that an escaped quote does not end the string.
        self.cur += 1
        if self.IsDone():
          raise GNError('String ends in a backslash in:\n  ' + self.input)
      self.cur += 1

    if self.IsDone():
      raise GNError('Unterminated string:\n  ' + self.input[begin:])

    end = self.cur
    self.cur += 1  # Consume trailing ".

    return UnescapeGNString(self.input[begin:end])

  def ParseList(self):
    """Parses a '[ ... ]' list and returns it as a Python list.

    Items must be separated by commas; a trailing comma is accepted.

    Raises:
      GNError: The list is malformed or unterminated.
    """
    self.ConsumeCommentAndWhitespace()
    if self.IsDone():
      raise GNError('Expected list but got nothing.')

    # Skip over opening '['.
    if self.input[self.cur] != '[':
      raise GNError('Expected [ for list but got:\n  ' + self.input[self.cur:])
    self.cur += 1
    self.ConsumeCommentAndWhitespace()
    if self.IsDone():
      raise GNError('Unterminated list:\n  ' + self.input)

    list_result = []
    # Starts True so that the first item does not need a preceding comma.
    previous_had_trailing_comma = True
    while not self.IsDone():
      if self.input[self.cur] == ']':
        self.cur += 1  # Skip over ']'.
        return list_result

      if not previous_had_trailing_comma:
        raise GNError('List items not separated by comma.')

      list_result.append(self._ParseAllowTrailing())
      self.ConsumeCommentAndWhitespace()
      if self.IsDone():
        break

      # Consume comma if there is one.
      previous_had_trailing_comma = self.input[self.cur] == ','
      if previous_had_trailing_comma:
        # Consume comma.
        self.cur += 1
        self.ConsumeCommentAndWhitespace()

    raise GNError('Unterminated list:\n  ' + self.input)

  def ParseScope(self):
    """Parses a '{ ... }' scope of assignments and returns it as a dict.

    Raises:
      GNError: The scope is malformed or unterminated.
    """
    self.ConsumeCommentAndWhitespace()
    if self.IsDone():
      raise GNError('Expected scope but got nothing.')

    # Skip over opening '{'.
    if self.input[self.cur] != '{':
      raise GNError('Expected { for scope but got:\n ' + self.input[self.cur:])
    self.cur += 1
    self.ConsumeCommentAndWhitespace()
    if self.IsDone():
      raise GNError('Unterminated scope:\n ' + self.input)

    scope_result = {}
    while not self.IsDone():
      if self.input[self.cur] == '}':
        self.cur += 1
        return scope_result

      ident, val = self._ParseAssignment()
      scope_result[ident] = val

    raise GNError('Unterminated scope:\n ' + self.input)

  def _ConstantFollows(self, constant):
    """Checks and maybe consumes a string constant at current input location.

    Param:
      constant: The string constant to check.

    Returns:
      True if |constant| follows immediately at the current location in the
      input. In this case, the string is consumed as a side effect. Otherwise,
      returns False and the current position is unchanged.
    """
    if self.input.startswith(constant, self.cur):
      self.cur += len(constant)
      return True
    return False
537
538
def ReadBuildVars(output_directory):
  """Loads the build variables stored as JSON in |output_directory|.

  Args:
    output_directory: Directory containing the BUILD_VARS_FILENAME file.

  Returns:
    The value decoded from that JSON file.
  """
  build_vars_path = os.path.join(output_directory, BUILD_VARS_FILENAME)
  with open(build_vars_path) as build_vars_file:
    return json.load(build_vars_file)
543