1# Copyright (c) 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Formats as a .C file for compilation.
6"""
7
8from __future__ import print_function
9
10import codecs
11import os
12import re
13
14import six
15
16from grit import util
17
18
19def _FormatHeader(root, output_dir):
20  """Returns the required preamble for C files."""
21  # Find the location of the resource header file, so that we can include
22  # it.
23  resource_header = 'resource.h'  # fall back to this
24  for output in root.GetOutputFiles():
25    if output.attrs['type'] == 'rc_header':
26      resource_header = os.path.abspath(output.GetOutputFilename())
27      resource_header = util.MakeRelativePath(output_dir, resource_header)
28  return """// This file is automatically generated by GRIT.  Do not edit.
29
30#include "%s"
31
32// All strings are UTF-8
33""" % (resource_header)
34# end _FormatHeader() function
35
36
def Format(root, lang='en', output_dir='.'):
  """Yields the pieces of a C source file implementing the string table.

  The pieces come out in this order: the preamble from _FormatHeader(), the
  opening of a GetString(int id) function that switches on id, one case per
  MessageNode found among root.ActiveDescendants(), and finally a default
  branch returning 0 together with the closing braces.
  """
  from grit.node import message as message_module
  assert isinstance(lang, six.string_types)

  switch_open = 'const char* GetString(int id) {\n  switch (id) {'
  switch_close = '\n    default:\n      return 0;\n  }\n}\n'

  yield _FormatHeader(root, output_dir)
  yield switch_open

  for node in root.ActiveDescendants():
    # Each node is entered as a context manager while it is being examined.
    with node:
      if not isinstance(node, message_module.MessageNode):
        continue
      yield _FormatMessage(node, lang)

  yield switch_close
52
53
54def _HexToOct(match):
55  "Return the octal form of the hex numbers"
56  hex = match.group("hex")
57  result = ""
58  while len(hex):
59    next_num = int(hex[2:4], 16)
60    result += "\\" + '%03o' % next_num
61    hex = hex[4:]
62  return match.group("escaped_backslashes") + result
63
64
# Matches a run of one or more \xHH escapes whose leading backslash is not
# itself escaped, i.e. the run is preceded by an even number (possibly zero)
# of backslashes.  The negative lookbehind inspects the preceding character
# without consuming it, so a run that directly follows an earlier match with
# only escaped backslashes in between is still found.
_UNESCAPED_HEX_RUN = re.compile(
    r"(?<!\\)(?P<escaped_backslashes>(?:\\\\)*)(?P<hex>(?:\\x[0-9a-f]{2})+)")


def _FormatMessage(item, lang):
  """Format a single <message> element.

  Args:
    item: The message node.  It must provide ws_at_start, ws_at_end,
      Translate(lang) and GetTextualIds().
    lang: Language passed to item.Translate().

  Returns:
    A string holding one `case <id>: return "<text>";` clause for the
    generated switch statement, where <id> is the node's first textual ID
    and <text> is the translated message escaped for a C string literal.
  """
  message = item.ws_at_start + item.Translate(lang) + item.ws_at_end

  # Output the message with non-ASCII chars escaped as octal numbers.  C's
  # grammar allows escaped hexadecimal numbers to be infinite, but octal is
  # always of the form \OOO.  Python 3 doesn't support string-escape, so go
  # through codecs.escape_encode instead:
  #   - message.encode(...) - convert to bytes
  #   - codecs.escape_encode(...) - convert non-ASCII bytes to \x## escapes
  #     (this also doubles every backslash)
  #   - (...).decode() - convert bytes back to a string
  message = codecs.escape_encode(message.encode('utf-8'))[0].decode('utf-8')
  message = _UNESCAPED_HEX_RUN.sub(_HexToOct, message)

  # Unescape \ (convert \\ back to \).
  message = message.replace('\\\\', '\\')
  # Keep double quotes from terminating the C string literal.
  message = message.replace('"', '\\"')
  # Replace anything util.LINEBREAKS matches with a literal backslash + 'n'.
  message = util.LINEBREAKS.sub(r'\\n', message)

  name_attr = item.GetTextualIds()[0]

  return '\n    case %s:\n      return "%s";' % (name_attr, message)
96