1# -*- coding: utf-8 -*-
2# Copyright 2010-2018, Google Inc.
3# All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9#     * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11#     * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15#     * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31"""Utilities to generate source codes."""
32
33__author__ = "hidehiko"
34
35import struct
36
37
def ToCppStringLiteral(s):
  """Converts a byte string into a C/C++ string literal.

  Args:
    s: bytes to be converted, or None.

  Returns:
    bytes holding the literal. None maps to NULL. When every byte is
    printable ASCII (0x20-0x7E) the text is emitted as is, with backslashes
    and double quotes escaped. Otherwise every byte, printable or not, is
    emitted as a two-digit upper-case hex escape.
  """
  if s is None:
    return b'NULL'

  has_unprintable = any(c < 0x20 or c > 0x7E for c in s)
  if has_unprintable:
    # Escape everything so a hex escape can never swallow a following
    # character that happens to be a hex digit.
    payload = b''.join(br'\x%02X' % c for c in s)
  else:
    # Backslashes must be doubled before quotes are escaped; otherwise the
    # backslash introduced for a quote would be doubled as well.
    payload = s.replace(b'\\', br'\\')
    payload = payload.replace(b'"', br'\"')
  return b'"%b"' % payload
49
50
def FormatWithCppEscape(format_text, *args):
  """Fills format_text with args, escaping byte strings as C++ literals.

  Args:
    format_text: a %-style format string.
    *args: values for the format. Each bytes or None value is first passed
      through ToCppStringLiteral; every other value is used unchanged.

  Returns:
    The result of applying the % operator to format_text and the converted
    arguments.
  """
  converted = tuple(
      ToCppStringLiteral(value)
      if value is None or isinstance(value, bytes) else value
      for value in args)
  return format_text % converted
60
61
def WriteCppDataArray(data, variable_name, target_compiler, stream):
  """Format data into C++ style array.

  Visual C++ does not support string literals longer than 65535 characters
  so integer arrays (e.g. arrays of uint64) are used to represent byte arrays
  on Windows.

  The generated code looks like:
    const uint64 kVAR_data_wordtype[] = {
        0x0123456789ABCDEF, ...
    };
    const char * const kVAR_data =
        reinterpret_cast<const char *>(kVAR_data_wordtype);
    const size_t kVAR_size = 123;

  This implementation works well with other toolchains, too, but we use
  string literals for other toolchains.

  The generated code with a string literal looks like:
    const char kVAR_data[] =
        "\\x12\\x34\\x56\\x78...";
    const size_t kVAR_size = 123;

  Args:
    data: original data to be formatted. A bytes-like sequence (bytes,
      bytearray, ...). A str is also accepted, in which case every character
      is treated as one byte whose value is its code point; the integer
      array form requires those code points to be at most 0xFF.
    variable_name: the core name of variables.
    target_compiler: the target compiler which will compile the formatted
      code. A value starting with 'msvs' selects the integer array form;
      anything else, including None, selects the string literal form.
    stream: output text stream. Only write() and writelines() are used.
  """
  # Iterating bytes yields ints while iterating str yields 1-char strings,
  # so remember which kind we were given and convert accordingly below.
  is_text = isinstance(data, str)

  # To accept "target_compiler = None", check target_compiler itself first.
  if target_compiler and target_compiler.startswith('msvs'):
    stream.write('const uint64 k%s_data_wordtype[] = {\n' % variable_name)

    for word_index in range(0, len(data), 8):
      word_chunk = data[word_index:word_index + 8]
      if is_text:
        word_chunk = word_chunk.encode('latin-1')
      # struct.unpack('<Q', ...) needs exactly 8 bytes, so the (possibly
      # short) last chunk is padded with zero bytes. The fill value has to be
      # bytes as well.
      word_chunk = bytes(word_chunk).ljust(8, b'\x00')
      stream.write('0x%016X, ' % struct.unpack('<Q', word_chunk))
      # Use integer division: this is an element counter, not a ratio.
      if (word_index // 8) % 4 == 3:
        # Line feed for every 4 elements.
        stream.write('\n')

    stream.write('};\n')
    stream.write(
        'const char * const k%s_data = '
        'reinterpret_cast<const char *>(k%s_data_wordtype);\n' % (
            variable_name, variable_name))
  else:
    stream.write('const char k%s_data[] =\n' % variable_name)
    # Output 16bytes per line.
    chunk_size = 16
    for index in range(0, len(data), chunk_size):
      chunk = data[index:index + chunk_size]
      byte_values = map(ord, chunk) if is_text else chunk
      stream.write('"')
      stream.writelines(r'\x%02X' % value for value in byte_values)
      stream.write('"\n')
    stream.write(';\n')

  stream.write('const size_t k%s_size = %d;\n' % (variable_name, len(data)))
121
122
def ToJavaStringLiteral(codepoint_list):
  """Builds a Java string literal from Unicode code points.

  Every code point is written as \\uXXXX escapes of its UTF-16 encoding, so
  a character outside the BMP (e.g. emoji) becomes a surrogate pair of two
  escapes.

  Args:
    codepoint_list: a sequence of integer code points, a single int, or None.

  Returns:
    bytes holding the double-quoted literal, or null (as bytes) when the
    input is None or empty.
  """
  if type(codepoint_list) is int:
    codepoint_list = (codepoint_list,)
  if codepoint_list is None or len(codepoint_list) == 0:
    return b'null'

  pieces = [b'"']
  for codepoint in codepoint_list:
    encoded = chr(codepoint).encode('utf-16be')
    # One UTF-16 code unit is two bytes (high byte first); a single character
    # encodes to either one unit or two (surrogate pair).
    for offset in range(0, len(encoded), 2):
      pieces.append(br'\u%02X%02X' % (encoded[offset], encoded[offset + 1]))
  pieces.append(b'"')
  return b''.join(pieces)
140
141
def SkipLineComment(stream, comment_prefix='#'):
  """Yields the lines of stream that are neither blank nor comments.

  A line is a comment when, ignoring surrounding whitespace, it starts with
  comment_prefix. Yielded lines have trailing newline characters removed;
  any other whitespace is kept.

  Args:
    stream: an iterable of lines, either str or bytes.
    comment_prefix: the marker which starts a comment line. A str prefix is
      UTF-8 encoded once a bytes line is seen.

  Yields:
    Each remaining line without its trailing newline(s).
  """
  for line in stream:
    is_binary = isinstance(line, bytes)
    if is_binary and isinstance(comment_prefix, str):
      # startswith() on bytes needs a bytes prefix.
      comment_prefix = comment_prefix.encode('utf-8')

    content = line.strip()
    if not content or content.startswith(comment_prefix):
      continue
    yield line.rstrip(b'\n' if is_binary else '\n')
154
155
def ParseColumnStream(stream, num_column=None, delimiter=None):
  """Splits each line of stream into a list of columns.

  Args:
    stream: an iterable of lines, either str or bytes.
    num_column: when given, only the first num_column columns of each line
      are kept. None keeps all columns.
    delimiter: the column separator handed to split(). None splits on runs
      of whitespace.

  Yields:
    The list of columns of each line, with trailing newline(s) removed
    before splitting.
  """
  for line in stream:
    newline = b'\n' if isinstance(line, bytes) else '\n'
    columns = line.rstrip(newline).split(delimiter)
    yield columns if num_column is None else columns[:num_column]
172
173
def SelectColumn(stream, column_index):
  """Picks the given columns out of every row of stream.

  Args:
    stream: an iterable of indexable rows.
    column_index: an iterable of indices; the output keeps this order.

  Yields:
    For each row, a tuple of the row's items at the positions listed in
    column_index.
  """
  for row in stream:
    picked = [row[position] for position in column_index]
    yield tuple(picked)
178
179
def SplitChunk(iterable, n):
  """Yields consecutive slices of at most n elements from a sequence.

  This resembles the grouper recipe in the itertools section of the Python
  manual. The difference shows when len(iterable) is not a multiple of n:
  grouper pads the last chunk up to n elements with a fill value, whereas
  this yields the last chunk truncated.

  Args:
    iterable: a sequence supporting len() and slicing.
    n: the chunk size.

  Yields:
    Slices of iterable, each n elements long except possibly the last one.
  """
  for begin in range(0, len(iterable), n):
    end = begin + n
    yield iterable[begin:end]
190