# -*- coding: utf-8 -*-
# Copyright 2010-2018, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Utilities to generate source codes."""

__author__ = "hidehiko"

import struct


def ToCppStringLiteral(s):
    """Returns C-style string literal, or NULL if given s is None.

    Args:
      s: a bytes object, or None.

    Returns:
      b'NULL' for None. Otherwise a double-quoted bytes literal: when every
      byte is printable ASCII (0x20..0x7E) the bytes are emitted as-is with
      backslash and double quote escaped; otherwise every byte is emitted
      as a \\xNN hex escape.
    """
    if s is None:
        return b'NULL'

    if all(0x20 <= c <= 0x7E for c in s):
        # All characters are in ascii code.  Backslashes must be escaped
        # first so the backslashes added for quotes are not doubled.
        return b'"%b"' % s.replace(b'\\', br'\\').replace(b'"', br'\"')

    # One or more characters are non-ascii.
    return b'"%b"' % b''.join(br'\x%02X' % c for c in s)


def FormatWithCppEscape(format_text, *args):
    """Returns a string filling format with args.

    Args:
      format_text: a %-style format.  The escaped literals are bytes, so a
        bytes format is the natural fit.
      *args: values for the format.  bytes and None arguments are converted
        with ToCppStringLiteral; every other argument is passed unchanged.

    Returns:
      format_text % (converted args).
    """
    escaped_args = tuple(
        ToCppStringLiteral(arg)
        if arg is None or isinstance(arg, bytes) else arg
        for arg in args)
    return format_text % escaped_args


def WriteCppDataArray(data, variable_name, target_compiler, stream):
    """Format data into C++ style array.

    Visual C++ does not support string literals longer than 65535 characters
    so integer arrays (e.g. arrays of uint64) are used to represent byte arrays
    on Windows.

    The generated code looks like:
      const uint64 kVAR_data_wordtype[] = {
        0x0123456789ABCDEF, ...
      };
      const char * const kVAR_data =
          reinterpret_cast<const char *>(kVAR_data_wordtype);
      const size_t kVAR_size = 123;

    This implementation works well with other toolchains, too, but we use
    string literals for other toolchains.

    The generated code with a string literal looks like:
      const char kVAR_data[] =
          "\\x12\\x34\\x56\\x78...";
      const size_t kVAR_size = 123;

    Args:
      data: original data to be formatted.  bytes is expected; a str is
        also accepted and is treated as one byte per character (latin-1).
      variable_name: the core name of variables.
      target_compiler: the target compiler which will compile the formatted
        code.  May be None.
      stream: output text stream.

    Raises:
      UnicodeEncodeError: if data is a str containing a character above
        U+00FF, which cannot be represented as a single byte.
    """
    if isinstance(data, str):
        # latin-1 maps code points 0..255 to the same byte values, so both
        # the emitted escapes and the reported size match the input.
        data = data.encode('latin-1')

    # To accept "target_compiler = None", check target_compiler itself first.
    if target_compiler and target_compiler.startswith('msvs'):
        stream.write('const uint64 k%s_data_wordtype[] = {\n' % variable_name)

        for word_index in range(0, len(data), 8):
            # Zero-pad the last chunk so it is always a full 64-bit word.
            word_chunk = data[word_index:word_index + 8].ljust(8, b'\x00')
            stream.write('0x%016X, ' % struct.unpack('<Q', word_chunk))
            if (word_index // 8) % 4 == 3:
                # Line feed for every 4 elements.
                stream.write('\n')

        stream.write('};\n')
        stream.write(
            'const char * const k%s_data = '
            'reinterpret_cast<const char *>(k%s_data_wordtype);\n' % (
                variable_name, variable_name))
    else:
        stream.write('const char k%s_data[] =\n' % variable_name)
        # Output 16bytes per line.
        chunk_size = 16
        for index in range(0, len(data), chunk_size):
            chunk = data[index:index + chunk_size]
            # Iterating bytes yields ints, which are formatted directly.
            escaped = ''.join(r'\x%02X' % byte for byte in chunk)
            stream.write('"%s"\n' % escaped)
        stream.write(';\n')

    stream.write('const size_t k%s_size = %d;\n' % (variable_name, len(data)))


def ToJavaStringLiteral(codepoint_list):
    """Returns string literal with surrogate pair and emoji support.

    Args:
      codepoint_list: a single code point (int), a sequence of code points,
        or None.

    Returns:
      b'null' for None or an empty sequence.  Otherwise a double-quoted
      bytes literal in which every UTF-16 code unit is written as a \\uXXXX
      escape; code points above U+FFFF become a surrogate pair.
    """
    if isinstance(codepoint_list, int):
        codepoint_list = (codepoint_list,)
    if not codepoint_list:
        return b'null'

    escaped_units = []
    for codepoint in codepoint_list:
        # 'surrogatepass' lets a lone surrogate code point be written as
        # its own \uXXXX escape instead of raising UnicodeEncodeError.
        utf16_bytes = chr(codepoint).encode('utf-16be', 'surrogatepass')
        # Two bytes per UTF-16 code unit: one unit for the BMP, two for a
        # surrogate pair.
        for offset in range(0, len(utf16_bytes), 2):
            escaped_units.append(
                br'\u%02X%02X' % (utf16_bytes[offset],
                                  utf16_bytes[offset + 1]))
    return b'"' + b''.join(escaped_units) + b'"'


def _LineEnding(line):
    """Returns the newline matching the type (bytes or str) of line."""
    return b'\n' if isinstance(line, bytes) else '\n'


def SkipLineComment(stream, comment_prefix='#'):
    """Skips line comments from stream.

    Args:
      stream: an iterable of lines, either str or bytes.
      comment_prefix: the marker which starts a comment line.  It is
        converted with UTF-8 to match the type of each line.

    Yields:
      Every line which is neither blank nor a comment (after stripping
      surrounding whitespace), with trailing newline characters removed.
    """
    for line in stream:
        prefix = comment_prefix
        if isinstance(line, bytes):
            if isinstance(prefix, str):
                prefix = prefix.encode('utf-8')
        elif isinstance(prefix, bytes):
            prefix = prefix.decode('utf-8')

        stripped_line = line.strip()
        if stripped_line and not stripped_line.startswith(prefix):
            yield line.rstrip(_LineEnding(line))


def ParseColumnStream(stream, num_column=None, delimiter=None):
    """Returns parsed columns read from stream.

    Args:
      stream: an iterable of lines, either str or bytes.
      num_column: the maximum number of leading columns to keep, or None to
        keep every column.
      delimiter: the separator passed to split(); None splits on runs of
        whitespace.  It must match the type of the lines.

    Yields:
      A list of columns for each line.
    """
    for line in stream:
        # Slicing with [:None] keeps the whole list, so one loop covers
        # both the limited and the unlimited case.
        yield line.rstrip(_LineEnding(line)).split(delimiter)[:num_column]


def SelectColumn(stream, column_index):
    """Returns the tuple specified by the column_index from the tuple stream.

    Args:
      stream: an iterable of indexable rows.
      column_index: an iterable of indices to pick from each row, in the
        order they should appear in the result.

    Yields:
      A tuple of the selected elements for each row.
    """
    for columns in stream:
        yield tuple(columns[i] for i in column_index)


def SplitChunk(iterable, n):
    """Splits sequence to consecutive n-element chunks.

    Quite similar to grouper in itertools section of python manual,
    but slightly different if len(iterable) is not factor of n.
    grouper extends the last chunk to make it an n-element chunk by adding
    appropriate value, but this returns truncated chunk.

    Args:
      iterable: a sequence supporting len() and slicing.
      n: the chunk size.

    Yields:
      Slices of at most n elements each.
    """
    for index in range(0, len(iterable), n):
        yield iterable[index:index + n]