1#!/usr/bin/env python2
2#
3#  Python utilities shared by the build scripts.
4#
5
6import datetime
7import json
8
class BitEncoder:
    """Bitstream encoder.

    Bits are collected one at a time, most significant bit first, and can
    be read back as a list of byte values or as a byte string.  Simple
    statistics about varuint() usage are collected as a side effect.
    """

    # List of individual bits (0/1) in output order.
    _bits = None
    # Histogram of varuint() input values in the range 0...0xffff.
    _varuint_dist = None
    # Number of varuint() calls per encoding category (5 categories).
    _varuint_cats = None
    # Total number of varuint() calls.
    _varuint_count = None
    # Total number of bits emitted by varuint() calls.
    _varuint_bits = None

    def __init__(self):
        self._bits = []
        self._varuint_dist = [0] * 65536
        self._varuint_cats = [0] * 5
        self._varuint_count = 0
        self._varuint_bits = 0

    def bits(self, x, nbits):
        """Append the 'nbits' lowest bits of 'x', most significant first.

        Raises Exception if 'x' does not fit into 'nbits' bits.
        """
        if (x >> nbits) != 0:
            raise Exception('input value has too many bits (value: %d, bits: %d)' % (x, nbits))
        for shift in range(nbits - 1, -1, -1):  # nbits - 1, nbits - 2, ..., 0
            self._bits.append((x >> shift) & 0x01)

    def string(self, x):
        """Append each element of 'x' as 8 bits, most significant first.

        Elements are converted with ord(); integer elements (as produced
        when iterating a Python 3 bytes object) are used as is.
        """
        for ch in x:
            code = ch if isinstance(ch, int) else ord(ch)
            for shift in range(7, -1, -1):  # 7, 6, ..., 0
                self._bits.append((code >> shift) & 0x01)

    # Shared varint encoding.
    def varuint(self, x):
        """Append 'x' using a variable length encoding and update stats.

        A 2-bit prefix selects the format:
          0: value 0, no further bits
          1: values 1...4 as 2 bits (x - 1)
          2: values 5...36 as 5 bits (x - 5)
          3: values 37...163 as 7 bits (x - 37 + 1); the 7-bit value 0 is
             an escape which is followed by 'x' as a raw 20-bit value

        Values that do not fit into 20 bits cause bits() to raise.
        """
        assert(x >= 0)
        if x <= 0xffff:
            self._varuint_dist[x] += 1
        self._varuint_count += 1

        if x == 0:
            self.bits(0, 2)
            self._varuint_bits += 2
            self._varuint_cats[0] += 1
        elif x <= 4:
            self.bits(1, 2)
            self.bits(x - 1, 2)
            self._varuint_bits += 2 + 2
            self._varuint_cats[1] += 1
        elif x <= 36:
            self.bits(2, 2)
            self.bits(x - 5, 5)
            self._varuint_bits += 2 + 5
            self._varuint_cats[2] += 1
        elif x <= 163:
            self.bits(3, 2)
            self.bits(x - 37 + 1, 7)
            self._varuint_bits += 2 + 7
            self._varuint_cats[3] += 1
        else:
            self.bits(3, 2)
            self.bits(0, 7)  # escape marker
            self.bits(x, 20)
            self._varuint_bits += 2 + 7 + 20
            self._varuint_cats[4] += 1

    def getNumBits(self):
        "Get current number of encoded bits."
        return len(self._bits)

    def getNumBytes(self):
        "Get current number of encoded bytes, rounded up."
        # Floor division keeps the result an integer regardless of the
        # division mode in effect.
        return (len(self._bits) + 7) // 8

    def getBytes(self):
        "Get current bitstream as a list of byte values, padded with zero bits."
        result = []

        for i in range(self.getNumBytes()):
            chunk = self._bits[i * 8:i * 8 + 8]
            t = 0
            for bit in chunk:
                t = (t << 1) + bit
            # Only the final chunk can be short: pad it with zero bits.
            t <<= 8 - len(chunk)
            result.append(t)

        return result

    def getByteString(self):
        "Get current bitstream as a byte string."
        # bytes is an alias of str on Python 2, so the result there is a
        # plain str as before.
        return bytes(bytearray(self.getBytes()))
100
class GenerateC:
    """Helper for generating C source and header files.

    Emitted text fragments are collected in order; getString() returns
    them joined together.
    """

    # List of emitted text fragments.
    _data = None
    # Column limit used by emitArray() when wrapping initializer lines.
    wrap_col = 76

    def __init__(self):
        self._data = []

    def emitRaw(self, text):
        "Emit raw text (without automatic newline)."
        self._data.append(text)

    def emitLine(self, text):
        "Emit a raw line (with automatic newline)."
        self._data.append(text + '\n')

    def emitHeader(self, autogen_by):
        "Emit file header comments."

        # Note: a timestamp would be nice but it breaks incremental building
        self.emitLine('/*')
        self.emitLine(' *  Automatically generated by %s, do not edit!' % autogen_by)
        self.emitLine(' */')
        self.emitLine('')

    def emitArray(self, data, tablename, visibility=None, typename='char', size=None, intvalues=False, const=True):
        """Emit an array as a C array.

        data:       text string (encoded as UTF-8), byte string, or a
                    sequence of integers
        tablename:  C identifier of the array
        visibility: optional qualifier emitted first (e.g. 'static')
        typename:   C element type, also used in the per-element cast
        size:       optional explicit array size; omitted when None
        intvalues:  emit plain decimal integers (with an 'L' suffix when
                    outside -32768...32767) instead of '\\xNN' char literals
        const:      emit a 'const' qualifier
        """

        # lenient input: the text type is 'unicode' on Python 2 and 'str'
        # on Python 3
        try:
            text_type = unicode
        except NameError:
            text_type = str
        if isinstance(data, text_type):
            data = data.encode('utf-8')
        if isinstance(data, bytes):
            data = list(bytearray(data))

        size_spec = '' if size is None else '%d' % size
        visib_qual = '' if visibility is None else visibility + ' '
        const_qual = 'const ' if const else ''
        self.emitLine('%s%s%s %s[%s] = {' % (visib_qual, const_qual, typename, tablename, size_spec))

        line = ''
        for value in data:
            if intvalues:
                suffix = ''
                if value < -32768 or value > 32767:
                    suffix = 'L'
                t = "%d%s," % (value, suffix)
            else:
                t = "(%s)'\\x%02x', " % (typename, value)
            # Wrap before the column limit, but never flush an empty line
            # (happens when a single element is wider than the limit).
            if line != '' and len(line) + len(t) >= self.wrap_col:
                self.emitLine(line)
                line = t
            else:
                line += t
        if line != '':
            self.emitLine(line)
        self.emitLine('};')

    def emitDefine(self, name, value, comment=None):
        "Emit a C define with an optional comment."

        # XXX: there is no escaping right now (for comment or value)
        if comment is not None:
            self.emitLine('#define %-60s  %-30s /* %s */' % (name, value, comment))
        else:
            self.emitLine('#define %-60s  %s' % (name, value))

    def getString(self):
        "Get the entire file as a string."
        return ''.join(self._data)
180
def json_encode(x):
    """JSON encode a value.

    Uses json.dumps(); if that raises AttributeError, falls back to
    json.write() as provided by older library versions.
    """
    try:
        encoded = json.dumps(x)
    except AttributeError:
        # for older library versions
        encoded = json.write(x)
    return encoded
190
def json_decode(x):
    """JSON decode a value.

    Uses json.loads(); if that raises AttributeError, falls back to
    json.read() as provided by older library versions.
    """
    try:
        decoded = json.loads(x)
    except AttributeError:
        # for older library versions
        decoded = json.read(x)
    return decoded
200
# Compute a byte hash identical to duk_util_hashbytes().
DUK__MAGIC_M = 0x5bd1e995
DUK__MAGIC_R = 24
def duk_util_hashbytes(x, off, nbytes, str_seed, big_endian):
    """Hash 'nbytes' elements of 'x' starting at index 'off'.

    Each element is converted with ord().  Full groups of four elements
    are combined into one word and mixed into the hash; a trailing group
    of 1...3 elements is mixed in separately.  All intermediate values
    are reduced to 32 bits and the result is a non-negative integer.
    """
    mask32 = 0xffffffff

    # 4-byte fetch byte order:
    #  - native (endian dependent) if unaligned accesses allowed
    #  - little endian if unaligned accesses not allowed
    # shifts[i] is the bit position of the element at index pos + i.
    if big_endian:
        shifts = (24, 16, 8, 0)
    else:
        shifts = (0, 8, 16, 24)

    h = (str_seed ^ nbytes) & mask32
    pos = off
    left = nbytes

    while left >= 4:
        k = sum([ord(x[pos + i]) << shifts[i] for i in range(4)])

        k = (k * DUK__MAGIC_M) & mask32
        k ^= k >> DUK__MAGIC_R
        k = (k * DUK__MAGIC_M) & mask32
        h = ((h * DUK__MAGIC_M) & mask32) ^ k

        pos += 4
        left -= 4

    # Tail of 1...3 elements: the element at pos + n lands at bit 8 * n.
    if left >= 1:
        tail = ord(x[pos])
        if left >= 2:
            tail ^= ord(x[pos + 1]) << 8
        if left >= 3:
            tail ^= ord(x[pos + 2]) << 16
        h = ((h ^ tail) * DUK__MAGIC_M) & mask32

    # Final mixing.
    h ^= h >> 13
    h = (h * DUK__MAGIC_M) & mask32
    h ^= h >> 15

    return h
241
# Compute a string hash identical to duk_heap_hashstring() when dense
# hashing is enabled.
DUK__STRHASH_SHORTSTRING = 4096
DUK__STRHASH_MEDIUMSTRING = 256 * 1024
DUK__STRHASH_BLOCKSIZE = 256
def duk_heap_hashstring_dense(x, hash_seed, big_endian=False, strhash16=False):
    """Compute the dense string hash of 'x'.

    Strings of at most DUK__STRHASH_SHORTSTRING characters are hashed in
    full with duk_util_hashbytes().  For longer strings the first
    DUK__STRHASH_SHORTSTRING characters are hashed, and the rest of the
    string is sampled in blocks of at most DUK__STRHASH_BLOCKSIZE
    characters spaced 'skip' characters apart; the starting offset of the
    sampling depends on the hash of the initial part.

    x:          string to hash
    hash_seed:  seed value, combined with the string length
    big_endian: passed through to duk_util_hashbytes()
    strhash16:  if true, only the low 16 bits of the hash are returned
    """
    xlen = len(x)
    str_seed = (hash_seed ^ xlen) & 0xffffffff

    if xlen <= DUK__STRHASH_SHORTSTRING:
        res = duk_util_hashbytes(x, 0, xlen, str_seed, big_endian)
    else:
        if xlen <= DUK__STRHASH_MEDIUMSTRING:
            skip = 16 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE
        else:
            skip = 256 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE

        res = duk_util_hashbytes(x, 0, DUK__STRHASH_SHORTSTRING, str_seed, big_endian)
        # Floor division: the offset is used as an index and must stay an
        # integer regardless of the division mode in effect.
        off = DUK__STRHASH_SHORTSTRING + (skip * (res % 256)) // 256

        while off < xlen:
            now = min(xlen - off, DUK__STRHASH_BLOCKSIZE)
            # Hash a block of the input string itself (not the 'str'
            # builtin, which cannot be indexed).
            res = (res ^ duk_util_hashbytes(x, off, now, str_seed, big_endian)) & 0xffffffff
            off += skip

    if strhash16:
        res &= 0xffff

    return res
273
# Compute a string hash identical to duk_heap_hashstring() when sparse
# hashing is enabled.
DUK__STRHASH_SKIP_SHIFT = 5   # XXX: assumes default value
def duk_heap_hashstring_sparse(x, hash_seed, strhash16=False):
    """Compute the sparse string hash of 'x'.

    Starting from the end of the string, every 'step'th character is
    folded into the hash as (hash * 33 + character code), reduced to 32
    bits.  'step' grows with the string length so that only a limited
    number of characters is sampled.  If 'strhash16' is true, only the
    low 16 bits of the hash are returned.
    """
    length = len(x)
    step = (length >> DUK__STRHASH_SKIP_SHIFT) + 1
    res = (hash_seed ^ length) & 0xffffffff

    # Positions length, length - step, ... for as long as position >= step;
    # the character sampled is the one just before the position.
    for pos in range(length, step - 1, -step):
        assert(pos >= 1)
        res = (res * 33 + ord(x[pos - 1])) & 0xffffffff

    if strhash16:
        return res & 0xffff
    return res
291
292# Must match src-input/duk_unicode_support:duk_unicode_unvalidated_utf8_length().
def duk_unicode_unvalidated_utf8_length(x):
    """Count the characters of 'x' whose code is outside 0x80...0xbf.

    'x' must be a str (checked with an assert).  No validation of the
    data is performed.
    """
    assert(isinstance(x, str))
    # 0x80...0xbf are continuation chars, not counted
    return sum([1 for c in x if not (0x80 <= ord(c) < 0xc0)])
301