1# Copyright 2009 Matt Chaput. All rights reserved.
2#
3# Redistribution and use in source and binary forms, with or without
4# modification, are permitted provided that the following conditions are met:
5#
6#    1. Redistributions of source code must retain the above copyright notice,
7#       this list of conditions and the following disclaimer.
8#
9#    2. Redistributions in binary form must reproduce the above copyright
10#       notice, this list of conditions and the following disclaimer in the
11#       documentation and/or other materials provided with the distribution.
12#
13# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
14# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
15# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
16# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
17# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
18# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
19# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
20# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
21# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
22# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23#
24# The views and conclusions contained in the software and documentation are
25# those of the authors and should not be interpreted as representing official
26# policies, either expressed or implied, of Matt Chaput.
27
28from array import array
29from copy import copy
30from struct import calcsize
31
32from whoosh.compat import BytesIO, bytes_type
33from whoosh.compat import dump as dump_pickle
34from whoosh.compat import load as load_pickle
35from whoosh.compat import array_frombytes, array_tobytes
36from whoosh.system import _INT_SIZE, _SHORT_SIZE, _FLOAT_SIZE, _LONG_SIZE
37from whoosh.system import IS_LITTLE
38from whoosh.system import pack_byte, unpack_byte, pack_sbyte, unpack_sbyte
39from whoosh.system import pack_ushort, unpack_ushort
40from whoosh.system import pack_ushort_le, unpack_ushort_le
41from whoosh.system import pack_int, unpack_int, pack_uint, unpack_uint
42from whoosh.system import pack_uint_le, unpack_uint_le
43from whoosh.system import pack_long, unpack_long, pack_ulong, unpack_ulong
44from whoosh.system import pack_float, unpack_float
45from whoosh.util.varints import varint, read_varint
46from whoosh.util.varints import signed_varint, decode_signed_varint
47
48
# Size in bytes of each supported typecode, as reported by struct.calcsize().
_SIZEMAP = dict(zip("bBiIhHqQf", map(calcsize, "bBiIhHqQf")))
# Byte-order prefix characters keyed by byte-order name.
_ORDERMAP = dict(little="<", big=">")

# (name, typecode) pairs.
_types = (
    ("sbyte", "b"),
    ("ushort", "H"),
    ("int", "i"),
    ("long", "q"),
    ("float", "f"),
)
54
55
# Main class
57
class StructFile(object):
    """Wraps a file-like object and provides numerous additional methods for
    reading and writing structured data, such as "write_varint" and
    "write_long".

    Basic file operations (``read``, ``write``, ``seek``, ``tell``, ...) are
    delegated unchanged to the wrapped object. The ``write_*``/``read_*``
    methods operate at the current file position; the ``get_*`` methods first
    seek to an explicit position and then read from there.
    """

    def __init__(self, fileobj, name=None, onclose=None):
        """
        :param fileobj: the file-like object to wrap.
        :param name: an optional name, used by ``repr()`` and ``str()``.
        :param onclose: an optional callable. ``close()`` calls it with this
            object as the only argument before closing the wrapped file.
        """

        self.file = fileobj
        self._name = name
        self.onclose = onclose
        self.is_closed = False

        # Objects exposing a "fileno" attribute are treated as real files;
        # write_array()/read_array() use array.tofile()/fromfile() on them.
        self.is_real = hasattr(fileobj, "fileno")
        if self.is_real:
            self.fileno = fileobj.fileno

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._name)

    def __str__(self):
        # __str__ must return a string. The name defaults to None, so fall
        # back to the repr for unnamed files instead of raising TypeError.
        name = self._name
        if name is None:
            return repr(self)
        return str(name)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def __iter__(self):
        return iter(self.file)

    def raw_file(self):
        """Returns the wrapped file object.
        """
        return self.file

    def read(self, *args, **kwargs):
        return self.file.read(*args, **kwargs)

    def readline(self, *args, **kwargs):
        return self.file.readline(*args, **kwargs)

    def write(self, *args, **kwargs):
        return self.file.write(*args, **kwargs)

    def tell(self, *args, **kwargs):
        return self.file.tell(*args, **kwargs)

    def seek(self, *args, **kwargs):
        return self.file.seek(*args, **kwargs)

    def truncate(self, *args, **kwargs):
        return self.file.truncate(*args, **kwargs)

    def flush(self):
        """Flushes the buffer of the wrapped file. This is a no-op if the
        wrapped file does not have a flush method.
        """

        if hasattr(self.file, "flush"):
            self.file.flush()

    def close(self):
        """Closes the wrapped file.

        Calls the ``onclose`` callback (if one was given) with this object
        before closing the wrapped file, if it has a ``close`` method.

        :raises Exception: if this object has already been closed.
        """

        if self.is_closed:
            raise Exception("This file is already closed")
        if self.onclose:
            self.onclose(self)
        if hasattr(self.file, "close"):
            self.file.close()
        self.is_closed = True

    def subset(self, offset, length, name=None):
        """Returns a new StructFile wrapping a ``SubFile`` built from the
        wrapped file, ``offset`` and ``length``.

        :param name: name for the new object; defaults to this object's name.
        """

        from whoosh.filedb.compound import SubFile

        name = name or self._name
        return StructFile(SubFile(self.file, offset, length), name=name)

    def write_string(self, s):
        """Writes a string to the wrapped file. This method writes the length
        of the string first (as a varint), so you can read the string back
        without having to know how long it was.
        """
        self.write_varint(len(s))
        self.write(s)

    def write_string2(self, s):
        """Writes a string prefixed with its length packed by ``pack_ushort``.
        """
        self.write(pack_ushort(len(s)) + s)

    def write_string4(self, s):
        """Writes a string prefixed with its length packed by ``pack_int``.
        """
        self.write(pack_int(len(s)) + s)

    def read_string(self):
        """Reads a string written by ``write_string`` from the wrapped file.
        """
        return self.read(self.read_varint())

    def read_string2(self):
        """Reads a string written by ``write_string2``.
        """
        l = self.read_ushort()
        return self.read(l)

    def read_string4(self):
        """Reads a string written by ``write_string4``.
        """
        l = self.read_int()
        return self.read(l)

    def get_string2(self, pos):
        """Reads a ushort-length-prefixed string starting at ``pos``.

        :returns: a tuple of ``(string, position just past the string)``.
        """
        l = self.get_ushort(pos)
        base = pos + _SHORT_SIZE
        return self.get(base, l), base + l

    def get_string4(self, pos):
        """Reads an int-length-prefixed string starting at ``pos``.

        :returns: a tuple of ``(string, position just past the string)``.
        """
        l = self.get_int(pos)
        base = pos + _INT_SIZE
        return self.get(base, l), base + l

    def skip_string(self):
        """Reads a varint length and seeks forward that many bytes, skipping
        a string written by ``write_string``.
        """
        l = self.read_varint()
        # whence=1: seek relative to the current position
        self.seek(l, 1)

    def write_varint(self, i):
        """Writes a variable-length unsigned integer to the wrapped file.
        """
        self.write(varint(i))

    def write_svarint(self, i):
        """Writes a variable-length signed integer to the wrapped file.
        """
        self.write(signed_varint(i))

    def read_varint(self):
        """Reads a variable-length encoded unsigned integer from the wrapped
        file.
        """
        return read_varint(self.read)

    def read_svarint(self):
        """Reads a variable-length encoded signed integer from the wrapped
        file.
        """
        return decode_signed_varint(read_varint(self.read))

    def write_tagint(self, i):
        """Writes a sometimes-compressed unsigned integer to the wrapped file.
        This is similar to the varint methods but uses a less compressed but
        faster format.
        """

        # Store numbers 0-253 in one byte. Byte 254 means "an unsigned 16-bit
        # int follows." Byte 255 means "An unsigned 32-bit int follows."
        # The tag byte is built with pack_byte() rather than chr() or a text
        # literal, so the value written is always a byte string.
        if i <= 253:
            self.write(pack_byte(i))
        elif i <= 65535:
            self.write(pack_byte(254) + pack_ushort(i))
        else:
            self.write(pack_byte(255) + pack_uint(i))

    def read_tagint(self):
        """Reads a sometimes-compressed unsigned integer from the wrapped file.
        This is similar to the varint methods but uses a less compressed but
        faster format.
        """

        tb = ord(self.read(1))
        if tb == 254:
            return self.read_ushort()
        elif tb == 255:
            return self.read_uint()
        else:
            return tb

    def write_byte(self, n):
        """Writes a single byte to the wrapped file, shortcut for
        ``file.write(pack_byte(n))``.
        """
        self.write(pack_byte(n))

    def read_byte(self):
        """Reads a single byte and returns its integer value.
        """
        return ord(self.read(1))

    def write_pickle(self, obj, protocol=-1):
        """Writes a pickled representation of obj to the wrapped file.

        The pickle is dumped directly to the wrapped file object rather than
        through this object's ``write`` method.
        """
        dump_pickle(obj, self.file, protocol)

    def read_pickle(self):
        """Reads a pickled object from the wrapped file.

        The pickle is loaded directly from the wrapped file object rather
        than through this object's ``read`` method.
        """
        # NOTE: unpickling can execute arbitrary code; only use this on files
        # from a trusted source.
        return load_pickle(self.file)

    # Fixed-size writers: each packs the value with the matching helper from
    # whoosh.system and writes the result at the current position.

    def write_sbyte(self, n):
        self.write(pack_sbyte(n))

    def write_int(self, n):
        self.write(pack_int(n))

    def write_uint(self, n):
        self.write(pack_uint(n))

    def write_uint_le(self, n):
        self.write(pack_uint_le(n))

    def write_ushort(self, n):
        self.write(pack_ushort(n))

    def write_ushort_le(self, n):
        self.write(pack_ushort_le(n))

    def write_long(self, n):
        self.write(pack_long(n))

    def write_ulong(self, n):
        self.write(pack_ulong(n))

    def write_float(self, n):
        self.write(pack_float(n))

    def write_array(self, arry):
        """Writes the contents of an ``array.array`` to the wrapped file.

        When ``IS_LITTLE`` is true a byte-swapped copy is written, so the
        caller's array is never modified.
        """
        if IS_LITTLE:
            arry = copy(arry)
            arry.byteswap()
        if self.is_real:
            arry.tofile(self.file)
        else:
            self.write(array_tobytes(arry))

    # Fixed-size readers: each reads the number of bytes for the type at the
    # current position and unpacks a single value.

    def read_sbyte(self):
        return unpack_sbyte(self.read(1))[0]

    def read_int(self):
        return unpack_int(self.read(_INT_SIZE))[0]

    def read_uint(self):
        return unpack_uint(self.read(_INT_SIZE))[0]

    def read_uint_le(self):
        return unpack_uint_le(self.read(_INT_SIZE))[0]

    def read_ushort(self):
        return unpack_ushort(self.read(_SHORT_SIZE))[0]

    def read_ushort_le(self):
        return unpack_ushort_le(self.read(_SHORT_SIZE))[0]

    def read_long(self):
        return unpack_long(self.read(_LONG_SIZE))[0]

    def read_ulong(self):
        return unpack_ulong(self.read(_LONG_SIZE))[0]

    def read_float(self):
        return unpack_float(self.read(_FLOAT_SIZE))[0]

    def read_array(self, typecode, length):
        """Reads ``length`` items of the given array typecode from the
        current position and returns them as an ``array.array``.

        The array is byte-swapped after reading when ``IS_LITTLE`` is true,
        mirroring ``write_array``.
        """
        a = array(typecode)
        if self.is_real:
            a.fromfile(self.file, length)
        else:
            array_frombytes(a, self.read(length * _SIZEMAP[typecode]))
        if IS_LITTLE:
            a.byteswap()
        return a

    def get(self, position, length):
        """Seeks to ``position`` and returns the result of reading ``length``
        bytes from there. The file position is left after the bytes read.
        """
        self.seek(position)
        return self.read(length)

    # Positional readers: like the read_* methods, but they read through
    # get(), starting at the given position.

    def get_byte(self, position):
        return unpack_byte(self.get(position, 1))[0]

    def get_sbyte(self, position):
        return unpack_sbyte(self.get(position, 1))[0]

    def get_int(self, position):
        return unpack_int(self.get(position, _INT_SIZE))[0]

    def get_uint(self, position):
        return unpack_uint(self.get(position, _INT_SIZE))[0]

    def get_ushort(self, position):
        return unpack_ushort(self.get(position, _SHORT_SIZE))[0]

    def get_long(self, position):
        return unpack_long(self.get(position, _LONG_SIZE))[0]

    def get_ulong(self, position):
        return unpack_ulong(self.get(position, _LONG_SIZE))[0]

    def get_float(self, position):
        return unpack_float(self.get(position, _FLOAT_SIZE))[0]

    def get_array(self, position, typecode, length):
        """Seeks to ``position`` and reads an array as ``read_array`` does.
        """
        self.seek(position)
        return self.read_array(typecode, length)
351
352
class BufferFile(StructFile):
    """A StructFile over an in-memory buffer. Positional reads (``get`` and
    ``get_array``) slice the buffer directly instead of seeking in a file.
    """

    def __init__(self, buf, name=None, onclose=None):
        """
        :param buf: the buffer to read from; it is also wrapped in a BytesIO
            to serve the sequential file methods.
        :param name: an optional name for this object.
        :param onclose: an optional callable stored for use on close.
        """

        # The attributes are set here directly; StructFile.__init__ is not
        # called.
        self._name = name
        self.onclose = onclose
        self.is_closed = False
        self.is_real = False

        self._buf = buf
        self.file = BytesIO(buf)

    def subset(self, position, length, name=None):
        """Returns a new BufferFile over ``length`` bytes of this buffer
        starting at ``position``. The name defaults to this object's name.
        """

        if not name:
            name = self._name
        chunk = self.get(position, length)
        return BufferFile(chunk, name=name)

    def get(self, position, length):
        """Returns ``length`` bytes of the buffer starting at ``position``,
        converted with ``bytes_type``.
        """

        end = position + length
        return bytes_type(self._buf[position:end])

    def get_array(self, position, typecode, length):
        """Returns an ``array.array`` of ``length`` items of ``typecode``
        loaded from the buffer at ``position``; byte-swapped when
        ``IS_LITTLE`` is true.
        """

        result = array(typecode)
        raw = self.get(position, length * _SIZEMAP[typecode])
        array_frombytes(result, raw)
        if IS_LITTLE:
            result.byteswap()
        return result
376
377
class ChecksumFile(StructFile):
    """A StructFile that keeps a running CRC-32 of the data passing through
    ``read``, ``write`` and line iteration. Seeking is not allowed.
    """

    def __init__(self, *args, **kwargs):
        """Takes the same arguments as ``StructFile``.
        """

        import zlib

        StructFile.__init__(self, *args, **kwargs)
        self._crc32 = zlib.crc32
        # Running checksum, updated on every read/write
        self._check = 0

    def __iter__(self):
        """Yields lines from the wrapped file, adding each one to the
        checksum before yielding it.
        """

        for chunk in self.file:
            updated = self._crc32(chunk, self._check)
            self._check = updated
            yield chunk

    def seek(self, *args):
        """Always raises an exception.
        """

        raise Exception("Cannot seek on a ChecksumFile")

    def read(self, *args, **kwargs):
        """Reads from the wrapped file, adds the data to the checksum and
        returns the data.
        """

        data = self.file.read(*args, **kwargs)
        self._check = self._crc32(data, self._check)
        return data

    def write(self, b):
        """Adds ``b`` to the checksum, then writes it to the wrapped file.
        """

        updated = self._crc32(b, self._check)
        self._check = updated
        self.file.write(b)

    def checksum(self):
        """Returns the current checksum masked to an unsigned 32-bit value.
        """

        mask = 0xffffffff
        return self._check & mask
403