1# -*- python -*- or near enough
2
3import sys
4import zlib
5
6from cpython cimport PyBytes_FromStringAndSize, \
7    PyBytes_AS_STRING, PyBytes_Size
8
9from .pyalloc cimport pyalloc_v
10
11from libc.stdio cimport fread, fseek, ftell
12from libc.string cimport memcpy
13
# C-level declarations taken from the CPython header.
# NOTE(review): nothing in the visible part of this file uses
# PyCObject_Import, PyTypeObject, PyObject or FILE; the PyCObject API no
# longer exists in Python 3 -- confirm whether these declarations are still
# needed.
cdef extern from "Python.h":
    void *PyCObject_Import(char *, char *) except NULL
    ctypedef struct PyTypeObject:
        pass
    ctypedef struct PyObject:
        pass
    ctypedef struct FILE


# Compile-time constant: the largest number of bytes requested from the
# wrapped file object in a single read() call by the stream classes below.
DEF _BLOCK_SIZE = 131072

# Runtime (Python-visible) copy of the compile-time constant above.
BLOCK_SIZE = _BLOCK_SIZE  # public
26
cdef class GenericStream:
    """Wrapper giving a Python file-like object a C-level read interface.

    Every operation is forwarded to the wrapped object, which therefore has
    to provide ``seek``, ``tell`` and ``read`` methods.

    Parameters
    ----------
    fobj : file-like
        Object the data is read from.
    """

    def __init__(self, fobj):
        self.fobj = fobj

    cpdef int seek(self, long int offset, int whence=0) except -1:
        """Call ``seek(offset, whence)`` on the wrapped object; return 0."""
        self.fobj.seek(offset, whence)
        return 0

    cpdef long int tell(self) except -1:
        """Return the position reported by the wrapped object's ``tell()``."""
        return self.fobj.tell()

    def read(self, n_bytes):
        """Return whatever ``read(n_bytes)`` on the wrapped object returns."""
        return self.fobj.read(n_bytes)

    cpdef int all_data_read(self) except *:
        """Always return 1; subclasses may override with a real check."""
        return 1

    cdef int read_into(self, void *buf, size_t n) except -1:
        """Read exactly `n` bytes from the stream into the buffer `buf`.

        `buf` must have room for at least `n` bytes.  Data is requested from
        the wrapped object in pieces of at most ``_BLOCK_SIZE`` bytes.

        Returns 0 on success.

        Raises
        ------
        IOError
            If the stream is exhausted before `n` bytes were read, or if the
            wrapped object's ``read`` returns more bytes than were requested.
        """
        cdef char *dest = <char*>buf
        cdef size_t want, got
        cdef size_t count = 0

        while count < n:
            want = min(n - count, _BLOCK_SIZE)
            data = self.fobj.read(want)
            got = len(data)
            if got == 0:
                # End of stream; the length check below reports the error.
                break
            if got > want:
                # Copying an over-long chunk would write past the end of
                # `buf`, so refuse it instead of corrupting memory.
                raise IOError('read returned more bytes than requested')
            memcpy(dest, <const char*>data, got)
            dest += got
            count += got

        if count != n:
            raise IOError('could not read bytes')
        return 0

    cdef object read_string(self, size_t n, void **pp, int copy=True):
        """Read `n` bytes and return the object owning them.

        On return ``pp[0]`` points at the first of the `n` bytes; the pointer
        is only valid while the returned object is alive.

        When `copy` is anything other than ``True`` the bytes object returned
        by the wrapped object's ``read`` is handed back directly.  Otherwise
        new memory is obtained with ``pyalloc_v`` and filled via
        ``read_into``.

        Raises
        ------
        IOError
            If `n` bytes could not be read.
        """
        if copy != True:
            data = self.fobj.read(n)
            if PyBytes_Size(data) != n:
                raise IOError('could not read bytes')
            pp[0] = <void*>PyBytes_AS_STRING(data)
            return data

        cdef object d_copy = pyalloc_v(n, pp)
        self.read_into(pp[0], n)
        return d_copy
80
81
cdef class ZlibInputStream(GenericStream):
    """
    File-like object uncompressing bytes from a zlib compressed stream.

    Parameters
    ----------
    fobj : file-like
        Stream to read compressed data from.
    max_length : int
        Maximum number of (compressed) bytes to read from `fobj`.

    Notes
    -----
    Some matlab files contain zlib streams without valid Z_STREAM_END
    termination.  To get round this, we use the decompressobj object, that
    allows you to decode an incomplete stream.  See discussion at
    https://bugs.python.org/issue8672

    The stream is forward-only: seeking backwards or relative to the end
    raises IOError.

    """

    # Limit on the number of compressed bytes taken from `fobj`
    cdef ssize_t _max_length
    # zlib.decompressobj() instance doing the decompression
    cdef object _decompressor
    # Most recently decompressed chunk
    cdef bytes _buffer
    # len(_buffer)
    cdef size_t _buffer_size
    # Number of bytes of _buffer already consumed
    cdef size_t _buffer_position
    # Number of decompressed bytes consumed so far (the value of tell())
    cdef size_t _total_position
    # Number of compressed bytes read from `fobj` so far
    cdef size_t _read_bytes

    def __init__(self, fobj, ssize_t max_length):
        self.fobj = fobj

        self._max_length = max_length
        self._decompressor = zlib.decompressobj()
        self._buffer = b''
        self._buffer_size = 0
        self._buffer_position = 0
        self._total_position = 0
        self._read_bytes = 0

    cdef inline void _fill_buffer(self) except *:
        """Replace the buffer with fresh decompressed data if it is used up.

        Does nothing while unconsumed bytes remain in the buffer.  Otherwise
        reads up to ``_BLOCK_SIZE`` compressed bytes (never exceeding the
        `max_length` budget) and decompresses them; when no compressed bytes
        were obtained, the decompressor is flushed instead.
        """
        cdef size_t read_size
        cdef bytes block

        if self._buffer_position < self._buffer_size:
            return

        read_size = min(_BLOCK_SIZE, self._max_length - self._read_bytes)

        block = self.fobj.read(read_size)
        self._read_bytes += len(block)

        self._buffer_position = 0
        if not block:
            self._buffer = self._decompressor.flush()
        else:
            self._buffer = self._decompressor.decompress(block)
        self._buffer_size = len(self._buffer)

    cdef int read_into(self, void *buf, size_t n) except -1:
        """Read exactly `n` decompressed bytes into the buffer `buf`.

        Returns 0 on success.  The stream position advances by the number of
        bytes actually copied, even when the read fails.

        Raises
        ------
        IOError
            If fewer than `n` bytes could be produced.
        """
        cdef char *dstp
        cdef char *srcp
        cdef size_t count, size

        dstp = <char*>buf
        count = 0
        while count < n:
            self._fill_buffer()
            if self._buffer_size == 0:
                # Refill produced nothing: treat as end of data.
                break

            srcp = <char*>self._buffer
            srcp += self._buffer_position

            size = min(n - count, self._buffer_size - self._buffer_position)
            memcpy(dstp, srcp, size)

            count += size
            dstp += size
            self._buffer_position += size

        self._total_position += count

        if count != n:
            raise IOError('could not read bytes')

        return 0

    cdef object read_string(self, size_t n, void **pp, int copy=True):
        """Read `n` decompressed bytes into new memory; return its owner.

        ``pp[0]`` is set to the start of the data.  `copy` is accepted for
        signature compatibility with the base class; the data is always
        placed in memory obtained from ``pyalloc_v``.
        """
        cdef object d_copy = pyalloc_v(n, pp)
        self.read_into(pp[0], n)
        return d_copy

    def read(self, n_bytes):
        """Return an object holding the next `n_bytes` decompressed bytes.

        Raises IOError if that many bytes are not available.
        """
        cdef void *p
        return self.read_string(n_bytes, &p)

    cpdef int all_data_read(self) except *:
        """Return true if both compressed input and buffer are used up.

        That is, `max_length` compressed bytes have been read from the
        underlying object and no decompressed bytes remain unconsumed.
        """
        if self._read_bytes < self._max_length:
            # we might still have checksum bytes to read
            self._fill_buffer()
        return (self._max_length == self._read_bytes) and \
               (self._buffer_size == self._buffer_position)

    cpdef long int tell(self) except -1:
        """Return the number of decompressed bytes consumed so far."""
        # NOTE(review): nothing in this class ever stores -1 in
        # `_total_position`; the check is kept as a guard only.
        if self._total_position == -1:
            raise IOError("Invalid file position.")
        return self._total_position

    cpdef int seek(self, long int offset, int whence=0) except -1:
        """Move forward to a new position in the decompressed data.

        Parameters
        ----------
        offset : int
            Target position (``whence == 0``) or displacement from the
            current position (``whence == 1``).
        whence : int, optional
            0 (absolute, default) or 1 (relative to current position).

        Returns 0.  If the data runs out before the target is reached the
        position is left at the end of the available data.

        Raises
        ------
        IOError
            If ``whence == 2``, or the target lies before the current
            position (including any negative target).
        ValueError
            If `whence` is not 0, 1 or 2.
        """
        cdef ssize_t new_pos, cur_pos
        cdef size_t target, size

        cur_pos = <ssize_t>self._total_position
        if whence == 1:
            new_pos = cur_pos + offset
        elif whence == 0:
            new_pos = offset
        elif whence == 2:
            raise IOError("Zlib stream cannot seek from file end")
        else:
            raise ValueError("Invalid value for whence")

        # Compare as signed values.  Comparing directly against the unsigned
        # `_total_position` would convert a negative target into a huge
        # positive one, which would then be "reached" by silently consuming
        # the whole stream instead of raising.
        if new_pos < cur_pos:
            raise IOError("Zlib stream cannot seek backwards")

        # new_pos >= cur_pos >= 0 here, so the conversion is lossless.
        target = <size_t>new_pos

        while self._total_position < target:
            self._fill_buffer()
            if self._buffer_size == 0:
                break

            size = min(target - self._total_position,
                       self._buffer_size - self._buffer_position)

            self._total_position += size
            self._buffer_position += size

        return 0
219
220
def _read_into(GenericStream st, size_t n):
    """Testing helper: return `n` bytes obtained through ``st.read_into``.

    Regular code should call ``st.read`` instead.
    """
    # A bytes object cannot be written to, so the data lands in a
    # space-filled bytearray first and is converted afterwards.
    scratch = bytearray(b' ' * n)
    cdef char *dest = scratch
    st.read_into(dest, n)
    return bytes(scratch)
229
230
def _read_string(GenericStream st, size_t n):
    """Testing helper: return `n` bytes obtained through ``st.read_string``.

    Regular code should call ``st.read`` instead.
    """
    cdef void *src
    # `owner` keeps the memory behind `src` alive until the copy is done.
    cdef object owner = st.read_string(n, &src, True)
    # A bytes object cannot be written to, so copy into a bytearray and
    # convert at the end.
    scratch = bytearray(b'A' * n)
    cdef char *dest = scratch
    memcpy(dest, src, n)
    return bytes(scratch)
240
241
cpdef GenericStream make_stream(object fobj):
    """Return `fobj` as a GenericStream.

    An object that already is a GenericStream (or subclass) instance is
    returned unchanged; anything else is wrapped in a new GenericStream.
    """
    if not isinstance(fobj, GenericStream):
        fobj = GenericStream(fobj)
    return fobj
248