# -*- python -*- or near enough

import sys
import zlib

from cpython cimport PyBytes_FromStringAndSize, \
    PyBytes_AS_STRING, PyBytes_Size

from .pyalloc cimport pyalloc_v

from libc.stdio cimport fread, fseek, ftell
from libc.string cimport memcpy

cdef extern from "Python.h":
    void *PyCObject_Import(char *, char *) except NULL
    ctypedef struct PyTypeObject:
        pass
    ctypedef struct PyObject:
        pass
    ctypedef struct FILE


# Largest number of bytes requested from the underlying file object per read.
DEF _BLOCK_SIZE = 131072

BLOCK_SIZE = _BLOCK_SIZE  # public


cdef class GenericStream:
    """Wrap a Python file-like object behind a small C-level read API.

    Parameters
    ----------
    fobj : file-like
        Object providing ``read``, ``seek`` and ``tell``.

    Notes
    -----
    The ``fobj`` attribute is not declared in this file; presumably it is
    declared in a companion ``.pxd`` -- TODO confirm.
    """

    def __init__(self, fobj):
        self.fobj = fobj

    cpdef int seek(self, long int offset, int whence=0) except -1:
        """Forward ``seek(offset, whence)`` to the wrapped object; return 0."""
        self.fobj.seek(offset, whence)
        return 0

    cpdef long int tell(self) except -1:
        """Return the position reported by the wrapped object's ``tell``."""
        return self.fobj.tell()

    def read(self, n_bytes):
        """Return the result of ``fobj.read(n_bytes)`` unchanged."""
        return self.fobj.read(n_bytes)

    cpdef int all_data_read(self) except *:
        """Always return 1 for a plain stream."""
        return 1

    cdef int read_into(self, void *buf, size_t n) except -1:
        """Read exactly `n` bytes from the stream into buffer `buf`.

        `buf` must have room for at least `n` bytes.  Returns 0 on success.

        Raises
        ------
        IOError
            If the stream ends before `n` bytes were read, or if the wrapped
            object returns more bytes than were requested.
        """
        cdef char *p
        cdef size_t read_size, count

        # Read data to buf in _BLOCK_SIZE blocks
        count = 0
        p = <char*>buf
        while count < n:
            read_size = min(n - count, _BLOCK_SIZE)
            data = self.fobj.read(read_size)
            read_size = len(data)
            if read_size == 0:
                # End of stream; the short count is reported below.
                break
            if read_size > n - count:
                # A misbehaving file object handed back more than we asked
                # for; copying it would write past the end of `buf`.
                raise IOError('read returned more bytes than requested')
            memcpy(p, <const char*>data, read_size)
            p += read_size
            count += read_size

        if count != n:
            raise IOError('could not read bytes')
        return 0

    cdef object read_string(self, size_t n, void **pp, int copy=True):
        """Read `n` bytes and return the object that owns them.

        On return ``pp[0]`` points at the first of the `n` bytes.  When
        `copy` is not equal to ``True`` the bytes object returned by
        ``fobj.read`` is handed back directly and ``pp[0]`` points into it;
        otherwise fresh memory is obtained from ``pyalloc_v`` and filled via
        ``read_into``.

        Raises
        ------
        IOError
            If fewer than `n` bytes could be read.
        """
        if copy != True:
            data = self.fobj.read(n)
            if PyBytes_Size(data) != n:
                raise IOError('could not read bytes')
            pp[0] = <void*>PyBytes_AS_STRING(data)
            return data

        cdef object d_copy = pyalloc_v(n, pp)
        self.read_into(pp[0], n)
        return d_copy


cdef class ZlibInputStream(GenericStream):
    """
    File-like object uncompressing bytes from a zlib compressed stream.

    Parameters
    ----------
    fobj : file-like
        Stream to read compressed data from.
    max_length : int
        Maximum number of bytes to read from the stream.

    Notes
    -----
    Some matlab files contain zlib streams without valid Z_STREAM_END
    termination. To get round this, we use the decompressobj object, that
    allows you to decode an incomplete stream. See discussion at
    https://bugs.python.org/issue8672

    """

    # Upper bound on compressed bytes taken from ``fobj``.
    cdef ssize_t _max_length
    # ``zlib.decompressobj()`` instance doing the actual inflation.
    cdef object _decompressor
    # Most recently decompressed chunk, its length, and the read cursor in it.
    cdef bytes _buffer
    cdef size_t _buffer_size
    cdef size_t _buffer_position
    # Number of uncompressed bytes handed out or skipped so far.
    cdef size_t _total_position
    # Number of compressed bytes read from ``fobj`` so far.
    cdef size_t _read_bytes

    def __init__(self, fobj, ssize_t max_length):
        self.fobj = fobj

        self._max_length = max_length
        self._decompressor = zlib.decompressobj()
        self._buffer = b''
        self._buffer_size = 0
        self._buffer_position = 0
        self._total_position = 0
        self._read_bytes = 0

    cdef inline void _fill_buffer(self) except *:
        """Refill the decompressed buffer once it has been fully consumed.

        Does nothing while unread bytes remain in the buffer.  Otherwise
        reads up to ``_BLOCK_SIZE`` compressed bytes (never going past
        ``_max_length`` in total) and stores the decompressed result; when
        the read returns no data the decompressor is flushed instead.
        """
        cdef size_t read_size
        cdef bytes block

        if self._buffer_position < self._buffer_size:
            return

        read_size = min(_BLOCK_SIZE, self._max_length - self._read_bytes)

        block = self.fobj.read(read_size)
        self._read_bytes += len(block)

        self._buffer_position = 0
        if not block:
            self._buffer = self._decompressor.flush()
        else:
            self._buffer = self._decompressor.decompress(block)
        self._buffer_size = len(self._buffer)

    cdef int read_into(self, void *buf, size_t n) except -1:
        """Read exactly `n` uncompressed bytes into buffer `buf`.

        `buf` must have room for at least `n` bytes.  Returns 0 on success.
        The stream position is advanced by the number of bytes actually
        copied, even when the read comes up short.

        Raises
        ------
        IOError
            If fewer than `n` bytes could be produced.
        """
        cdef char *dstp
        cdef char *srcp
        cdef size_t read_size, count, size

        dstp = <char*>buf
        count = 0
        while count < n:
            self._fill_buffer()
            if self._buffer_size == 0:
                # NOTE(review): an empty chunk is treated as end of stream;
                # confirm a non-empty compressed block can never legitimately
                # decompress to nothing here.
                break

            srcp = <char*>self._buffer
            srcp += self._buffer_position

            size = min(n - count, self._buffer_size - self._buffer_position)
            memcpy(dstp, srcp, size)

            count += size
            dstp += size
            self._buffer_position += size

        self._total_position += count

        if count != n:
            raise IOError('could not read bytes')

        return 0

    cdef object read_string(self, size_t n, void **pp, int copy=True):
        """Read `n` uncompressed bytes into new memory and return its owner.

        Memory comes from ``pyalloc_v`` and ``pp[0]`` is set to point at it.
        The `copy` argument is accepted for interface compatibility but not
        consulted: the data is always copied.
        """
        cdef object d_copy = pyalloc_v(n, pp)
        self.read_into(pp[0], n)
        return d_copy

    def read(self, n_bytes):
        """Return an object holding the next `n_bytes` uncompressed bytes."""
        cdef void *p
        return self.read_string(n_bytes, &p)

    cpdef int all_data_read(self) except *:
        """Return true when the compressed input and the buffer are used up.

        That is: ``_max_length`` compressed bytes have been read from the
        underlying object and no decompressed bytes remain unread.
        """
        if self._read_bytes < self._max_length:
            # we might still have checksum bytes to read
            self._fill_buffer()
        return (self._max_length == self._read_bytes) and \
               (self._buffer_size == self._buffer_position)

    cpdef long int tell(self) except -1:
        """Return the current position in the uncompressed stream."""
        # NOTE(review): _total_position is unsigned, so this presumably only
        # matches a wrapped-around value; nothing visible here sets it.
        if self._total_position == -1:
            raise IOError("Invalid file position.")
        return self._total_position

    cpdef int seek(self, long int offset, int whence=0) except -1:
        """Move forward in the uncompressed stream by discarding data.

        Parameters
        ----------
        offset : int
            Target position (``whence == 0``) or displacement from the
            current position (``whence == 1``).
        whence : int, optional
            0 for absolute, 1 for relative.  2 (from end) is not supported.

        Returns
        -------
        int
            Always 0.  If the stream ends before the target is reached the
            position is left at the end of the data.

        Raises
        ------
        IOError
            If ``whence == 2`` or the target lies before the current
            position (including any negative target).
        ValueError
            If `whence` is not 0, 1 or 2.
        """
        cdef ssize_t new_pos, cur_pos, size

        # Work in the signed domain throughout: comparing a negative ssize_t
        # against the unsigned position would convert it to a huge positive
        # value, slip past the "backwards" check and drain the whole stream.
        cur_pos = <ssize_t>self._total_position

        if whence == 1:
            new_pos = cur_pos + offset
        elif whence == 0:
            new_pos = offset
        elif whence == 2:
            raise IOError("Zlib stream cannot seek from file end")
        else:
            raise ValueError("Invalid value for whence")

        if new_pos < cur_pos:
            raise IOError("Zlib stream cannot seek backwards")

        while <ssize_t>self._total_position < new_pos:
            self._fill_buffer()
            if self._buffer_size == 0:
                break

            size = min(new_pos - <ssize_t>self._total_position,
                       <ssize_t>(self._buffer_size - self._buffer_position))

            self._total_position += size
            self._buffer_position += size

        return 0


def _read_into(GenericStream st, size_t n):
    """Return `n` bytes read through ``st.read_into`` (testing helper)."""
    # for testing only. Use st.read instead
    cdef char * d_ptr
    # use bytearray because bytes() is immutable
    my_str = bytearray(b' ' * n)
    d_ptr = my_str
    st.read_into(d_ptr, n)
    return bytes(my_str)


def _read_string(GenericStream st, size_t n):
    """Return `n` bytes read through ``st.read_string`` (testing helper)."""
    # for testing only. Use st.read instead
    cdef void *d_ptr
    # Keep a reference to the owner so d_ptr stays valid during the copy.
    cdef object obj = st.read_string(n, &d_ptr, True)
    # use bytearray because bytes() is immutable
    my_str = bytearray(b'A' * n)
    cdef char *mys_ptr = my_str
    memcpy(mys_ptr, d_ptr, n)
    return bytes(my_str)


cpdef GenericStream make_stream(object fobj):
    """ Make stream of correct type for file-like `fobj`

    An existing ``GenericStream`` (or subclass) instance is returned as is;
    anything else is wrapped in a new ``GenericStream``.
    """
    if isinstance(fobj, GenericStream):
        return fobj
    return GenericStream(fobj)