# cython: language_level=3

API_VERSION = '1.1_01'

from libc.stdlib cimport free

# C-level chunker and buzhash API; the declarations below are taken from _chunker.c.
cdef extern from "_chunker.c":
    # Cython only needs to know that this is an integer type here.
    ctypedef int uint32_t
    # Opaque handle: the C struct is named "Chunker", exposed here as _Chunker
    # so it does not clash with the extension type of the same name below.
    ctypedef struct _Chunker "Chunker":
        pass
    _Chunker *chunker_init(int window_size, int chunk_mask, int min_size, int max_size, uint32_t seed)
    void chunker_set_fd(_Chunker *chunker, object f, int fd)
    void chunker_free(_Chunker *chunker)
    object chunker_process(_Chunker *chunker)
    uint32_t *buzhash_init_table(uint32_t seed)
    # The C functions are called "buzhash" / "buzhash_update"; they get a c_ prefix
    # on the Cython side because the Python-level wrappers below reuse those names.
    uint32_t c_buzhash "buzhash"(unsigned char *data, size_t len, uint32_t *h)
    uint32_t c_buzhash_update "buzhash_update"(uint32_t sum, unsigned char remove, unsigned char add, size_t len, uint32_t *h)


cdef class Chunker:
    """
    Iterator wrapper around the C chunker declared in _chunker.c.

    Attach a file with chunkify(), then iterate over the instance; every
    iteration step returns whatever chunker_process() returns.
    """
    cdef _Chunker *chunker

    def __cinit__(self, int seed, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size):
        # The chunk size limits are passed in as exponents of two.
        smallest = 1 << chunk_min_exp
        largest = 1 << chunk_max_exp
        # see chunker_process, first while loop condition, first term must be able to get True:
        assert hash_window_size + smallest + 1 <= largest, "too small max_size"
        # Mask with the lowest hash_mask_bits bits set.
        mask = (1 << hash_mask_bits) - 1
        # Only the low 32 bits of the seed are handed to the C code.
        self.chunker = chunker_init(hash_window_size, mask, smallest, largest, seed & 0xffffffff)

    def chunkify(self, fd, fh=-1):
        """
        Attach a file to the chunker and return the chunker itself for iteration.

        :param fd: Python file object
        :param fh: OS-level file handle (if available),
                   defaults to -1 which means not to use OS-level fd.
        :return: this Chunker instance (it is its own iterator)
        """
        chunker_set_fd(self.chunker, fd, fh)
        return self

    def __dealloc__(self):
        # Release the C-side state, but only if chunker_init handed one out.
        if self.chunker != NULL:
            chunker_free(self.chunker)

    def __iter__(self):
        # The object is its own iterator, see __next__.
        return self

    def __next__(self):
        # Delegates entirely to the C implementation.
        # NOTE(review): presumably end-of-input / errors are raised from within
        # chunker_process - confirm against _chunker.c.
        return chunker_process(self.chunker)


def buzhash(data, unsigned long seed):
    """
    Compute the buzhash of *data* using a table derived from *seed*.

    A fresh table is built from the low 32 bits of *seed* for this single
    call and freed again before returning.

    :param data: object that Cython can coerce to ``const unsigned char *``; it is hashed over ``len(data)`` bytes
    :param seed: seed for the hash table (masked to 32 bits)
    :return: the value returned by the C buzhash function
    """
    cdef uint32_t *table = buzhash_init_table(seed & 0xffffffff)
    cdef uint32_t digest = c_buzhash(<const unsigned char *> data, len(data), table)
    free(table)
    return digest


def buzhash_update(uint32_t sum, unsigned char remove, unsigned char add, size_t len, unsigned long seed):
    """
    Update an existing buzhash value via the C buzhash_update function.

    A fresh table is built from the low 32 bits of *seed* for this single
    call and freed again before returning.

    :param sum: current hash value
    :param remove: byte value passed as "remove" (presumably the byte leaving the window - confirm against _chunker.c)
    :param add: byte value passed as "add" (presumably the byte entering the window - confirm against _chunker.c)
    :param len: length value passed through to the C function
    :param seed: seed for the hash table (masked to 32 bits)
    :return: the value returned by the C buzhash_update function
    """
    cdef uint32_t *table = buzhash_init_table(seed & 0xffffffff)
    cdef uint32_t rolled = c_buzhash_update(sum, remove, add, len, table)
    free(table)
    return rolled