1# cython: language_level=3
2
# Version tag of this extension module's API.
# NOTE(review): presumably compared by the importing package to detect a stale
# compiled extension -- confirm against the caller before changing the format.
API_VERSION = '1.1_01'
4
5from libc.stdlib cimport free
6
# C-level declarations for the symbols this module uses from _chunker.c.
cdef extern from "_chunker.c":
    # NOTE(review): declared as plain int here; presumably the real definition
    # of uint32_t comes from the C side at compile time -- confirm.
    ctypedef int uint32_t
    # The C struct is called "Chunker"; on the Cython side it is named _Chunker
    # so that it does not collide with the Chunker extension class.
    # Its fields are not declared, so it is only ever handled through pointers.
    ctypedef struct _Chunker "Chunker":
        pass
    # Create a chunker state from the given window size, mask, size limits and seed.
    _Chunker *chunker_init(int window_size, int chunk_mask, int min_size, int max_size, uint32_t seed)
    # Attach an input to the chunker: a Python file object and an OS-level fd.
    void chunker_set_fd(_Chunker *chunker, object f, int fd)
    # Release a chunker state obtained from chunker_init.
    void chunker_free(_Chunker *chunker)
    # Produce the next result as a Python object.
    object chunker_process(_Chunker *chunker)
    # Build a hash table for the given seed; this module releases it with free().
    uint32_t *buzhash_init_table(uint32_t seed)
    # The C functions "buzhash" / "buzhash_update" are bound under c_* names so
    # the Python-level wrappers of the same names can be defined in this module.
    uint32_t c_buzhash "buzhash"(unsigned char *data, size_t len, uint32_t *h)
    uint32_t c_buzhash_update  "buzhash_update"(uint32_t sum, unsigned char remove, unsigned char add, size_t len, uint32_t *h)
18
19
cdef class Chunker:
    """
    Iterator that cuts a file into chunks using the C chunker from _chunker.c.

    :param seed: seed value; only its low 32 bits are passed to chunker_init
    :param chunk_min_exp: the minimum size passed on is 1 << chunk_min_exp
    :param chunk_max_exp: the maximum size passed on is 1 << chunk_max_exp
    :param hash_mask_bits: the mask passed on has this many low bits set
    :param hash_window_size: window size passed to chunker_init
    :raises AssertionError: if hash_window_size + min size + 1 exceeds max size
    :raises MemoryError: if chunker_init returns NULL
    """
    cdef _Chunker *chunker

    def __cinit__(self, int seed, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size):
        min_size = 1 << chunk_min_exp
        max_size = 1 << chunk_max_exp
        # see chunker_process, first while loop condition, first term must be able to get True.
        # This is checked unconditionally (not via an assert statement) so the
        # guard also holds when assertions are disabled; the exception type and
        # message are kept for existing callers.
        if hash_window_size + min_size + 1 > max_size:
            raise AssertionError("too small max_size")
        hash_mask = (1 << hash_mask_bits) - 1
        self.chunker = chunker_init(hash_window_size, hash_mask, min_size, max_size, seed & 0xffffffff)
        # Defensive: never keep a NULL state around, the other methods hand
        # self.chunker straight to C code.
        if not self.chunker:
            raise MemoryError("chunker_init returned NULL")

    def chunkify(self, fd, fh=-1):
        """
        Cut a file into chunks.

        :param fd: Python file object
        :param fh: OS-level file handle (if available),
                   defaults to -1 which means not to use OS-level fd.
        :return: this Chunker, to be iterated over
        """
        chunker_set_fd(self.chunker, fd, fh)
        return self

    def __dealloc__(self):
        # self.chunker is still NULL if __cinit__ raised before chunker_init.
        if self.chunker:
            chunker_free(self.chunker)

    def __iter__(self):
        """Return self; the Chunker is its own iterator."""
        return self

    def __next__(self):
        """Return the next object produced by chunker_process."""
        return chunker_process(self.chunker)
51
52
def buzhash(data, unsigned long seed):
    """
    Compute the buzhash of data with a table built for seed.

    :param data: object that is cast to an unsigned char pointer and must
                 support len()
    :param seed: seed value; only its low 32 bits are used for the table
    :return: the value returned by the C buzhash function
    """
    cdef uint32_t *table
    cdef uint32_t digest
    table = buzhash_init_table(seed & 0xffffffff)
    try:
        # The cast and len() can raise for unsuitable data; the finally clause
        # makes sure the table is released in that case as well.
        digest = c_buzhash(<const unsigned char *> data, len(data), table)
    finally:
        free(table)
    return digest
60
61
def buzhash_update(uint32_t sum, unsigned char remove, unsigned char add, size_t len, unsigned long seed):
    """
    Update a buzhash value via the C buzhash_update function.

    A table is built for seed (low 32 bits only), used for this single call
    and released again before returning.

    :param sum: current hash value
    :param remove: byte value passed as the "remove" argument
    :param add: byte value passed as the "add" argument
    :param len: length value passed through to the C function
    :param seed: seed value for the table
    :return: the value returned by the C buzhash_update function
    """
    cdef uint32_t *lookup = buzhash_init_table(seed & 0xffffffff)
    cdef uint32_t rolled = c_buzhash_update(sum, remove, add, len, lookup)
    free(lookup)
    return rolled
68