1""" 2Compute hashtable sizes with nices properties 3- prime sizes (for small to medium sizes) 4- 2 prime-factor sizes (for big sizes) 5- fast growth for small sizes 6- slow growth for big sizes 7 8Note: 9 this is just a tool for developers. 10 within borgbackup, it is just used to generate hash_sizes definition for _hashindex.c. 11""" 12 13from collections import namedtuple 14 15K, M, G = 2**10, 2**20, 2**30 16 17# hash table size (in number of buckets) 18start, end_p1, end_p2 = 1 * K, 127 * M, 2 * G - 10 * M # stay well below 2^31 - 1 19 20Policy = namedtuple("Policy", "upto grow") 21 22policies = [ 23 # which growth factor to use when growing a hashtable of size < upto 24 # grow fast (*2.0) at the start so we do not have to resize too often (expensive). 25 # grow slow (*1.1) for huge hash tables (do not jump too much in memory usage) 26 Policy(256*K, 2.0), 27 Policy(2*M, 1.7), 28 Policy(16*M, 1.4), 29 Policy(128*M, 1.2), 30 Policy(2*G-1, 1.1), 31] 32 33 34# slightly modified version of: 35# http://www.macdevcenter.com/pub/a/python/excerpt/pythonckbk_chap1/index1.html?page=2 36def eratosthenes(): 37 """Yields the sequence of prime numbers via the Sieve of Eratosthenes.""" 38 D = {} # map each composite integer to its first-found prime factor 39 q = 2 # q gets 2, 3, 4, 5, ... ad infinitum 40 while True: 41 p = D.pop(q, None) 42 if p is None: 43 # q not a key in D, so q is prime, therefore, yield it 44 yield q 45 # mark q squared as not-prime (with q as first-found prime factor) 46 D[q * q] = q 47 else: 48 # let x <- smallest (N*p)+q which wasn't yet known to be composite 49 # we just learned x is composite, with p first-found prime factor, 50 # since p is the first-found prime factor of q -- find and mark it 51 x = p + q 52 while x in D: 53 x += p 54 D[x] = p 55 q += 1 56 57 58def two_prime_factors(pfix=65537): 59 """Yields numbers with 2 prime factors pfix and p.""" 60 for p in eratosthenes(): 61 yield pfix * p 62 63 64def get_grow_factor(size): 65 for p in policies: 66 if size < p.upto: 67 return p.grow 68 69 70def find_bigger_prime(gen, i): 71 while True: 72 p = next(gen) 73 if p >= i: 74 return p 75 76 77def main(): 78 sizes = [] 79 i = start 80 81 gen = eratosthenes() 82 while i < end_p1: 83 grow_factor = get_grow_factor(i) 84 p = find_bigger_prime(gen, i) 85 sizes.append(p) 86 i = int(i * grow_factor) 87 88 gen = two_prime_factors() # for lower ram consumption 89 while i < end_p2: 90 grow_factor = get_grow_factor(i) 91 p = find_bigger_prime(gen, i) 92 sizes.append(p) 93 i = int(i * grow_factor) 94 95 print("""\ 96static int hash_sizes[] = { 97 %s 98}; 99""" % ', '.join(str(size) for size in sizes)) 100 101 102if __name__ == '__main__': 103 main() 104