1#!/bin/sh
2"""": # -*-python-*-
3# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
4export "BUP_ARGV_0"="$0"
5arg_i=1
6for arg in "$@"; do
7    export "BUP_ARGV_${arg_i}"="$arg"
8    shift
9    arg_i=$((arg_i + 1))
10done
11# Here to end of preamble replaced during install
12bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
13exec "$bup_python" "$0"
14"""
15# end of bup preamble
16
17from __future__ import absolute_import
18import math, os.path, struct, sys
19
20sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
21
22from bup import compat, options, git, _helpers
23from bup.helpers import log
24from bup.io import byte_stream
25
# Divisor for the closing "everyone on earth" estimate printed in the
# default (non --predict) mode.
POPULATION_OF_EARTH=6.7e9  # as of September, 2010

# Option specification handed to options.Options: the usage line, a '--'
# separator, then one line per flag followed by its help text.
optspec = """
bup margin
--
predict    Guess object offsets and report the maximum deviation
ignore-midx  Don't use midx files; use only plain pack idx files.
"""
o = options.Options(optspec)
opt, flags, extra = o.parse(compat.argv[1:])

# This command accepts flags only; any leftover argument is a usage error.
if extra:
    o.fatal("no arguments expected")

# Must run before the pack index list is built from the repository path.
git.check_repo_or_die()

# Index list over the repository's objects/pack directory; iterated and
# measured with len() by the code below.
# NOTE(review): presumably iteration yields object ids in sorted order and,
# unless --ignore-midx is given, midx files are consulted -- confirm in
# bup.git.PackIdxList.
mi = git.PackIdxList(git.repo(b'objects/pack'), ignore_midx=opt.ignore_midx)
43
def do_predict(ix, out):
    """Report how far entries of ix sit from a uniformly-spaced position.

    For each entry, the first 8 bytes are read as a big-endian unsigned
    64-bit integer and scaled to the range [0, len(ix)) to predict the
    entry's position.  The largest absolute difference between that
    prediction and the entry's actual position in iteration order is
    written to out as b'<maxdiff> of <total> (<percent>%) '.

    ix  -- sized iterable of byte strings at least 8 bytes long.
    out -- binary stream providing write() and flush().

    An empty ix reports b'0 of 0 (0.000%) ' rather than dividing by zero.
    Raises AssertionError if iteration does not yield len(ix) entries.
    """
    total = len(ix)
    maxdiff = 0
    # Seed the counter so the closing consistency check also works when the
    # loop body never runs (empty index).
    count = -1
    for count, i in enumerate(ix):
        prefix = struct.unpack('!Q', i[:8])[0]
        # Position the entry would occupy if prefixes were spread evenly
        # over the whole 64-bit range.
        expected = prefix * total // (1 << 64)
        diff = count - expected
        maxdiff = max(maxdiff, abs(diff))
    # Avoid ZeroDivisionError for an empty index.
    percent = maxdiff * 100.0 / total if total else 0.0
    out.write(b'%d of %d (%.3f%%) ' % (maxdiff, total, percent))
    out.flush()
    assert(count+1 == total)
56
# Flush anything pending on sys.stdout before writing through the stream
# returned by byte_stream(), which is used for all byte output below.
sys.stdout.flush()
out = byte_stream(sys.stdout)

if opt.predict:
    # With --ignore-midx each entry of mi.packs is reported on its own;
    # otherwise the combined index list is reported once.
    if opt.ignore_midx:
        for pack in mi.packs:
            do_predict(pack, out)
    else:
        do_predict(mi, out)
else:
    # default mode: find longest matching prefix
    last = b'\0'*20
    longmatch = 0
    for i in mi:
        # Identical consecutive ids carry no information; skip them.
        if i == last:
            continue
        #assert(str(i) >= last)
        pm = _helpers.bitmatch(last, i)
        longmatch = max(longmatch, pm)
        last = i
    out.write(b'%d\n' % longmatch)
    log('%d matching prefix bits\n' % longmatch)
    objcount = len(mi)
    if objcount < 2 or longmatch == 0:
        # The extrapolation divides by log2(objcount) and then by the
        # resulting bits-per-doubling; with fewer than two objects or no
        # shared prefix bits either divisor is zero (or the log is
        # undefined), so there is nothing meaningful to estimate.
        log('not enough objects to estimate the remaining margin\n')
    else:
        doublings = math.log(objcount, 2)
        bpd = longmatch / doublings
        log('%.2f bits per doubling\n' % bpd)
        # 160 bits: the ids compared above are 20 bytes long.
        remain = 160 - longmatch
        rdoublings = remain / bpd
        log('%d bits (%.2f doublings) remaining\n' % (remain, rdoublings))
        larger = 2**rdoublings
        log('%g times larger is possible\n' % larger)
        perperson = larger/POPULATION_OF_EARTH
        log('\nEveryone on earth could have %d data sets like yours, all in one\n'
            'repository, and we would expect 1 object collision.\n'
            % int(perperson))
91