#!/bin/sh
"""": # -*-python-*-
# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
export "BUP_ARGV_0"="$0"
arg_i=1
for arg in "$@"; do
    export "BUP_ARGV_${arg_i}"="$arg"
    shift
    arg_i=$((arg_i + 1))
done
# Here to end of preamble replaced during install
bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
exec "$bup_python" "$0"
"""
# end of bup preamble

# bup margin: report the longest run of leading bits shared by two
# neighbouring ids in the repository's pack indexes and extrapolate how
# much the repository could grow before the id space runs out, or (with
# --predict) report how far ids sit from the position predicted by
# their leading 64 bits.

from __future__ import absolute_import
import math, os.path, struct, sys

sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']

from bup import compat, options, git, _helpers
from bup.helpers import log
from bup.io import byte_stream

POPULATION_OF_EARTH=6.7e9  # as of September, 2010

optspec = """
bup margin
--
predict    Guess object offsets and report the maximum deviation
ignore-midx  Don't use midx files; use only plain pack idx files.
"""
o = options.Options(optspec)
opt, flags, extra = o.parse(compat.argv[1:])

if extra:
    o.fatal("no arguments expected")

git.check_repo_or_die()

mi = git.PackIdxList(git.repo(b'objects/pack'), ignore_midx=opt.ignore_midx)


def do_predict(ix, out):
    """Write the largest gap between predicted and actual positions in ix.

    Each entry's first 8 bytes are read as a big-endian unsigned 64-bit
    integer and scaled to an expected position in [0, len(ix)); the
    largest absolute difference from the entry's enumeration position
    is written to out as b'<maxdiff> of <total> (<percent>%) '.

    ix must support len() and iteration, yielding byte strings at least
    8 bytes long (presumably sorted object ids -- confirm against the
    index classes in bup.git).  out is a binary stream.

    An empty ix is reported as b'0 of 0 (0.000%) ' rather than failing
    on a division by zero.
    """
    total = len(ix)
    maxdiff = 0
    seen = 0
    for count, oid in enumerate(ix):
        prefix = struct.unpack('!Q', oid[:8])[0]
        # Where this entry would sit if the 64-bit prefixes were spread
        # evenly over the whole index.
        expected = prefix * total // (1 << 64)
        maxdiff = max(maxdiff, abs(count - expected))
        seen = count + 1
    percent = maxdiff * 100.0 / total if total else 0.0
    out.write(b'%d of %d (%.3f%%) ' % (maxdiff, total, percent))
    out.flush()
    # Iteration must have produced exactly len(ix) entries.
    assert seen == total


sys.stdout.flush()
out = byte_stream(sys.stdout)

if opt.predict:
    if opt.ignore_midx:
        for pack in mi.packs:
            do_predict(pack, out)
    else:
        do_predict(mi, out)
else:
    # default mode: find longest matching prefix
    last = b'\0'*20
    longmatch = 0
    for i in mi:
        if i == last:
            continue
        #assert(str(i) >= last)
        # NOTE(review): bitmatch presumably returns the number of leading
        # bits the two ids have in common -- confirm in _helpers.
        pm = _helpers.bitmatch(last, i)
        longmatch = max(longmatch, pm)
        last = i
    out.write(b'%d\n' % longmatch)
    log('%d matching prefix bits\n' % longmatch)
    objcount = len(mi)
    if objcount < 2 or longmatch == 0:
        # log2(objcount) is undefined for 0 and is 0 for 1, and a zero
        # longmatch makes bits-per-doubling 0; each case would otherwise
        # end in a ValueError or ZeroDivisionError below.
        log('not enough distinct objects to estimate a collision margin\n')
    else:
        doublings = math.log(objcount, 2)
        bpd = longmatch / doublings
        log('%.2f bits per doubling\n' % bpd)
        # 160 bits == the 20-byte id width used for `last` above.
        remain = 160 - longmatch
        rdoublings = remain / bpd
        log('%d bits (%.2f doublings) remaining\n' % (remain, rdoublings))
        larger = 2**rdoublings
        log('%g times larger is possible\n' % larger)
        perperson = larger/POPULATION_OF_EARTH
        log('\nEveryone on earth could have %d data sets like yours, all in one\n'
            'repository, and we would expect 1 object collision.\n'
            % int(perperson))