1#!/bin/sh 2"""": # -*-python-*- 3# https://sourceware.org/bugzilla/show_bug.cgi?id=26034 4export "BUP_ARGV_0"="$0" 5arg_i=1 6for arg in "$@"; do 7 export "BUP_ARGV_${arg_i}"="$arg" 8 shift 9 arg_i=$((arg_i + 1)) 10done 11# Here to end of preamble replaced during install 12bup_python="$(dirname "$0")/../../config/bin/python" || exit $? 13exec "$bup_python" "$0" 14""" 15# end of bup preamble 16 17from __future__ import absolute_import, division, print_function 18from binascii import hexlify 19import os, sys, time 20 21sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..'] 22 23from bup import compat, hashsplit, git, options, client 24from bup.compat import argv_bytes, environ 25from bup.helpers import (add_error, handle_ctrl_c, hostname, log, parse_num, 26 qprogress, reprogress, saved_errors, 27 valid_save_name, 28 parse_date_or_fatal) 29from bup.io import byte_stream 30from bup.pwdgrp import userfullname, username 31 32 33optspec = """ 34bup split [-t] [-c] [-n name] OPTIONS [--git-ids | filenames...] 35bup split -b OPTIONS [--git-ids | filenames...] 36bup split --copy OPTIONS [--git-ids | filenames...] 37bup split --noop [-b|-t] OPTIONS [--git-ids | filenames...] 38-- 39 Modes: 40b,blobs output a series of blob ids. Implies --fanout=0. 41t,tree output a tree id 42c,commit output a commit id 43n,name= save the result under the given name 44noop split the input, but throw away the result 45copy split the input, copy it to stdout, don't save to repo 46 Options: 47r,remote= remote repository path 48d,date= date for the commit (seconds since the epoch) 49q,quiet don't print progress messages 50v,verbose increase log output (can be used more than once) 51git-ids read a list of git object ids from stdin and split their contents 52keep-boundaries don't let one chunk span two input files 53bench print benchmark timings to stderr 54max-pack-size= maximum bytes in a single pack 55max-pack-objects= maximum number of objects in a single pack 56fanout= average number of blobs in a single tree 57bwlimit= maximum bytes/sec to transmit to server 58#,compress= set compression level to # (0-9, 9 is highest) [1] 59""" 60handle_ctrl_c() 61 62o = options.Options(optspec) 63opt, flags, extra = o.parse(compat.argv[1:]) 64if opt.name: opt.name = argv_bytes(opt.name) 65if opt.remote: opt.remote = argv_bytes(opt.remote) 66if opt.verbose is None: opt.verbose = 0 67 68if not (opt.blobs or opt.tree or opt.commit or opt.name or 69 opt.noop or opt.copy): 70 o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy") 71if opt.copy and (opt.blobs or opt.tree): 72 o.fatal('--copy is incompatible with -b, -t') 73if (opt.noop or opt.copy) and (opt.commit or opt.name): 74 o.fatal('--noop and --copy are incompatible with -c, -n') 75if opt.blobs and (opt.tree or opt.commit or opt.name): 76 o.fatal('-b is incompatible with -t, -c, -n') 77if extra and opt.git_ids: 78 o.fatal("don't provide filenames when using --git-ids") 79 80if opt.verbose >= 2: 81 git.verbose = opt.verbose - 1 82 opt.bench = 1 83 84max_pack_size = None 85if opt.max_pack_size: 86 max_pack_size = parse_num(opt.max_pack_size) 87max_pack_objects = None 88if opt.max_pack_objects: 89 max_pack_objects = parse_num(opt.max_pack_objects) 90 91if opt.fanout: 92 hashsplit.fanout = parse_num(opt.fanout) 93if opt.blobs: 94 hashsplit.fanout = 0 95if opt.bwlimit: 96 client.bwlimit = parse_num(opt.bwlimit) 97if opt.date: 98 date = parse_date_or_fatal(opt.date, o.fatal) 99else: 100 date = time.time() 101 102total_bytes = 0 103def prog(filenum, nbytes): 104 global total_bytes 105 total_bytes += nbytes 106 if filenum > 0: 107 qprogress('Splitting: file #%d, %d kbytes\r' 108 % (filenum+1, total_bytes // 1024)) 109 else: 110 qprogress('Splitting: %d kbytes\r' % (total_bytes // 1024)) 111 112 113is_reverse = environ.get(b'BUP_SERVER_REVERSE') 114if is_reverse and opt.remote: 115 o.fatal("don't use -r in reverse mode; it's automatic") 116start_time = time.time() 117 118if opt.name and not valid_save_name(opt.name): 119 o.fatal("'%r' is not a valid branch name." % opt.name) 120refname = opt.name and b'refs/heads/%s' % opt.name or None 121 122if opt.noop or opt.copy: 123 cli = pack_writer = oldref = None 124elif opt.remote or is_reverse: 125 git.check_repo_or_die() 126 cli = client.Client(opt.remote) 127 oldref = refname and cli.read_ref(refname) or None 128 pack_writer = cli.new_packwriter(compression_level=opt.compress, 129 max_pack_size=max_pack_size, 130 max_pack_objects=max_pack_objects) 131else: 132 git.check_repo_or_die() 133 cli = None 134 oldref = refname and git.read_ref(refname) or None 135 pack_writer = git.PackWriter(compression_level=opt.compress, 136 max_pack_size=max_pack_size, 137 max_pack_objects=max_pack_objects) 138 139input = byte_stream(sys.stdin) 140 141if opt.git_ids: 142 # the input is actually a series of git object ids that we should retrieve 143 # and split. 144 # 145 # This is a bit messy, but basically it converts from a series of 146 # CatPipe.get() iterators into a series of file-type objects. 147 # It would be less ugly if either CatPipe.get() returned a file-like object 148 # (not very efficient), or split_to_shalist() expected an iterator instead 149 # of a file. 150 cp = git.CatPipe() 151 class IterToFile: 152 def __init__(self, it): 153 self.it = iter(it) 154 def read(self, size): 155 v = next(self.it, None) 156 return v or b'' 157 def read_ids(): 158 while 1: 159 line = input.readline() 160 if not line: 161 break 162 if line: 163 line = line.strip() 164 try: 165 it = cp.get(line.strip()) 166 next(it, None) # skip the file info 167 except KeyError as e: 168 add_error('error: %s' % e) 169 continue 170 yield IterToFile(it) 171 files = read_ids() 172else: 173 # the input either comes from a series of files or from stdin. 174 files = extra and (open(argv_bytes(fn), 'rb') for fn in extra) or [input] 175 176if pack_writer: 177 new_blob = pack_writer.new_blob 178 new_tree = pack_writer.new_tree 179elif opt.blobs or opt.tree: 180 # --noop mode 181 new_blob = lambda content: git.calc_hash(b'blob', content) 182 new_tree = lambda shalist: git.calc_hash(b'tree', git.tree_encode(shalist)) 183 184sys.stdout.flush() 185out = byte_stream(sys.stdout) 186 187if opt.blobs: 188 shalist = hashsplit.split_to_blobs(new_blob, files, 189 keep_boundaries=opt.keep_boundaries, 190 progress=prog) 191 for (sha, size, level) in shalist: 192 out.write(hexlify(sha) + b'\n') 193 reprogress() 194elif opt.tree or opt.commit or opt.name: 195 if opt.name: # insert dummy_name which may be used as a restore target 196 mode, sha = \ 197 hashsplit.split_to_blob_or_tree(new_blob, new_tree, files, 198 keep_boundaries=opt.keep_boundaries, 199 progress=prog) 200 splitfile_name = git.mangle_name(b'data', hashsplit.GIT_MODE_FILE, mode) 201 shalist = [(mode, splitfile_name, sha)] 202 else: 203 shalist = hashsplit.split_to_shalist( 204 new_blob, new_tree, files, 205 keep_boundaries=opt.keep_boundaries, progress=prog) 206 tree = new_tree(shalist) 207else: 208 last = 0 209 it = hashsplit.hashsplit_iter(files, 210 keep_boundaries=opt.keep_boundaries, 211 progress=prog) 212 for (blob, level) in it: 213 hashsplit.total_split += len(blob) 214 if opt.copy: 215 sys.stdout.write(str(blob)) 216 megs = hashsplit.total_split // 1024 // 1024 217 if not opt.quiet and last != megs: 218 last = megs 219 220if opt.verbose: 221 log('\n') 222if opt.tree: 223 out.write(hexlify(tree) + b'\n') 224if opt.commit or opt.name: 225 msg = b'bup split\n\nGenerated by command:\n%r\n' % compat.argvb 226 ref = opt.name and (b'refs/heads/%s' % opt.name) or None 227 userline = b'%s <%s@%s>' % (userfullname(), username(), hostname()) 228 commit = pack_writer.new_commit(tree, oldref, userline, date, None, 229 userline, date, None, msg) 230 if opt.commit: 231 out.write(hexlify(commit) + b'\n') 232 233if pack_writer: 234 pack_writer.close() # must close before we can update the ref 235 236if opt.name: 237 if cli: 238 cli.update_ref(refname, commit, oldref) 239 else: 240 git.update_ref(refname, commit, oldref) 241 242if cli: 243 cli.close() 244 245secs = time.time() - start_time 246size = hashsplit.total_split 247if opt.bench: 248 log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n' 249 % (size / 1024, secs, size / 1024 / secs)) 250 251if saved_errors: 252 log('WARNING: %d errors encountered while saving.\n' % len(saved_errors)) 253 sys.exit(1) 254