1#!/bin/sh
2"""": # -*-python-*-
3# https://sourceware.org/bugzilla/show_bug.cgi?id=26034
4export "BUP_ARGV_0"="$0"
5arg_i=1
6for arg in "$@"; do
7    export "BUP_ARGV_${arg_i}"="$arg"
8    shift
9    arg_i=$((arg_i + 1))
10done
11# Here to end of preamble replaced during install
12bup_python="$(dirname "$0")/../../config/bin/python" || exit $?
13exec "$bup_python" "$0"
14"""
15# end of bup preamble
16
17from __future__ import absolute_import, print_function
18from binascii import hexlify, unhexlify
19from collections import defaultdict
20from itertools import groupby
21from sys import stderr
22from time import localtime, strftime, time
23import os.path, re, sys
24
25sys.path[:0] = [os.path.dirname(os.path.realpath(__file__)) + '/..']
26
27from bup import compat, git, options
28from bup.compat import argv_bytes, int_types
29from bup.gc import bup_gc
30from bup.helpers import die_if_errors, log, partition, period_as_secs
31from bup.io import byte_stream
32from bup.repo import LocalRepo
33from bup.rm import bup_rm
34
35
36def branches(refnames=tuple()):
37    return ((name[11:], hexlify(sha)) for (name,sha)
38            in git.list_refs(patterns=(b'refs/heads/' + n for n in refnames),
39                             limit_to_heads=True))
40
41def save_name(branch, utc):
42    return branch + b'/' \
43            + strftime('%Y-%m-%d-%H%M%S', localtime(utc)).encode('ascii')
44
45def classify_saves(saves, period_start):
46    """For each (utc, id) in saves, yield (True, (utc, id)) if the save
47    should be kept and (False, (utc, id)) if the save should be removed.
48    The ids are binary hashes.
49    """
50
51    def retain_newest_in_region(region):
52        for save in region[0:1]:
53            yield True, save
54        for save in region[1:]:
55            yield False, save
56
57    matches, rest = partition(lambda s: s[0] >= period_start['all'], saves)
58    for save in matches:
59        yield True, save
60
61    tm_ranges = ((period_start['dailies'], lambda s: localtime(s[0]).tm_yday),
62                 (period_start['monthlies'], lambda s: localtime(s[0]).tm_mon),
63                 (period_start['yearlies'], lambda s: localtime(s[0]).tm_year))
64
65    # Break the decreasing utc sorted saves up into the respective
66    # period ranges (dailies, monthlies, ...).  Within each range,
67    # group the saves by the period scale (days, months, ...), and
68    # then yield a "keep" action (True, utc) for the newest save in
69    # each group, and a "drop" action (False, utc) for the rest.
70    for pstart, time_region_id in tm_ranges:
71        matches, rest = partition(lambda s: s[0] >= pstart, rest)
72        for region_id, region_saves in groupby(matches, time_region_id):
73            for action in retain_newest_in_region(list(region_saves)):
74                yield action
75
76    # Finally, drop any saves older than the specified periods
77    for save in rest:
78        yield False, save
79
80
81optspec = """
82bup prune-older [options...] [BRANCH...]
83--
84keep-all-for=       retain all saves within the PERIOD
85keep-dailies-for=   retain the newest save per day within the PERIOD
86keep-monthlies-for= retain the newest save per month within the PERIOD
87keep-yearlies-for=  retain the newest save per year within the PERIOD
88wrt=                end all periods at this number of seconds since the epoch
89pretend       don't prune, just report intended actions to standard output
90gc            collect garbage after removals [1]
91gc-threshold= only rewrite a packfile if it's over this percent garbage [10]
92#,compress=   set compression level to # (0-9, 9 is highest) [1]
93v,verbose     increase log output (can be used more than once)
94unsafe        use the command even though it may be DANGEROUS
95"""
96
97o = options.Options(optspec)
98opt, flags, roots = o.parse(compat.argv[1:])
99roots = [argv_bytes(x) for x in roots]
100
101if not opt.unsafe:
102    o.fatal('refusing to run dangerous, experimental command without --unsafe')
103
104now = int(time()) if opt.wrt is None else opt.wrt
105if not isinstance(now, int_types):
106    o.fatal('--wrt value ' + str(now) + ' is not an integer')
107
108period_start = {}
109for period, extent in (('all', opt.keep_all_for),
110                       ('dailies', opt.keep_dailies_for),
111                       ('monthlies', opt.keep_monthlies_for),
112                       ('yearlies', opt.keep_yearlies_for)):
113    if extent:
114        secs = period_as_secs(extent.encode('ascii'))
115        if not secs:
116            o.fatal('%r is not a valid period' % extent)
117        period_start[period] = now - secs
118
119if not period_start:
120    o.fatal('at least one keep argument is required')
121
122period_start = defaultdict(lambda: float('inf'), period_start)
123
124if opt.verbose:
125    epoch_ymd = strftime('%Y-%m-%d-%H%M%S', localtime(0))
126    for kind in ['all', 'dailies', 'monthlies', 'yearlies']:
127        period_utc = period_start[kind]
128        if period_utc != float('inf'):
129            if not (period_utc > float('-inf')):
130                log('keeping all ' + kind)
131            else:
132                try:
133                    when = strftime('%Y-%m-%d-%H%M%S', localtime(period_utc))
134                    log('keeping ' + kind + ' since ' + when + '\n')
135                except ValueError as ex:
136                    if period_utc < 0:
137                        log('keeping %s since %d seconds before %s\n'
138                            %(kind, abs(period_utc), epoch_ymd))
139                    elif period_utc > 0:
140                        log('keeping %s since %d seconds after %s\n'
141                            %(kind, period_utc, epoch_ymd))
142                    else:
143                        log('keeping %s since %s\n' % (kind, epoch_ymd))
144
145git.check_repo_or_die()
146
147# This could be more efficient, but for now just build the whole list
148# in memory and let bup_rm() do some redundant work.
149
150def parse_info(f):
151    author_secs = f.readline().strip()
152    return int(author_secs)
153
154sys.stdout.flush()
155out = byte_stream(sys.stdout)
156
157removals = []
158for branch, branch_id in branches(roots):
159    die_if_errors()
160    saves = ((utc, unhexlify(oidx)) for (oidx, utc) in
161             git.rev_list(branch_id, format=b'%at', parse=parse_info))
162    for keep_save, (utc, id) in classify_saves(saves, period_start):
163        assert(keep_save in (False, True))
164        # FIXME: base removals on hashes
165        if opt.pretend:
166            out.write(b'+ ' if keep_save else b'- '
167                      + save_name(branch, utc) + b'\n')
168        elif not keep_save:
169            removals.append(save_name(branch, utc))
170
171if not opt.pretend:
172    die_if_errors()
173    repo = LocalRepo()
174    bup_rm(repo, removals, compression=opt.compress, verbosity=opt.verbose)
175    if opt.gc:
176        die_if_errors()
177        bup_gc(threshold=opt.gc_threshold,
178               compression=opt.compress,
179               verbosity=opt.verbose)
180
181die_if_errors()
182