1#!/usr/bin/env python3
2#
3# Usage: gphoto-m4-sync [--diff] <dir>...
4#        gphoto-m4-sync --help
5#
6# The gphoto-m4-sync script helps with keeping track of which files in
7# which gphoto-m4 tree copy differ from the original gphoto-m4 tree.
8#
9# In normal operation, gphoto-m4-sync will search for gphoto-m4
10# directories anywhere in the directory trees given on the command
11# line and compare the gphoto-m4 tree from which gphoto-m4-sync was
12# started to those other trees.
13#
# When not given the --diff option, gphoto-m4-sync will print a human
15# readable report on which files are different in which gphoto-m4
16# tree.
17#
18# Options:
19#
20#   --diff   Print a list of 'diff' command lines to compare
21#            the different files instead. Pipe into something like
22#            "| sh | less" to execute.
23#
24#   --help   Print this help message.
25#
26# Exit code:
27#
28#   0    when no  differences have been found among the gphoto-m4 trees
29#   1    when any differences have been found among the gphoto-m4 trees
30#   2    any other error
31
32
33########################################################################
34
35
36import hashlib
37import os
38import sys
39
40
41########################################################################
42
43
class File(object):
    """One regular file inside a tree, with its stat info and SHA-1 digest.

    Attributes:
        tree:     the tree object this file belongs to (only ``tree.top``
                  is read here)
        fname:    file name relative to ``tree.top``
        fpath:    ``tree.top`` joined with ``fname``
        statinfo: result of ``os.stat()`` on ``fpath``
        digest:   hexadecimal SHA-1 digest of the file content

    Raises:
        OSError: if the file cannot be stat'ed or read.
    """

    # Read size used while hashing, so large files are never held in
    # memory as a whole.
    _CHUNK_SIZE = 65536

    def __init__(self, tree, fname):
        self.tree = tree
        self.fname = fname
        self.fpath = os.path.join(tree.top, fname)

        self.statinfo = os.stat(self.fpath)

        m = hashlib.sha1()
        # The context manager closes the file handle deterministically
        # instead of leaving that to the garbage collector.
        with open(self.fpath, 'rb') as stream:
            for chunk in iter(lambda: stream.read(self._CHUNK_SIZE), b''):
                m.update(chunk)
        self.digest = m.hexdigest()

    def __repr__(self):
        return 'File(%s,%s)' % (repr(self.fname), repr(self.digest))

    def __str__(self):
        return '%s %s' % (self.digest, self.fname)
62
63
64########################################################################
65
66
class BaseTree(object):
    """Collection of the files found below one top-level directory.

    The tree is scanned once at construction time.  Afterwards it can be
    iterated (yielding the relative file names in sorted order), indexed
    by relative file name (yielding the ``File`` object) and tested for
    membership with ``in``.

    Attributes:
        top: absolute path of the top-level directory
    """

    def __init__(self, top):
        self.top = os.path.abspath(top)
        self._files = self.__scan_files()

    def __repr__(self):
        return '%s(%s)[%s]' % (self.__class__.__name__, self.top, self._files)

    def __iter__(self):
        return iter(sorted(self._files))

    def __contains__(self, key):
        # Without this method, ``key in tree`` falls back to __iter__,
        # i.e. it sorts and linearly scans all file names on every test.
        return key in self._files

    def __getitem__(self, key):
        return self._files[key]

    def __scan_files(self):
        """Walk ``self.top`` and return a dict: relative file name -> File.

        The ``.git`` directory is not descended into.  Files ending in
        ``~``, files whose name starts with ``.git``, and files called
        ``Makefile`` or ``Makefile.in`` are ignored.
        """
        files = {}
        for dirpath, dirnames, filenames in os.walk(self.top, topdown=True):
            # With topdown=True, pruning dirnames in place keeps os.walk
            # from descending into the removed directory.
            if '.git' in dirnames:
                dirnames.remove('.git')

            for fname in filenames:
                if fname.endswith('~'):
                    continue
                if fname.startswith('.git'):
                    continue
                if fname in ('Makefile.in', 'Makefile'):
                    continue

                abs_fname = os.path.join(dirpath, fname)
                rel_fname = os.path.relpath(abs_fname, start=self.top)

                files[rel_fname] = File(self, rel_fname)
        return files
104
105
106########################################################################
107
108
class GitTree(BaseTree):
    """A tree whose top-level directory must contain a ``.git`` entry."""

    def __init__(self, top):
        git_entry = os.path.join(top, '.git')
        has_git_entry = os.path.exists(git_entry)
        if not has_git_entry:
            message = "File or directory does not exist: %s" % repr(git_entry)
            raise AssertionError(message)
        # Only scan the tree once the .git entry has been confirmed.
        super().__init__(top)
117
118
119########################################################################
120
121
class NotGitTree(BaseTree):
    """A tree whose top-level directory must NOT contain a ``.git`` entry."""

    def __init__(self, top):
        git_entry = os.path.join(top, '.git')
        has_git_entry = os.path.exists(git_entry)
        if has_git_entry:
            message = "File or directory does exist: %s" % repr(git_entry)
            raise AssertionError(message)
        # Only scan the tree once the absence of .git has been confirmed.
        super().__init__(top)
130
131
132########################################################################
133
134
def scan_tree(top):
    """Find gphoto-m4 tree copies anywhere below the directory ``top``.

    A directory counts as a gphoto-m4 tree when it is named ``gphoto-m4``
    and directly contains a file ``gp-camlibs.m4``.

    Yields:
        ``(dirpath, NotGitTree(dirpath))`` tuples, one per tree found.
    """
    for here, _subdirs, names in os.walk(top):
        named_like_tree = os.path.basename(here) == 'gphoto-m4'
        if named_like_tree and 'gp-camlibs.m4' in names:
            yield (here, NotGitTree(here))
141
142
143########################################################################
144
145
def print_help():
    """Print the comment header at the top of this script as help text.

    The script file (``__file__``) is read line by line.  Lines starting
    with ``#!`` are skipped, as are the bare ``#`` / ``# `` lines before
    the first header line.  From the first header line on, every line is
    printed with its first two characters removed, until the first empty
    line ends the header.
    """
    skip_line = True            # True until the first header line is seen
    skip_lines = ('#', '# ')

    # The context manager makes sure the script file is closed again.
    with open(__file__, 'r') as script:
        for raw_line in script:
            if raw_line.endswith('\n'):
                line = raw_line[:-1]
            else:
                line = raw_line

            if line.startswith('#!'):
                continue

            if skip_line:
                if line in skip_lines:
                    continue
                # First line of the actual header: start printing.
                skip_line = False
            elif line == '':
                break

            print(line[2:])
164
165
166########################################################################
167
168
class ResultTable(object):
    """Result lines keyed by file name, plus running difference totals.

    Attributes:
        lines:                  dict mapping key -> stored value
        files_with_differences: number of stored values whose
                                ``file_versions`` is greater than zero
        differences:            sum of ``file_versions`` over all values
    """

    def __init__(self):
        self.lines = {}
        self.files_with_differences = 0
        self.differences = 0

    def __setitem__(self, key, value):
        # Every key may be stored only once.
        assert key not in self.lines
        self.lines[key] = value
        versions = value.file_versions
        if versions > 0:
            self.files_with_differences += 1
        self.differences += versions

    def __getitem__(self, key):
        assert self.files_with_differences is not None
        return self.lines[key]

    def items(self):
        """Yield ``(key, value)`` pairs in sorted key order."""
        for key in sorted(self.lines):
            yield key, self.lines[key]

    def close(self):
        """Do nothing; counterpart to filling the table."""
        pass
194
195
196########################################################################
197
198
class ResultLine(object):
    """Per-file comparison result: one flag and one digest per tree index.

    Usage: call ``set_flag()`` / ``set_digest()`` for every tree index,
    then ``close()`` once, then read the values back with ``get_flag()``,
    ``get_fpath()`` and ``get_digest()``.

    Attributes:
        fname:         the file name this line describes
        file_versions: the value passed to ``close()`` (only available
                       after ``close()`` has been called)
    """

    # One letter per distinct digest, assigned in sorted digest order.
    _DIGEST_LETTERS = 'abcdefghijklmnopqrstuvwxyz'

    def __init__(self, fname):
        self.fname = fname

        self.__digest_map = {}      # tree index -> digest
        self.__digests = {}         # digest -> True (set of known digests)
        self.__digest_list = None   # sorted digests; None until close()

        self.__flags = {}           # tree index -> flag character

        self.__fpaths = {}          # tree index -> file path or None

    def set_digest(self, index, digest):
        """Record ``digest`` as the content digest for tree ``index``."""
        self.__digest_map[index] = digest
        self.__digests[digest] = True

    def close(self, file_versions):
        """Finish the line; ``file_versions`` is stored as an attribute."""
        self.file_versions = file_versions
        self.__digest_list = sorted(self.__digests.keys())
        assert len(self.__digest_list) > 0
        if file_versions == 0:
            # All files are equal, so we do not need different characters
            # to distinguish different digest values - a space will do as
            # well.
            self.__digest_map = {}

    def get_digest(self, index):
        """Return the letter for the digest of tree ``index``.

        Returns a single space if no digest is recorded for ``index``
        (or if the line was closed with ``file_versions == 0``).

        Raises:
            RuntimeError: if ``close()`` has not been called yet.
            IndexError:   if there are more distinct digests than letters.
        """
        if self.__digest_list is None:
            raise RuntimeError("You need to call ResultLine.close() "
                               "before ResultLine.get_digest()")
        if index not in self.__digest_map:
            return ' '
        position = self.__digest_list.index(self.__digest_map[index])
        if position >= len(self._DIGEST_LETTERS):
            raise IndexError("More than %d different contents for file %s"
                             % (len(self._DIGEST_LETTERS), repr(self.fname)))
        return self._DIGEST_LETTERS[position]

    def set_flag(self, index, flag, fpath):
        """Record the flag character and file path for tree ``index``."""
        self.__flags[index] = flag
        self.__fpaths[index] = fpath

    def get_flag(self, index):
        return self.__flags[index]

    def get_fpath(self, index):
        return self.__fpaths[index]
245
246
247########################################################################
248
249
def cmd_print_report(result_table, all_files, treelist, trees):
    """Print the human readable comparison report, then exit.

    Args:
        result_table: maps file name -> result line (read via
                      ``get_flag()``, ``get_digest()``, ``file_versions``);
                      also provides ``differences`` and
                      ``files_with_differences``
        all_files:    names of all files to list in the table
        treelist:     tree locations in column order (index 0 is the
                      original tree)
        trees:        dict of the non-original trees; only its size is
                      used, to know how many columns follow column 0

    This function does not return: it calls ``sys.exit()`` with 1 if
    ``result_table.differences`` is greater than zero, else with 0.
    """
    column_count = len(treelist)

    # Enumerate list of trees
    print("Trees (0 is the original tree):")
    for number, location in enumerate(treelist):
        print("  %d. %s" % (number, location))
    print()

    # The file name column is as wide as the longest file name
    fn_maxlen = max((len(name) for name in all_files), default=0)
    fmt = "    %%-%ds " % fn_maxlen
    print("File table:")

    # Table head
    print(fmt % '', end='')
    print((' {0:-^%d}' % (3 * column_count - 1)).format('Tree'))
    column_numbers = ''.join(' %2d' % number for number in range(column_count))
    print((fmt % 'file name') + column_numbers + '  file diffs')
    sep_width = fn_maxlen + 1 + 3 * column_count + 2 + len('file diffs')
    sep_line = '    ' + '-' * sep_width
    print(sep_line)

    # Table body: column 0 is the original tree, followed by one column
    # for each entry in ``trees``
    last_index = len(trees)
    for fname in sorted(all_files):
        result_line = result_table[fname]
        cells = ''.join(" %s%s" % (result_line.get_flag(index),
                                   result_line.get_digest(index))
                        for index in range(last_index + 1))
        if result_line.file_versions > 0:
            verdict = '   %3d' % result_line.file_versions
        else:
            verdict = '  ok'
        print((fmt % fname) + cells + verdict)
    print(sep_line)
    print()

    print("Legend:")
    legend = (
        ('N', 'new file'),
        ('O', 'original file'),
        ('/', 'no such file'),
        ('=', 'same content as the original file'),
        ('<', 'file with different content is younger than original file'),
        ('>', 'file with different content is older than original file'),
    )
    for symbol, meaning in legend:
        print("    %s  %s" % (symbol, meaning))
    print("    ")
    print("    Small letters identify file contents: Same letter means same content.")
    print()

    found_differences = result_table.differences > 0

    # Print summary
    print("Summary:")
    if found_differences:
        print("  About %d difference(s) found in %d file(s)." %
              (result_table.differences, result_table.files_with_differences))
        print("  ")
        print("  Diff commands for comparing differing files can be obtained with the")
        print("  '--diff' option.")
    else:
        print("  All gphoto-m4 trees are equal.")

    # Finally exit.
    sys.exit(1 if found_differences else 0)
329
330
331########################################################################
332
333
def print_diff_commands(diff_commands):
    """Print a shell script running ``diff -u`` for each given file pair.

    Args:
        diff_commands: iterable of tuples
            ``(fname, orig_dig, other_dig, orig_fpath, other_fpath)``.
            A false ``orig_fpath`` means "no such file" and is compared
            as ``/dev/null``.  A false ``other_dig`` makes the plain
            ``other_fpath`` the label of the second file.

    The output is meant to be piped into ``sh``, so file names, paths and
    labels are quoted for the shell.  For paths without any characters
    special to the shell, the output is the same as without quoting.
    """
    # Function-scope import: shlex is only needed for this output mode.
    import shlex

    def double_quoted(text):
        # Inside sh double quotes, only \ " $ and ` keep a special
        # meaning.  The backslash must be escaped first.
        for special in ('\\', '"', '$', '`'):
            text = text.replace(special, '\\' + special)
        return '"%s"' % text

    print("#!/bin/sh")
    print("#")
    print("# This file has been autogenerated by %s" % __file__)
    print("#")
    print("# List of diff commands.  You can pipe these into")
    print("#     | sh | colordiff | less -r '+/comparing '")
    print("# or")
    print("#     | sh | less '+/^comparing '")
    print("# or")
    print("#     | less")
    for fname, orig_dig, other_dig, orig_fpath, other_fpath in diff_commands:
        if orig_fpath:
            orig_label = "%s (digest '%s')" % (orig_fpath, orig_dig)
        else:
            orig_fpath = '/dev/null'
            orig_label = '(no such file)'

        if other_dig:
            other_label = "%s (digest '%s')" % (other_fpath, other_dig)
        else:
            other_label = other_fpath

        print()
        print("echo %s" % shlex.quote('comparing fname %s' % fname))
        print("diff -u --label %s %s --label %s %s"
              % (double_quoted(orig_label), shlex.quote(orig_fpath),
                 double_quoted(other_label), shlex.quote(other_fpath)))
362
363
364########################################################################
365
366
def gphoto_m4_sync(dir_list, print_diffs):
    """Compare this script's gphoto-m4 tree with the copies in ``dir_list``.

    Args:
        dir_list:    directories to search for gphoto-m4 tree copies
        print_diffs: if true, print diff commands instead of the report

    The original tree is the directory containing this script; it must
    contain a ``.git`` entry (``GitTree`` raises AssertionError if not).

    This function normally does not return:
      * exits with 2 if no gphoto-m4 tree was found in ``dir_list``
      * in report mode, ``cmd_print_report()`` exits with 0 or 1
      * in diff mode, exits with 0 after printing the diff commands
    """

    # List all files in this clone of the `gphoto-m4` repository
    orig_top = os.path.dirname(os.path.abspath(__file__))
    orig_tree = GitTree(orig_top)

    # For each `gphoto-m4` directory given on the command line, find
    # all files.
    trees = {}
    for top in dir_list:
        for dirpath, tree in scan_tree(os.path.abspath(top)):
            trees[dirpath] = tree

    if not trees:
        print("No gphoto-m4 trees found in directories given on command line.")
        sys.exit(2)

    # Tree index 0 is the original tree; the copies follow in sorted
    # order of their location.
    tree_tops = sorted(trees)
    all_trees = [orig_tree] + [trees[top] for top in tree_tops]

    # Make a list of all files within all `gphoto-m4` trees
    all_names = set()
    for tree in all_trees:
        all_names.update(tree)
    all_files = sorted(all_names)

    result_table = _build_result_table(all_files, all_trees)

    if print_diffs:
        print_diff_commands(_collect_diff_commands(result_table, all_trees))
        # NOTE(review): diff mode always exits with 0, even when
        # differences were found - confirm this is intended.
        sys.exit(0)
    else:
        cmd_print_report(result_table, all_files,
                         [orig_top] + tree_tops,
                         trees)


def _tree_file(tree, fname):
    """Return ``tree[fname]``, or None if the tree has no such file."""
    try:
        return tree[fname]
    except KeyError:
        return None


def _build_result_table(all_files, all_trees):
    """Return a closed ResultTable with one ResultLine per file name.

    ``all_trees[0]`` is the original tree.  Flags set per tree index:
      index 0:  'O' file exists in the original tree, '/' it does not
      others:   '/' no such file, '=' same digest as the original,
                '>' / '<' different digest and mtime greater / less than
                the original's, 'N' otherwise
    """
    orig_tree = all_trees[0]
    result_table = ResultTable()
    for fname in all_files:
        result_line = ResultLine(fname)

        orig_file = _tree_file(orig_tree, fname)
        if orig_file is None:
            orig_dig = None
            result_line.set_flag(0, '/', None)
        else:
            orig_dig = orig_file.digest
            result_line.set_flag(0, 'O', orig_file.fpath)
            result_line.set_digest(0, orig_dig)

        # Without an original, every existing copy counts as a difference;
        # with an original, everything but an identical copy does.
        no_difference_flag = '=' if orig_dig else '/'

        file_diffs = 0
        for tree_idx, tree in enumerate(all_trees[1:], start=1):
            other_file = _tree_file(tree, fname)
            if other_file is None:
                flag = '/'
                fpath = None
            else:
                fpath = other_file.fpath
                flag = 'N'
                if orig_dig == other_file.digest:
                    flag = '='
                elif orig_dig:
                    # NOTE(review): '>' is set when the copy was modified
                    # more recently than the original - confirm this
                    # matches the legend printed by the report.  With
                    # equal mtimes the flag stays 'N'.
                    other_mtime = other_file.statinfo.st_mtime
                    orig_mtime = orig_file.statinfo.st_mtime
                    if other_mtime > orig_mtime:
                        flag = '>'
                    elif other_mtime < orig_mtime:
                        flag = '<'
                result_line.set_digest(tree_idx, other_file.digest)

            result_line.set_flag(tree_idx, flag, fpath)
            if flag != no_difference_flag:
                file_diffs += 1

        result_line.close(file_diffs)
        result_table[fname] = result_line
    result_table.close()
    return result_table


def _collect_diff_commands(result_table, all_trees):
    """Return the minimal list of diff command tuples for differing files.

    For each file with differences, the first tree containing the file
    is compared once against each distinct other content.  Each tuple is
    ``(fname, dig_a, dig_b, fpath_a, fpath_b)``.
    """
    diff_commands = []
    for fname, result_line in result_table.items():
        if not result_line.file_versions > 0:
            continue
        for idx_a, tree_a in enumerate(all_trees):
            dig_a = result_line.get_digest(idx_a)
            if dig_a == ' ':
                continue    # this tree does not have the file

            # Only this one tree serves as the left-hand side, so
            # remembering the right-hand digests suffices to avoid
            # duplicate comparisons.
            seen_digests = set()
            for idx_b, tree_b in enumerate(all_trees):
                dig_b = result_line.get_digest(idx_b)
                if dig_b == ' ' or dig_b == dig_a or dig_b in seen_digests:
                    continue
                seen_digests.add(dig_b)
                diff_commands.append((fname, dig_a, dig_b,
                                      tree_a[fname].fpath,
                                      tree_b[fname].fpath))
            break
    return diff_commands
510
511
512#######################################################################
513
514
def main(args):
    """Parse the command line arguments and run the comparison.

    Args:
        args: command line arguments without the program name

    Options are only recognized before the first directory argument; a
    ``--`` argument ends option parsing.  Without any arguments, or with
    ``--help``, the help text is printed and the program exits with 0.

    Raises:
        ValueError: on an unknown ``--`` option.
    """
    if not args:
        print_help()
        sys.exit(0)

    arg_diff = False
    # Index of the first directory argument.  If every argument turns out
    # to be an option, no directories are left over.
    first_dir = len(args)
    for idx, arg in enumerate(args):
        if arg == '--':
            first_dir = idx + 1
            break
        elif arg == '--help':
            print_help()
            sys.exit(0)
        elif arg == '--diff':
            arg_diff = True
        elif arg.startswith('--'):
            raise ValueError("Unhandled command line option '%s'" % arg)
        else:
            first_dir = idx
            break

    dir_list = args[first_dir:]

    gphoto_m4_sync(dir_list, arg_diff)
542
543
544########################################################################
545
546
if __name__ == '__main__':
    try:
        main(sys.argv[1:])
    except Exception:
        # The documented exit code for "any other error" is 2.  An
        # uncaught exception would make the interpreter exit with 1,
        # which is reserved for "differences found".  SystemExit is not
        # an Exception subclass, so regular sys.exit() calls pass through.
        import traceback
        traceback.print_exc()
        sys.exit(2)
549
550
551########################################################################
552