#!/usr/bin/env python3
#
# Usage: gphoto-m4-sync [--diff] <dir>...
#        gphoto-m4-sync --help
#
# The gphoto-m4-sync script helps with keeping track of which files in
# which gphoto-m4 tree copy differ from the original gphoto-m4 tree.
#
# In normal operation, gphoto-m4-sync will search for gphoto-m4
# directories anywhere in the directory trees given on the command
# line and compare the gphoto-m4 tree from which gphoto-m4-sync was
# started to those other trees.
#
# When not given the --diff option, gphoto-m4-sync will print a human
# readable report on which files are different in which gphoto-m4
# tree.
#
# Options:
#
#   --diff    Print a list of 'diff' command lines to compare
#             the different files instead. Pipe into something like
#             "| sh | less" to execute.
#
#   --help    Print this help message.
#
# Exit code:
#
#   0    when no differences have been found among the gphoto-m4 trees
#   1    when any differences have been found among the gphoto-m4 trees
#   2    any other error
#
# With --diff, the exit code is 0 regardless of differences found.

# NOTE: print_help() prints the comment block above (up to the first
# empty line), so that block is user-visible help text, not just a comment.


########################################################################


import hashlib
import os
import shlex
import sys


########################################################################


class File(object):
    """A single file inside a tree, with its stat info and SHA-1 digest."""

    def __init__(self, tree, fname):
        """Stat and hash the file `fname` (relative to `tree.top`).

        Raises OSError if the file cannot be stat'ed or read.
        """
        self.tree = tree
        self.fname = fname
        self.fpath = os.path.join(tree.top, fname)

        self.statinfo = os.stat(self.fpath)

        m = hashlib.sha1()
        # Use a context manager so the file handle is closed right away
        # instead of lingering until garbage collection.
        with open(self.fpath, 'rb') as f:
            m.update(f.read())
        self.digest = m.hexdigest()

    def __repr__(self):
        return 'File(%s,%s)' % (repr(self.fname), repr(self.digest))

    def __str__(self):
        return '%s %s' % (self.digest, self.fname)


########################################################################


class BaseTree(object):
    """Mapping of relative file name -> File for all files below `top`.

    Iterating a tree yields the relative file names in sorted order.
    """

    def __init__(self, top):
        self.top = os.path.abspath(top)
        self._files = self.__scan_files()

    def __repr__(self):
        return '%s(%s)[%s]' % (self.__class__.__name__, self.top,
                               self._files)

    def __iter__(self):
        return iter(sorted(self._files))

    def __contains__(self, key):
        # Without this method, `fname in tree` falls back to __iter__,
        # i.e. sorts and linearly scans all file names on every test.
        return key in self._files

    def __getitem__(self, key):
        return self._files[key]

    def __scan_files(self):
        """Walk `self.top` and return a dict of relative name -> File."""
        files = {}
        for dirpath, dirnames, filenames in os.walk(self.top, topdown=True):
            try:  # do not descend into these directories
                dirnames.remove('.git')
            except ValueError:
                pass

            for fname in filenames:
                # Ignore a bunch of files
                if fname.endswith('~'):
                    continue
                if fname.startswith('.git'):
                    continue
                if fname in ('Makefile.in', 'Makefile'):
                    continue

                abs_fname = os.path.join(dirpath, fname)
                rel_fname = os.path.relpath(abs_fname, start=self.top)

                files[rel_fname] = File(self, rel_fname)
        return files


########################################################################


class GitTree(BaseTree):
    """A tree which must contain a `.git` entry at its top."""

    def __init__(self, top):
        path = os.path.join(top, '.git')
        if not os.path.exists(path):
            raise AssertionError("File or directory does not exist: %s" %
                                 repr(path))
        super(GitTree, self).__init__(top)


########################################################################


class NotGitTree(BaseTree):
    """A tree which must NOT contain a `.git` entry at its top."""

    def __init__(self, top):
        path = os.path.join(top, '.git')
        if os.path.exists(path):
            raise AssertionError("File or directory does exist: %s" %
                                 repr(path))
        super(NotGitTree, self).__init__(top)


########################################################################


def scan_tree(top):
    """Yield (dirpath, NotGitTree) for every gphoto-m4 directory below `top`.

    A directory counts as a gphoto-m4 tree if it is named 'gphoto-m4'
    and directly contains a file named 'gp-camlibs.m4'.
    """
    for dirpath, dirnames, filenames in os.walk(top):
        if os.path.basename(dirpath) != 'gphoto-m4':
            continue
        if 'gp-camlibs.m4' not in filenames:
            continue
        yield (dirpath, NotGitTree(dirpath))


########################################################################


def print_help():
    """Print the comment block at the top of this file as the help text.

    The '#!' line and the leading empty comment lines are skipped, each
    following line is printed without its first two characters, and
    printing stops at the first completely empty line.
    """
    skip_line = True
    skip_lines = ['#', '# ']
    with open(__file__, 'r') as source:
        for line in source:
            if line[-1] == '\n':
                line = line[:-1]

            if line.startswith('#!'):
                continue
            elif skip_line and (line in skip_lines):
                continue
            elif skip_line and (line not in skip_lines):
                skip_line = False
            elif line == '':
                break

            if not skip_line:
                print(line[2:])


########################################################################


class ResultTable(object):
    """Collection of ResultLine objects keyed by file name, plus totals."""

    def __init__(self):
        self.lines = {}
        self.files_with_differences = 0
        self.differences = 0

    def __setitem__(self, key, value):
        """Add the (closed) ResultLine `value` for file name `key`."""
        assert(key not in self.lines)
        self.lines[key] = value
        if value.file_versions > 0:
            self.files_with_differences += 1
            self.differences += value.file_versions

    def __getitem__(self, key):
        assert(self.files_with_differences is not None)
        return self.lines[key]

    def items(self):
        """Yield (file name, ResultLine) pairs sorted by file name."""
        for k in sorted(self.lines.keys()):
            v = self.lines[k]
            yield k,v

    def close(self):
        pass


########################################################################


class ResultLine(object):
    """Per-file comparison result: one flag/digest/path per tree index.

    Tree index 0 is the original tree. Call close() after all digests
    have been set and before calling get_digest().
    """

    def __init__(self, fname):
        self.fname = fname

        self.__digest_map = {}
        self.__digests = {}
        self.__digest_list = None

        self.__flags = {}

        self.__fpaths = {}

    def set_digest(self, index, digest):
        self.__digest_map[index] = digest
        self.__digests[digest] = True

    def close(self, file_versions):
        """Freeze the set of digests and record the number of differences."""
        self.file_versions = file_versions
        self.__digest_list = sorted(self.__digests.keys())
        assert(len(self.__digest_list) > 0)
        if file_versions == 0:
            # All files are equal, so we do not need different characters
            # to distinguish different digest values - a space will do as
            # well.
            self.__digest_map = {}

    def get_digest(self, index):
        """Return the one-letter content id for tree `index`.

        Returns ' ' if no digest is recorded for that tree (or if all
        copies were equal). Raises RuntimeError before close() has been
        called, and IndexError if a file has more than 26 distinct digests.
        """
        if self.__digest_list is None:
            raise RuntimeError("You need to call ResultLine.close() before ResultLine.get_digest()")
        if index in self.__digest_map:
            dig = self.__digest_map[index]
            idx = self.__digest_list.index(dig)
            return 'abcdefghijklmnopqrstuvwxyz'[idx]
        else:
            return ' '

    def set_flag(self, index, flag, fpath):
        self.__flags[index] = flag
        self.__fpaths[index] = fpath

    def get_flag(self, index):
        return self.__flags[index]

    def get_fpath(self, index):
        return self.__fpaths[index]


########################################################################


def cmd_print_report(result_table, all_files, treelist, trees):
    """Print the human readable report and exit.

    `treelist` is the list of tree top directories in table column
    order (index 0 being the original tree), `trees` the dict of the
    non-original trees. Exits with status 1 if `result_table` recorded
    any differences, with status 0 otherwise. Never returns.
    """
    # Enumerate list of trees
    print("Trees (0 is the original tree):")
    for i, tree in enumerate(treelist):
        print(" %d. %s" % (i,tree))
    print()

    # Determine maximum length of file name
    fn_maxlen = max((len(fn) for fn in all_files), default=0)

    fmt = " %%-%ds " % fn_maxlen
    print("File table:")

    # print table head
    print(fmt % '', end='')
    print((' {0:-^%d}' % (3*len(treelist)-1)).format('Tree'))
    print(fmt % 'file name', end='')
    for i in range(len(treelist)):
        print(' %2d' % i, end='')
    print(' file diffs')
    sep_line = (' ' +
                '-' * (fn_maxlen + 1 + 3*len(treelist) + 2 + len('file diffs')))
    print(sep_line)

    # print table body
    for fname in sorted(all_files):
        result_line = result_table[fname]
        print(fmt % fname, end='')
        print(" %s%s" % (result_line.get_flag(0),
                         result_line.get_digest(0)), end='')

        # The non-original trees occupy indices 1..len(trees)
        for tree_idx in range(1, len(trees) + 1):
            print(" %s%s" % (result_line.get_flag(tree_idx),
                             result_line.get_digest(tree_idx)), end='')

        if result_line.file_versions > 0:
            print(' %3d' % result_line.file_versions)
        else:
            print(' ok')
    print(sep_line)
    print()

    print("Legend:")
    # '>' is set when the copy's mtime is greater than the original's,
    # i.e. the copy was modified more recently (is younger); '<' is the
    # opposite. The descriptions below match that comparison.
    legend = [
        ('N', 'new file'),
        ('O', 'original file'),
        ('/', 'no such file'),
        ('=', 'same content as the original file'),
        ('>', 'file with different content is younger than original file'),
        ('<', 'file with different content is older than original file'),
    ]
    for ch, descr in legend:
        print(" %s %s" % (ch, descr))
    print(" ")
    print(" Small letters identify file contents: Same letter means same content.")
    print()

    # Determine exit code
    exit_code = 0
    if result_table.differences > 0:
        exit_code = 1

    # Print summary
    print("Summary:")
    if result_table.differences > 0:
        print(" About %d difference(s) found in %d file(s)." %
              (result_table.differences, result_table.files_with_differences))
        print(" ")
        print(" Diff commands for comparing differing files can be obtained with the")
        print(" '--diff' option.")
    else:
        print(" All gphoto-m4 trees are equal.")

    # Finally exit.
    sys.exit(exit_code)


########################################################################


def print_diff_commands(diff_commands):
    """Print a shell script which runs one `diff -u` per diff command.

    `diff_commands` is an iterable of tuples
    (fname, orig_dig, other_dig, orig_fpath, other_fpath).
    A false `orig_fpath` is replaced by /dev/null.
    """
    print("#!/bin/sh")
    print("#")
    print("# This file has been autogenerated by %s" % __file__)
    print("#")
    print("# List of diff commands. You can pipe these into")
    print("# | sh | colordiff | less -r '+/comparing '")
    print("# or")
    print("# | sh | less '+/^comparing '")
    print("# or")
    print("# | less")
    for fname, orig_dig, other_dig, orig_fpath, other_fpath in diff_commands:
        if orig_fpath:
            orig_label = "%s (digest '%s')" % (orig_fpath, orig_dig)
        else:
            orig_fpath = '/dev/null'
            orig_label = '(no such file)'

        if other_dig:
            other_label = "%s (digest '%s')" % (other_fpath, other_dig)
        else:
            other_label = other_fpath

        # The output is meant to be executed by sh, so every value taken
        # from the file system must be quoted: otherwise file names with
        # spaces or shell metacharacters break (or inject) commands.
        print()
        print("echo %s" % shlex.quote("comparing fname %s" % fname))
        print("diff -u --label %s %s --label %s %s"
              % (shlex.quote(orig_label), shlex.quote(orig_fpath),
                 shlex.quote(other_label), shlex.quote(other_fpath)))


########################################################################


def build_result_table(orig_tree, trees):
    """Compare `orig_tree` with all `trees` and return the results.

    `trees` maps tree top directory -> tree; the trees get the indices
    1..len(trees) in sorted order of their top directory, index 0 is
    `orig_tree`.

    Returns the tuple (all_files, result_table) where `all_files` is
    the sorted list of all relative file names found in any tree.
    """
    # Make a list of all files within all `gphoto-m4` trees
    names = set(orig_tree)
    for tree in trees.values():
        names.update(tree)
    all_files = sorted(names)

    tree_tops = sorted(trees)

    # calculate table values
    result_table = ResultTable()
    for fname in all_files:
        result_line = ResultLine(fname)
        file_diffs = 0
        if fname in orig_tree:
            orig_file = orig_tree[fname]
            orig_dig = orig_file.digest
            result_line.set_flag(0, 'O', orig_file.fpath)
            result_line.set_digest(0, orig_dig)
        else:
            orig_file = None
            orig_dig = None
            result_line.set_flag(0, '/', None)

        for tree_idx, tree_top in enumerate(tree_tops, start=1):
            tree = trees[tree_top]
            if fname in tree:
                other_file = tree[fname]
                dig = other_file.digest
                fpath = other_file.fpath
                # NOTE: 'N' also remains for a file which differs from
                # the original but has exactly the same mtime.
                flag = 'N'
                if orig_dig == dig:
                    flag = '='
                elif orig_dig:
                    other_mtime = other_file.statinfo.st_mtime
                    orig_mtime = orig_file.statinfo.st_mtime
                    if other_mtime > orig_mtime:
                        flag = '>'
                    elif other_mtime < orig_mtime:
                        flag = '<'
                result_line.set_digest(tree_idx, dig)
            else:
                flag = '/'
                fpath = None
            result_line.set_flag(tree_idx, flag, fpath)

            # With an original file, everything but an equal copy is a
            # difference; without one, every existing copy is.
            if orig_dig:
                if flag != '=':
                    file_diffs += 1
            else:
                if flag != '/':
                    file_diffs += 1

        result_line.close(file_diffs)
        result_table[fname] = result_line
    result_table.close()

    return all_files, result_table


def compute_diff_commands(result_table, orig_tree, trees):
    """Return the list of diff commands for all files with differences.

    Each command is a tuple (fname, dig_a, dig_b, fpath_a, fpath_b) as
    expected by print_diff_commands(). For every tree holding a copy of
    the file, at most one command is generated, pairing it with the
    first tree whose content differs and whose pair of content ids has
    not been covered yet (in either order).
    """
    all_trees = [orig_tree] + [trees[k] for k in sorted(trees)]

    diff_commands = []
    for fname, result_line in result_table.items():
        if result_line.file_versions <= 0:
            continue
        digests = [result_line.get_digest(idx)
                   for idx in range(len(all_trees))]
        seen_pairs = set()
        for tree_a, dig_a in zip(all_trees, digests):
            if dig_a == ' ':  # no such file in tree_a
                continue
            for tree_b, dig_b in zip(all_trees, digests):
                if dig_a == dig_b:
                    continue
                if dig_b == ' ':  # no such file in tree_b
                    continue
                if (dig_a, dig_b) in seen_pairs:
                    continue
                if (dig_b, dig_a) in seen_pairs:
                    continue
                seen_pairs.add((dig_a, dig_b))
                diff_commands.append((fname, dig_a, dig_b,
                                      tree_a[fname].fpath,
                                      tree_b[fname].fpath))
                break
    return diff_commands


def gphoto_m4_sync(dir_list, print_diffs):
    """Compare this script's gphoto-m4 tree to all copies below `dir_list`.

    Prints the diff commands if `print_diffs` is true, the human
    readable report otherwise. Always ends by calling sys.exit():
    status 2 if no gphoto-m4 tree was found, status 0 after printing
    diff commands, and the status chosen by cmd_print_report() otherwise.

    Raises AssertionError if the directory of this script does not
    contain a `.git` entry, or if a found gphoto-m4 copy does.
    """
    # List all files in this clone of the `gphoto-m4` repository
    orig_top = os.path.dirname(os.path.abspath(__file__))
    orig_tree = GitTree(orig_top)

    # For each `gphoto-m4` directory given on the command line, find
    # all files.
    trees = {}
    for top in dir_list:
        for dirpath, tree in scan_tree(os.path.abspath(top)):
            trees[dirpath] = tree

    if not trees:
        # Must go to stderr: with --diff, stdout is meant to be piped
        # into sh, which would try to execute the message.
        print("No gphoto-m4 trees found in directories given on command line.",
              file=sys.stderr)
        sys.exit(2)

    all_files, result_table = build_result_table(orig_tree, trees)

    # Print diffs
    if print_diffs:
        print_diff_commands(
            compute_diff_commands(result_table, orig_tree, trees))
        sys.exit(0)

    # Print report (exits with the appropriate exit code)
    cmd_print_report(result_table, all_files,
                     [orig_top] + sorted(trees),
                     trees)


#######################################################################


def parse_args(args):
    """Split the command line `args` into (print_diffs, dir_list).

    Option parsing stops at '--' (which is consumed) or at the first
    argument not starting with '--'. '--help' prints the help text and
    exits with status 0. Raises ValueError for an unknown option.
    """
    arg_diff = False
    # If all arguments turn out to be options, there are no directories.
    first_dir = len(args)
    for i, arg in enumerate(args):
        if arg == '--':
            first_dir = i + 1
            break
        elif arg == '--help':
            print_help()
            sys.exit(0)
        elif arg == '--diff':
            arg_diff = True
        elif arg.startswith('--'):
            raise ValueError("Unhandled command line option '%s'" % arg)
        else:
            first_dir = i
            break

    return arg_diff, args[first_dir:]


def main(args):
    """Run the script with the command line arguments `args`.

    Without any arguments, prints the help text and exits with status 0.
    Raises ValueError for an unknown option.
    """
    if not args:
        print_help()
        sys.exit(0)

    arg_diff, dir_list = parse_args(args)
    gphoto_m4_sync(dir_list, arg_diff)


########################################################################


if __name__ == '__main__':
    try:
        main(sys.argv[1:])
    except (AssertionError, OSError, ValueError) as err:
        # An uncaught exception would exit with status 1, which is
        # reserved for "differences found"; errors must exit with 2.
        print("%s: error: %s" % (os.path.basename(sys.argv[0]),
                                 str(err) or type(err).__name__),
              file=sys.stderr)
        sys.exit(2)


########################################################################