"""
dirsync

Report the difference in content
of two directories, synchronise or
update a directory from another, taking
into account time-stamps of files and/or
its content etc.

(c) Thomas Khyn 2014

Based on Robocopy by Anand B Pillai

"""

import os
import sys
import stat
import time
import shutil
import re
import logging
import filecmp

from .options import OPTIONS
from .version import __pkg_name__


# Permission bits applied when the 'force' option is set.  These must be
# octal literals: the decimal values 1911 and 1638 are 0x777 and 0x666, which
# set setgid/sticky bits instead of the intended rwx bits.
_FORCE_DIR_MODE = 0o777
_FORCE_FILE_MODE = 0o666


class DCMP(object):
    """Dummy object for directory comparison data storage"""

    def __init__(self, l, r, c):
        # relative paths found only in the source directory
        self.left_only = l
        # relative paths found only in the target directory
        self.right_only = r
        # relative paths found in both directories
        self.common = c


class Syncer(object):
    """ An advanced directory synchronisation, update
    and file copying class """

    def __init__(self, dir1, dir2, action, **options):
        """ Prepare a synchronisation job from dir1 (source) to dir2 (target)

        ``action`` is the name of the method run by ``do_work`` ('sync',
        'update' or 'diff'). Any option missing from ``options`` falls back
        to the default declared in ``OPTIONS``. A custom ``logger`` may be
        passed in ``options``.

        Raises ValueError if the source directory does not exist, or if the
        target directory does not exist and the 'create' option is not set.
        """

        self.logger = options.get('logger', None)
        if not self.logger:
            # configure default logger to stdout
            log = logging.getLogger('dirsync')
            log.setLevel(logging.INFO)
            if not log.handlers:
                hdl = logging.StreamHandler(sys.stdout)
                hdl.setFormatter(logging.Formatter('%(message)s'))
                log.addHandler(hdl)
            self.logger = log

        self._dir1 = dir1
        self._dir2 = dir2

        self._copyfiles = True
        self._updatefiles = True
        self._creatdirs = True

        self._changed = []
        self._added = []
        self._deleted = []

        # stat vars
        self._numdirs = 0
        self._numfiles = 0
        self._numdelfiles = 0
        self._numdeldirs = 0
        self._numnewdirs = 0
        self._numcontupdates = 0
        self._numtimeupdates = 0
        self._starttime = 0.0
        self._endtime = 0.0

        # failure stat vars
        self._numcopyfld = 0
        self._numupdsfld = 0
        self._numdirsfld = 0
        self._numdelffld = 0
        self._numdeldfld = 0

        self._mainfunc = getattr(self, action)

        # options setup
        def get_option(name):
            return options.get(name, OPTIONS[name][1]['default'])

        self._verbose = get_option('verbose')
        self._purge = get_option('purge')
        self._copydirection = 2 if get_option('twoway') else 0
        self._forcecopy = get_option('force')
        self._maketarget = get_option('create')
        self._use_ctime = get_option('ctime')
        self._use_content = get_option('content')

        self._ignore = get_option('ignore')
        self._only = get_option('only')
        # copy, so that the caller's (or the default) list is not mutated
        self._exclude = list(get_option('exclude'))
        self._include = get_option('include')

        # excludes .dirsync file by default, must explicitly be in include
        # not to be excluded
        self._exclude.append(r'^\.dirsync$')

        if not os.path.isdir(self._dir1):
            raise ValueError("Error: Source directory does not exist.")

        if not self._maketarget and not os.path.isdir(self._dir2):
            raise ValueError(
                "Error: Target directory %s does not exist. "
                "(Try the -c or --create option to create it)." % self._dir2)

    def log(self, msg=''):
        """ Send a message to the configured logger at INFO level """
        self.logger.info(msg)

    @staticmethod
    def _matches_any(patterns, path):
        """ Tell whether path matches (re.match) at least one pattern """
        return any(re.match(pattern, path) for pattern in patterns)

    def _compare(self, dir1, dir2):
        """ Compare contents of two directories

        Returns a DCMP holding the relative paths only in dir1, only in
        dir2 and common to both. Source paths are filtered by the 'only',
        'include', 'exclude' and 'ignore' patterns; target paths only by the
        'ignore' patterns. Patterns are matched against '/'-separated
        relative paths.
        """

        left = set()
        right = set()

        self._numdirs += 1

        excl_patterns = set(self._exclude).union(self._ignore)

        for cwd, dirs, files in os.walk(dir1):
            self._numdirs += len(dirs)
            for f in dirs + files:
                path = os.path.relpath(os.path.join(cwd, f), dir1)
                re_path = path.replace('\\', '/')

                if self._only and not self._matches_any(self._only, re_path):
                    # this one does not match any pattern in the _only list
                    continue

                # an include pattern takes precedence over the excludes
                if not self._matches_any(self._include, re_path) and \
                        self._matches_any(excl_patterns, re_path):
                    continue

                left.add(path)

                # Also register every ancestor directory of the path: with
                # _only patterns a file may be selected while the
                # directories leading to it are not.
                anc_dirs_path = ''
                for ad in path.split(os.sep)[:-1]:
                    anc_dirs_path = os.path.join(anc_dirs_path, ad)
                    left.add(anc_dirs_path)

        for cwd, dirs, files in os.walk(dir2):
            for f in dirs + files:
                path = os.path.relpath(os.path.join(cwd, f), dir2)
                re_path = path.replace('\\', '/')
                for pattern in self._ignore:
                    if re.match(pattern, re_path):
                        if f in dirs:
                            # prune: do not walk into ignored directories
                            dirs.remove(f)
                        break
                else:
                    right.add(path)
                    # no need to add the parent dirs here,
                    # as there is no _only pattern detection
                    if f in dirs and path not in left:
                        self._numdirs += 1

        common = left.intersection(right)
        left.difference_update(common)
        right.difference_update(common)

        return DCMP(left, right, common)

    def do_work(self):
        """ Run the action selected at construction time

        Creates the target directory first when it is missing and the
        'create' option is set. Returns None in every case; when the target
        directory cannot be created the error is logged and the action is
        not run.
        """

        self._starttime = time.time()

        if not os.path.isdir(self._dir2):
            if self._maketarget:
                if self._verbose:
                    self.log('Creating directory %s' % self._dir2)
                try:
                    os.makedirs(self._dir2)
                    self._numnewdirs += 1
                except Exception as e:
                    self.log(str(e))
                    return None

        # All right!
        self._mainfunc()
        self._endtime = time.time()

    def _purge_file(self, path):
        """ Delete a file (or symlink) of the target, updating the stats """
        try:
            try:
                os.remove(path)
            except PermissionError:
                # read-only file: make it writable and try again
                os.chmod(path, stat.S_IWRITE)
                os.remove(path)
            self._deleted.append(path)
            self._numdelfiles += 1
        except OSError as e:
            self.log(str(e))
            self._numdelffld += 1

    def _purge_dir(self, path):
        """ Delete a directory tree of the target, updating the stats """
        # Best effort: errors are ignored so that as much as possible gets
        # removed, success is then determined by what is left on disk.
        shutil.rmtree(path, True)
        if os.path.exists(path):
            self.log('Could not delete directory %s' % path)
            self._numdeldfld += 1
        else:
            self._deleted.append(path)
            self._numdeldirs += 1

    def _dowork(self, dir1, dir2, copyfunc=None, updatefunc=None):
        """ Private attribute for doing work

        Compares dir1 and dir2, then:
        - if purging is enabled, deletes what exists only in the target
        - calls copyfunc(relpath, source, target) for each regular file that
          exists only in the source, and creates the directories that exist
          only in the source (when directory creation is enabled)
        - calls updatefunc(relpath, source, target) for each regular file
          common to both directories
        """

        if self._verbose:
            self.log('Source directory: %s:' % dir1)

        self._dcmp = self._compare(dir1, dir2)

        # Files & directories only in target directory
        if self._purge:
            for f2 in self._dcmp.right_only:
                fullf2 = os.path.join(self._dir2, f2)
                if self._verbose:
                    self.log('Deleting %s' % fullf2)
                try:
                    # symlinks are removed as files, even when they point to
                    # a directory: shutil.rmtree refuses to work on them
                    if os.path.islink(fullf2) or os.path.isfile(fullf2):
                        self._purge_file(fullf2)
                    elif os.path.isdir(fullf2):
                        self._purge_dir(fullf2)
                except Exception as e:
                    # last resort, a failed purge must not stop the run
                    self.log(str(e))
                    continue

        # Files & directories only in source directory
        for f1 in self._dcmp.left_only:
            try:
                st = os.stat(os.path.join(self._dir1, f1))
            except OSError:
                continue

            if stat.S_ISREG(st.st_mode):
                if copyfunc:
                    copyfunc(f1, self._dir1, self._dir2)
                    self._added.append(os.path.join(self._dir2, f1))
            elif stat.S_ISDIR(st.st_mode):
                # 'update' and 'diff' must not create anything in the target
                if not self._creatdirs:
                    continue
                to_make = os.path.join(self._dir2, f1)
                if not os.path.exists(to_make):
                    os.makedirs(to_make)
                    self._numnewdirs += 1
                    self._added.append(to_make)

        # common files/directories
        for f1 in self._dcmp.common:
            try:
                st = os.stat(os.path.join(self._dir1, f1))
            except OSError:
                continue

            if stat.S_ISREG(st.st_mode):
                if updatefunc:
                    updatefunc(f1, self._dir1, self._dir2)
            # nothing to do if we have a directory

    def _copy_into(self, filename, from_dir, to_dir, to_root):
        """ Copy filename from from_dir into to_dir, creating to_dir if needed

        to_root is the root directory to_dir belongs to. Failures to create
        the directory or to copy the file are logged and counted, they do
        not raise.
        """

        if not os.path.exists(to_dir):
            if self._forcecopy:
                # abspath, so that the parent of a relative root
                # (e.g. 'target') is not the empty string
                os.chmod(os.path.dirname(os.path.abspath(to_root)),
                         _FORCE_DIR_MODE)
            try:
                os.makedirs(to_dir)
                self._numnewdirs += 1
            except OSError as e:
                self.log(str(e))
                self._numdirsfld += 1

        if self._forcecopy:
            os.chmod(to_dir, _FORCE_DIR_MODE)

        sourcefile = os.path.join(from_dir, filename)
        targetfile = os.path.join(to_dir, filename)
        try:
            if os.path.islink(sourcefile):
                # replicate the link itself rather than what it points to
                os.symlink(os.readlink(sourcefile), targetfile)
            else:
                shutil.copy2(sourcefile, targetfile)
            self._numfiles += 1
        except (IOError, OSError) as e:
            self.log(str(e))
            self._numcopyfld += 1

    def _copy(self, filename, dir1, dir2):
        """ Private function for copying a file

        filename is a path relative to dir1 (source) and dir2 (target).
        Does nothing when file copying is disabled. Errors are logged,
        never raised.
        """

        # NOTE: dir1 is source & dir2 is target
        if not self._copyfiles:
            return

        rel_path = filename.replace('\\', '/').split('/')
        rel_dir = '/'.join(rel_path[:-1])
        filename = rel_path[-1]

        dir1_root = dir1
        dir2_root = dir2

        dir1 = os.path.join(dir1, rel_dir)
        dir2 = os.path.join(dir2, rel_dir)

        if self._verbose:
            self.log('Copying file %s from %s to %s' %
                     (filename, dir1, dir2))
        try:
            # source to target
            if self._copydirection in (0, 2):
                self._copy_into(filename, dir1, dir2, dir2_root)

            # target to source
            if self._copydirection in (1, 2):
                self._copy_into(filename, dir2, dir1, dir1_root)

        except Exception as e:
            self.log('Error copying file %s' % filename)
            self.log(str(e))

    def _cmptimestamps(self, filest1, filest2):
        """ Compare time stamps of two files and return True
        if file1 (source) is more recent than file2 (target)

        filest1 and filest2 are os.stat results. Differences below one
        millisecond are ignored. With the 'ctime' option, the source is
        also considered more recent when its st_ctime is later than the
        target's modification time.
        """

        mtime_cmp = int((filest1.st_mtime - filest2.st_mtime) * 1000) > 0
        if self._use_ctime:
            return mtime_cmp or \
                int((filest1.st_ctime - filest2.st_mtime) * 1000) > 0
        else:
            return mtime_cmp

    def _overwrite(self, src, dst):
        """ Replace dst by src and return True on success

        Failures are logged (and counted when they come from the copy
        itself), they do not raise.
        """
        try:
            if self._forcecopy:
                os.chmod(dst, _FORCE_FILE_MODE)

            try:
                if os.path.islink(src):
                    # NOTE(review): dst already exists when an update is
                    # attempted, so os.symlink presumably always fails here
                    # and is reported as an update error - confirm the
                    # intended behaviour for symlinks.
                    os.symlink(os.readlink(src), dst)
                else:
                    try:
                        shutil.copy2(src, dst)
                    except PermissionError:
                        # read-only destination: make it writable and retry
                        os.chmod(dst, stat.S_IWRITE)
                        shutil.copy2(src, dst)
            except (IOError, OSError) as e:
                self.log(str(e))
                self._numupdsfld += 1
                return False

        except Exception as e:
            self.log(str(e))
            return False

        self._changed.append(dst)
        return True

    def _update(self, filename, dir1, dir2):
        """ Private function for updating a file based on
        last time stamp of modification or difference of content

        Returns 0 when a file was updated, -1 otherwise (nothing to update,
        updates disabled, or failure).
        """

        # NOTE: dir1 is source & dir2 is target
        if not self._updatefiles:
            return -1

        file1 = os.path.join(dir1, filename)
        file2 = os.path.join(dir2, filename)

        try:
            st1 = os.stat(file1)
            st2 = os.stat(file2)
        except OSError:
            return -1

        # Update will update in both directions depending
        # on ( the timestamp of the file or its content ) & copy-direction.

        if self._copydirection in (0, 2):
            # If flag 'content' is used then look only at difference of file
            # contents instead of time stamps.
            # Update file if file's modification time is older than
            # source file's modification time, or creation time. Sometimes
            # it so happens that a file's creation time is newer than it's
            # modification time! (Seen this on windows)
            if self._use_content:
                need_upd = not filecmp.cmp(file1, file2, False)
            else:
                need_upd = self._cmptimestamps(st1, st2)

            if need_upd:
                if self._verbose:
                    # source to target
                    self.log('Updating file %s' % file2)
                if not self._overwrite(file1, file2):
                    return -1
                if self._use_content:
                    self._numcontupdates += 1
                else:
                    self._numtimeupdates += 1
                return 0

        if self._copydirection in (1, 2):
            # No need to do reverse synchronization in case of content
            # comparing: only time stamps are looked at in this direction.
            need_upd = (not self._use_content and
                        self._cmptimestamps(st2, st1))

            if need_upd:
                if self._verbose:
                    # target to source
                    self.log('Updating file %s' % file1)
                if not self._overwrite(file2, file1):
                    return -1
                self._numtimeupdates += 1
                return 0

        return -1

    def _dirdiffandcopy(self, dir1, dir2):
        """
        Private function which does directory diff & copy
        """
        self._dowork(dir1, dir2, self._copy)

    def _dirdiffandupdate(self, dir1, dir2):
        """
        Private function which does directory diff & update
        """
        self._dowork(dir1, dir2, None, self._update)

    def _dirdiffcopyandupdate(self, dir1, dir2):
        """
        Private function which does directory diff, copy and update (synchro)
        """
        self._dowork(dir1, dir2, self._copy, self._update)

    def _diff(self, dir1, dir2):
        """
        Private function which only does directory diff
        """

        self._dcmp = self._compare(dir1, dir2)

        if self._dcmp.left_only:
            self.log('Only in %s' % dir1)
            for x in sorted(self._dcmp.left_only):
                self.log('>> %s' % x)
            self.log('')

        if self._dcmp.right_only:
            self.log('Only in %s' % dir2)
            for x in sorted(self._dcmp.right_only):
                self.log('<< %s' % x)
            self.log('')

        if self._dcmp.common:
            self.log('Common to %s and %s' % (self._dir1, self._dir2))
            for x in sorted(self._dcmp.common):
                self.log('-- %s' % x)
        else:
            self.log('No common files or sub-directories!')

    def sync(self):
        """ Synchronize will try to synchronize two directories w.r.t
        each other's contents, copying files if necessary from source
        to target, and creating directories if necessary. If the optional
        argument purge is True, directories in target (dir2) that are
        not present in the source (dir1) will be deleted . Synchronization
        is done in the direction of source to target """

        self._copyfiles = True
        self._updatefiles = True
        self._creatdirs = True
        self._copydirection = 0

        if self._verbose:
            self.log('Synchronizing directory %s with %s' %
                     (self._dir2, self._dir1))
        self._dirdiffcopyandupdate(self._dir1, self._dir2)

    def update(self):
        """ Update will try to update the target directory
        w.r.t source directory. Only files that are common
        to both directories will be updated, no new files
        or directories are created """

        self._copyfiles = False
        self._updatefiles = True
        self._purge = False
        self._creatdirs = False

        if self._verbose:
            self.log('Updating directory %s with %s' %
                     (self._dir2, self._dir1))
        self._dirdiffandupdate(self._dir1, self._dir2)

    def diff(self):
        """
        Only report difference in content between two directories
        """

        self._copyfiles = False
        self._updatefiles = False
        self._purge = False
        self._creatdirs = False

        self.log('Difference of directory %s from %s' %
                 (self._dir2, self._dir1))
        self._diff(self._dir1, self._dir2)

    def report(self):
        """ Print report of work at the end """

        # Fixed-point formatting: truncating str() of the duration breaks on
        # values rendered in scientific notation (e.g. '9.5e-07' -> '9.5e')
        tt = '%.2f' % (self._endtime - self._starttime)

        self.log('%s finished in %s seconds.' % (__pkg_name__, tt))
        self.log('%d directories parsed, %d files copied' %
                 (self._numdirs, self._numfiles))
        if self._numdelfiles:
            self.log('%d files were purged.' % self._numdelfiles)
        if self._numdeldirs:
            self.log('%d directories were purged.' % self._numdeldirs)
        if self._numnewdirs:
            self.log('%d directories were created.' % self._numnewdirs)
        if self._numcontupdates:
            self.log('%d files were updated by content.' %
                     self._numcontupdates)
        if self._numtimeupdates:
            self.log('%d files were updated by timestamp.' %
                     self._numtimeupdates)

        # Failure stats
        self.log('')
        if self._numcopyfld:
            self.log('there were errors in copying %d files.'
                     % self._numcopyfld)
        if self._numdirsfld:
            self.log('there were errors in creating %d directories.'
                     % self._numdirsfld)
        if self._numupdsfld:
            self.log('there were errors in updating %d files.'
                     % self._numupdsfld)
        if self._numdeldfld:
            self.log('there were errors in purging %d directories.'
                     % self._numdeldfld)
        if self._numdelffld:
            self.log('there were errors in purging %d files.'
                     % self._numdelffld)