1"""
2dirsync
3
Report the difference in content
of two directories, synchronise or
update a directory from another, taking
into account the time-stamps of files and/or
their content, etc.
9
10(c) Thomas Khyn 2014
11
12Based on Robocopy by Anand B Pillai
13
14"""
15
16import os
17import sys
18import stat
19import time
20import shutil
21import re
22import logging
23import filecmp
24
25from .options import OPTIONS
26from .version import __pkg_name__
27
28
class DCMP(object):
    """Plain container holding the outcome of a directory comparison.

    The three constructor arguments are stored unchanged as the
    ``left_only``, ``right_only`` and ``common`` attributes.
    """

    def __init__(self, l, r, c):
        # l -> left_only, r -> right_only, c -> common
        self.left_only, self.right_only, self.common = l, r, c
35
36
37class Syncer(object):
38    """ An advanced directory synchronisation, update
39    and file copying class """
40
41    def __init__(self, dir1, dir2, action, **options):
42
43        self.logger = options.get('logger', None)
44        if not self.logger:
45            # configure default logger to stdout
46            log = logging.getLogger('dirsync')
47            log.setLevel(logging.INFO)
48            if not log.handlers:
49                hdl = logging.StreamHandler(sys.stdout)
50                hdl.setFormatter(logging.Formatter('%(message)s'))
51                log.addHandler(hdl)
52            self.logger = log
53
54        self._dir1 = dir1
55        self._dir2 = dir2
56
57        self._copyfiles = True
58        self._updatefiles = True
59        self._creatdirs = True
60
61        self._changed = []
62        self._added = []
63        self._deleted = []
64
65        # stat vars
66        self._numdirs = 0
67        self._numfiles = 0
68        self._numdelfiles = 0
69        self._numdeldirs = 0
70        self._numnewdirs = 0
71        self._numcontupdates = 0
72        self._numtimeupdates = 0
73        self._starttime = 0.0
74        self._endtime = 0.0
75
76        # failure stat vars
77        self._numcopyfld = 0
78        self._numupdsfld = 0
79        self._numdirsfld = 0
80        self._numdelffld = 0
81        self._numdeldfld = 0
82
83        self._mainfunc = getattr(self, action)
84
85        # options setup
86        def get_option(name):
87            return options.get(name, OPTIONS[name][1]['default'])
88
89        self._verbose = get_option('verbose')
90        self._purge = get_option('purge')
91        self._copydirection = 2 if get_option('twoway') else 0
92        self._forcecopy = get_option('force')
93        self._maketarget = get_option('create')
94        self._use_ctime = get_option('ctime')
95        self._use_content = get_option('content')
96
97        self._ignore = get_option('ignore')
98        self._only = get_option('only')
99        self._exclude = list(get_option('exclude'))
100        self._include = get_option('include')
101
102        # excludes .dirsync file by default, must explicitly be in include
103        # not to be excluded
104        self._exclude.append('^\.dirsync$')
105
106        if not os.path.isdir(self._dir1):
107            raise ValueError("Error: Source directory does not exist.")
108
109        if not self._maketarget and not os.path.isdir(self._dir2):
110            raise ValueError(
111                "Error: Target directory %s does not exist. "
112                "(Try the -c or --create option to create it)." % self._dir2)
113
114    def log(self, msg=''):
115        self.logger.info(msg)
116
117    def _compare(self, dir1, dir2):
118        """ Compare contents of two directories """
119
120        left = set()
121        right = set()
122
123        self._numdirs += 1
124
125        excl_patterns = set(self._exclude).union(self._ignore)
126
127        for cwd, dirs, files in os.walk(dir1):
128            self._numdirs += len(dirs)
129            for f in dirs + files:
130                path = os.path.relpath(os.path.join(cwd, f), dir1)
131                re_path = path.replace('\\', '/')
132                if self._only:
133                    for pattern in self._only:
134                        if re.match(pattern, re_path):
135                            # go to exclude and ignore filtering
136                            break
137                    else:
138                        # next item, this one does not match any pattern
139                        # in the _only list
140                        continue
141
142                add_path = False
143                for pattern in self._include:
144                    if re.match(pattern, re_path):
145                        add_path = True
146                        break
147                else:
148                    # path was not in includes
149                    # test if it is in excludes
150                    for pattern in excl_patterns:
151                        if re.match(pattern, re_path):
152                            # path is in excludes, do not add it
153                            break
154                    else:
155                        # path was not in excludes
156                        # it should be added
157                        add_path = True
158
159                if add_path:
160                    left.add(path)
161                    anc_dirs = re_path[:-1].split('/')
162                    anc_dirs_path = ''
163                    for ad in anc_dirs[1:]:
164                        anc_dirs_path = os.path.join(anc_dirs_path, ad)
165                        left.add(anc_dirs_path)
166
167        for cwd, dirs, files in os.walk(dir2):
168            for f in dirs + files:
169                path = os.path.relpath(os.path.join(cwd, f), dir2)
170                re_path = path.replace('\\', '/')
171                for pattern in self._ignore:
172                    if re.match(pattern, re_path):
173                        if f in dirs:
174                            dirs.remove(f)
175                        break
176                else:
177                    right.add(path)
178                    # no need to add the parent dirs here,
179                    # as there is no _only pattern detection
180                    if f in dirs and path not in left:
181                        self._numdirs += 1
182
183        common = left.intersection(right)
184        left.difference_update(common)
185        right.difference_update(common)
186
187        return DCMP(left, right, common)
188
189    def do_work(self):
190        """ Do work """
191
192        self._starttime = time.time()
193
194        if not os.path.isdir(self._dir2):
195            if self._maketarget:
196                if self._verbose:
197                    self.log('Creating directory %s' % self._dir2)
198                try:
199                    os.makedirs(self._dir2)
200                    self._numnewdirs += 1
201                except Exception as e:
202                    self.log(str(e))
203                    return None
204
205        # All right!
206        self._mainfunc()
207        self._endtime = time.time()
208
209    def _dowork(self, dir1, dir2, copyfunc=None, updatefunc=None):
210        """ Private attribute for doing work """
211
212        if self._verbose:
213            self.log('Source directory: %s:' % dir1)
214
215        self._dcmp = self._compare(dir1, dir2)
216
217        # Files & directories only in target directory
218        if self._purge:
219            for f2 in self._dcmp.right_only:
220                fullf2 = os.path.join(self._dir2, f2)
221                if self._verbose:
222                    self.log('Deleting %s' % fullf2)
223                try:
224                    if os.path.isfile(fullf2):
225                        try:
226                            try:
227                                os.remove(fullf2)
228                            except PermissionError as e:
229                                os.chmod(fullf2, stat.S_IWRITE)
230                                os.remove(fullf2)
231                            self._deleted.append(fullf2)
232                            self._numdelfiles += 1
233                        except OSError as e:
234                            self.log(str(e))
235                            self._numdelffld += 1
236                    elif os.path.isdir(fullf2):
237                        try:
238                            shutil.rmtree(fullf2, True)
239                            self._deleted.append(fullf2)
240                            self._numdeldirs += 1
241                        except shutil.Error as e:
242                            self.log(str(e))
243                            self._numdeldfld += 1
244
245                except Exception as e:  # of any use ?
246                    self.log(str(e))
247                    continue
248
249        # Files & directories only in source directory
250        for f1 in self._dcmp.left_only:
251            try:
252                st = os.stat(os.path.join(self._dir1, f1))
253            except os.error:
254                continue
255
256            if stat.S_ISREG(st.st_mode):
257                if copyfunc:
258                    copyfunc(f1, self._dir1, self._dir2)
259                    self._added.append(os.path.join(self._dir2, f1))
260            elif stat.S_ISDIR(st.st_mode):
261                to_make = os.path.join(self._dir2, f1)
262                if not os.path.exists(to_make):
263                    os.makedirs(to_make)
264                    self._numnewdirs += 1
265                    self._added.append(to_make)
266
267        # common files/directories
268        for f1 in self._dcmp.common:
269            try:
270                st = os.stat(os.path.join(self._dir1, f1))
271            except os.error:
272                continue
273
274            if stat.S_ISREG(st.st_mode):
275                if updatefunc:
276                    updatefunc(f1, self._dir1, self._dir2)
277            # nothing to do if we have a directory
278
279    def _copy(self, filename, dir1, dir2):
280        """ Private function for copying a file """
281
282        # NOTE: dir1 is source & dir2 is target
283        if self._copyfiles:
284
285            rel_path = filename.replace('\\', '/').split('/')
286            rel_dir = '/'.join(rel_path[:-1])
287            filename = rel_path[-1]
288
289            dir2_root = dir2
290
291            dir1 = os.path.join(dir1, rel_dir)
292            dir2 = os.path.join(dir2, rel_dir)
293
294            if self._verbose:
295                self.log('Copying file %s from %s to %s' %
296                         (filename, dir1, dir2))
297            try:
298                # source to target
299                if self._copydirection == 0 or self._copydirection == 2:
300
301                    if not os.path.exists(dir2):
302                        if self._forcecopy:
303                            # 1911 = 0o777
304                            os.chmod(os.path.dirname(dir2_root), 1911)
305                        try:
306                            os.makedirs(dir2)
307                            self._numnewdirs += 1
308                        except OSError as e:
309                            self.log(str(e))
310                            self._numdirsfld += 1
311
312                    if self._forcecopy:
313                        os.chmod(dir2, 1911)  # 1911 = 0o777
314
315                    sourcefile = os.path.join(dir1, filename)
316                    try:
317                        if os.path.islink(sourcefile):
318                            os.symlink(os.readlink(sourcefile),
319                                       os.path.join(dir2, filename))
320                        else:
321                            shutil.copy2(sourcefile, dir2)
322                        self._numfiles += 1
323                    except (IOError, OSError) as e:
324                        self.log(str(e))
325                        self._numcopyfld += 1
326
327                if self._copydirection == 1 or self._copydirection == 2:
328                    # target to source
329
330                    if not os.path.exists(dir1):
331                        if self._forcecopy:
332                            # 1911 = 0o777
333                            os.chmod(os.path.dirname(self.dir1_root), 1911)
334
335                        try:
336                            os.makedirs(dir1)
337                            self._numnewdirs += 1
338                        except OSError as e:
339                            self.log(str(e))
340                            self._numdirsfld += 1
341
342                    targetfile = os.path.abspath(os.path.join(dir1, filename))
343                    if self._forcecopy:
344                        os.chmod(dir1, 1911)  # 1911 = 0o777
345
346                    sourcefile = os.path.join(dir2, filename)
347
348                    try:
349                        if os.path.islink(sourcefile):
350                            os.symlink(os.readlink(sourcefile),
351                                       os.path.join(dir1, filename))
352                        else:
353                            shutil.copy2(sourcefile, targetfile)
354                        self._numfiles += 1
355                    except (IOError, OSError) as e:
356                        self.log(str(e))
357                        self._numcopyfld += 1
358
359            except Exception as e:
360                self.log('Error copying file %s' % filename)
361                self.log(str(e))
362
363    def _cmptimestamps(self, filest1, filest2):
364        """ Compare time stamps of two files and return True
365        if file1 (source) is more recent than file2 (target) """
366
367        mtime_cmp = int((filest1.st_mtime - filest2.st_mtime) * 1000) > 0
368        if self._use_ctime:
369            return mtime_cmp or \
370                   int((filest1.st_ctime - filest2.st_mtime) * 1000) > 0
371        else:
372            return mtime_cmp
373
374    def _update(self, filename, dir1, dir2):
375        """ Private function for updating a file based on
376        last time stamp of modification or difference of content"""
377
378        # NOTE: dir1 is source & dir2 is target
379        if self._updatefiles:
380
381            file1 = os.path.join(dir1, filename)
382            file2 = os.path.join(dir2, filename)
383
384            try:
385                st1 = os.stat(file1)
386                st2 = os.stat(file2)
387            except os.error:
388                return -1
389
390            # Update will update in both directions depending
391            # on ( the timestamp of the file or its content ) & copy-direction.
392
393            if self._copydirection == 0 or self._copydirection == 2:
394
395                # If flag 'content' is used then look only at difference of file
396                # contents instead of time stamps.
397                # Update file if file's modification time is older than
398                # source file's modification time, or creation time. Sometimes
399                # it so happens that a file's creation time is newer than it's
400                # modification time! (Seen this on windows)
401                need_upd = (not filecmp.cmp(file1, file2, False)) if self._use_content else self._cmptimestamps(st1, st2)
402                if need_upd:
403                    if self._verbose:
404                        # source to target
405                        self.log('Updating file %s' % file2)
406                    try:
407                        if self._forcecopy:
408                            os.chmod(file2, 1638)  # 1638 = 0o666
409
410                        try:
411                            if os.path.islink(file1):
412                                os.symlink(os.readlink(file1), file2)
413                            else:
414                                try:
415                                    shutil.copy2(file1, file2)
416                                except PermissionError as e:
417                                    os.chmod(file2, stat.S_IWRITE)
418                                    shutil.copy2(file1, file2)
419                            self._changed.append(file2)
420                            if self._use_content:
421                               self._numcontupdates += 1
422                            else:
423                               self._numtimeupdates += 1
424                            return 0
425                        except (IOError, OSError) as e:
426                            self.log(str(e))
427                            self._numupdsfld += 1
428                            return -1
429
430                    except Exception as e:
431                        self.log(str(e))
432                        return -1
433
434            if self._copydirection == 1 or self._copydirection == 2:
435
436                # No need to do reverse synchronization in case of content comparing.
437                # Update file if file's modification time is older than
438                # source file's modification time, or creation time. Sometimes
439                # it so happens that a file's creation time is newer than it's
440                # modification time! (Seen this on windows)
441                need_upd = False if self._use_content else self._cmptimestamps(st2, st1)
442                if need_upd:
443                    if self._verbose:
444                        # target to source
445                        self.log('Updating file %s' % file1)
446                    try:
447                        if self._forcecopy:
448                            os.chmod(file1, 1638)  # 1638 = 0o666
449
450                        try:
451                            if os.path.islink(file2):
452                                os.symlink(os.readlink(file2), file1)
453                            else:
454                                shutil.copy2(file2, file1)
455                            self._changed.append(file1)
456                            self._numtimeupdates += 1
457                            return 0
458                        except (IOError, OSError) as e:
459                            self.log(str(e))
460                            self._numupdsfld += 1
461                            return -1
462
463                    except Exception as e:
464                        self.log(str(e))
465                        return -1
466
467        return -1
468
469    def _dirdiffandcopy(self, dir1, dir2):
470        """
471        Private function which does directory diff & copy
472        """
473        self._dowork(dir1, dir2, self._copy)
474
475    def _dirdiffandupdate(self, dir1, dir2):
476        """
477        Private function which does directory diff & update
478        """
479        self._dowork(dir1, dir2, None, self._update)
480
481    def _dirdiffcopyandupdate(self, dir1, dir2):
482        """
483        Private function which does directory diff, copy and update (synchro)
484        """
485        self._dowork(dir1, dir2, self._copy, self._update)
486
487    def _diff(self, dir1, dir2):
488        """
489        Private function which only does directory diff
490        """
491
492        self._dcmp = self._compare(dir1, dir2)
493
494        if self._dcmp.left_only:
495            self.log('Only in %s' % dir1)
496            for x in sorted(self._dcmp.left_only):
497                self.log('>> %s' % x)
498            self.log('')
499
500        if self._dcmp.right_only:
501            self.log('Only in %s' % dir2)
502            for x in sorted(self._dcmp.right_only):
503                self.log('<< %s' % x)
504            self.log('')
505
506        if self._dcmp.common:
507            self.log('Common to %s and %s' % (self._dir1, self._dir2))
508            for x in sorted(self._dcmp.common):
509                self.log('-- %s' % x)
510        else:
511            self.log('No common files or sub-directories!')
512
513    def sync(self):
514        """ Synchronize will try to synchronize two directories w.r.t
515        each other's contents, copying files if necessary from source
516        to target, and creating directories if necessary. If the optional
517        argument purge is True, directories in target (dir2) that are
518        not present in the source (dir1) will be deleted . Synchronization
519        is done in the direction of source to target """
520
521        self._copyfiles = True
522        self._updatefiles = True
523        self._creatdirs = True
524        self._copydirection = 0
525
526        if self._verbose:
527            self.log('Synchronizing directory %s with %s' %
528                     (self._dir2, self._dir1))
529        self._dirdiffcopyandupdate(self._dir1, self._dir2)
530
531    def update(self):
532        """ Update will try to update the target directory
533        w.r.t source directory. Only files that are common
534        to both directories will be updated, no new files
535        or directories are created """
536
537        self._copyfiles = False
538        self._updatefiles = True
539        self._purge = False
540        self._creatdirs = False
541
542        if self._verbose:
543            self.log('Updating directory %s with %s' %
544                     (self._dir2, self._dir1))
545        self._dirdiffandupdate(self._dir1, self._dir2)
546
547    def diff(self):
548        """
549        Only report difference in content between two directories
550        """
551
552        self._copyfiles = False
553        self._updatefiles = False
554        self._purge = False
555        self._creatdirs = False
556
557        self.log('Difference of directory %s from %s' %
558                 (self._dir2, self._dir1))
559        self._diff(self._dir1, self._dir2)
560
561    def report(self):
562        """ Print report of work at the end """
563
564        # We need only the first 4 significant digits
565        tt = (str(self._endtime - self._starttime))[:4]
566
567        self.log('%s finished in %s seconds.' % (__pkg_name__, tt))
568        self.log('%d directories parsed, %d files copied' %
569                 (self._numdirs, self._numfiles))
570        if self._numdelfiles:
571            self.log('%d files were purged.' % self._numdelfiles)
572        if self._numdeldirs:
573            self.log('%d directories were purged.' % self._numdeldirs)
574        if self._numnewdirs:
575            self.log('%d directories were created.' % self._numnewdirs)
576        if self._numcontupdates:
577            self.log('%d files were updated by content.' % self._numcontupdates)
578        if self._numtimeupdates:
579            self.log('%d files were updated by timestamp.' % self._numtimeupdates)
580
581        # Failure stats
582        self.log('')
583        if self._numcopyfld:
584            self.log('there were errors in copying %d files.'
585                     % self._numcopyfld)
586        if self._numdirsfld:
587            self.log('there were errors in creating %d directories.'
588                     % self._numdirsfld)
589        if self._numupdsfld:
590            self.log('there were errors in updating %d files.'
591                     % self._numupdsfld)
592        if self._numdeldfld:
593            self.log('there were errors in purging %d directories.'
594                     % self._numdeldfld)
595        if self._numdelffld:
596            self.log('there were errors in purging %d files.'
597                     % self._numdelffld)
598