1# Copyright 2003 Dave Abrahams
2# Copyright 2001, 2002 Vladimir Prus
3# Copyright 2012 Jurko Gospodnetic
4# Distributed under the Boost Software License, Version 1.0.
5# (See accompanying file LICENSE_1_0.txt or copy at
6# http://www.boost.org/LICENSE_1_0.txt)
7
8###############################################################################
9#
10# Based in part on an old Subversion tree.py source file (tools for comparing
11# directory trees). See http://subversion.tigris.org for more information.
12#
13# Copyright (c) 2001 Sam Tobin-Hochstadt.  All rights reserved.
14#
15# This software is licensed as described in the file COPYING, which you should
16# have received as part of this distribution. The terms are also available at
17# http://subversion.tigris.org/license-1.html. If newer versions of this
18# license are posted there, you may use a newer version instead, at your
19# option.
20#
21###############################################################################
22
23from __future__ import print_function
24
25import os
26import os.path
27import stat
28import sys
29
30
31class TreeNode:
32    """
33      Fundamental data type used to build file system tree structures.
34
35      If CHILDREN is None, then the node represents a file. Otherwise, CHILDREN
36    is a list of the nodes representing that directory's children.
37
38      NAME is simply the name of the file or directory. CONTENTS is a string
39    holding the file's contents (if a file).
40
41    """
42
43    def __init__(self, name, children=None, contents=None):
44        assert children is None or contents is None
45        self.name = name
46        self.mtime = 0
47        self.children = children
48        self.contents = contents
49        self.path = name
50
51    def add_child(self, newchild):
52        assert not self.is_file()
53        for a in self.children:
54            if a.name == newchild.name:
55                if newchild.is_file():
56                    a.contents = newchild.contents
57                    a.path = os.path.join(self.path, newchild.name)
58                else:
59                    for i in newchild.children:
60                        a.add_child(i)
61                break
62        else:
63            self.children.append(newchild)
64            newchild.path = os.path.join(self.path, newchild.name)
65
66    def get_child(self, name):
67        """
68          If the given TreeNode directory NODE contains a child named NAME,
69        return the child; else, return None.
70
71        """
72        for n in self.children:
73            if n.name == name:
74                return n
75
76    def is_file(self):
77        return self.children is None
78
79    def pprint(self):
80        print(" * Node name: %s" % self.name)
81        print("    Path:     %s" % self.path)
82        print("    Contents: %s" % self.contents)
83        if self.is_file():
84            print("    Children: is a file.")
85        else:
86            print("    Children: %d" % len(self.children))
87
88
89class TreeDifference:
90    def __init__(self):
91        self.added_files = []
92        self.removed_files = []
93        self.modified_files = []
94        self.touched_files = []
95
96    def append(self, other):
97        self.added_files.extend(other.added_files)
98        self.removed_files.extend(other.removed_files)
99        self.modified_files.extend(other.modified_files)
100        self.touched_files.extend(other.touched_files)
101
102    def ignore_directories(self):
103        """Removes directories from our lists of found differences."""
104        not_dir = lambda x : x[-1] != "/"
105        self.added_files = list(filter(not_dir, self.added_files))
106        self.removed_files = list(filter(not_dir, self.removed_files))
107        self.modified_files = list(filter(not_dir, self.modified_files))
108        self.touched_files = list(filter(not_dir, self.touched_files))
109
110    def pprint(self, file=sys.stdout):
111        file.write("Added files   : %s\n" % self.added_files)
112        file.write("Removed files : %s\n" % self.removed_files)
113        file.write("Modified files: %s\n" % self.modified_files)
114        file.write("Touched files : %s\n" % self.touched_files)
115
116    def empty(self):
117        return not (self.added_files or self.removed_files or
118            self.modified_files or self.touched_files)
119
120
121def build_tree(path):
122    """
123      Takes PATH as the folder path, walks the file system below that path, and
124    creates a tree structure based on any files and folders found there.
125    Returns the prepared tree structure plus the maximum file modification
126    timestamp under the given folder.
127
128    """
129    return _handle_dir(os.path.normpath(path))
130
131
132def tree_difference(a, b):
133    """Compare TreeNodes A and B, and create a TreeDifference instance."""
134    return _do_tree_difference(a, b, "", True)
135
136
137def _do_tree_difference(a, b, parent_path, root=False):
138    """Internal recursive worker function for tree_difference()."""
139
140    # We do not want to list root node names.
141    if root:
142        assert not parent_path
143        assert not a.is_file()
144        assert not b.is_file()
145        full_path = ""
146    else:
147        assert a.name == b.name
148        full_path = parent_path + a.name
149    result = TreeDifference()
150
151    # A and B are both files.
152    if a.is_file() and b.is_file():
153        if a.contents != b.contents:
154            result.modified_files.append(full_path)
155        elif a.mtime != b.mtime:
156            result.touched_files.append(full_path)
157        return result
158
159    # Directory converted to file.
160    if not a.is_file() and b.is_file():
161        result.removed_files.extend(_traverse_tree(a, parent_path))
162        result.added_files.append(full_path)
163
164    # File converted to directory.
165    elif a.is_file() and not b.is_file():
166        result.removed_files.append(full_path)
167        result.added_files.extend(_traverse_tree(b, parent_path))
168
169    # A and B are both directories.
170    else:
171        if full_path:
172            full_path += "/"
173        accounted_for = []  # Children present in both trees.
174        for a_child in a.children:
175            b_child = b.get_child(a_child.name)
176            if b_child:
177                accounted_for.append(b_child)
178                result.append(_do_tree_difference(a_child, b_child, full_path))
179            else:
180                result.removed_files.append(full_path + a_child.name)
181        for b_child in b.children:
182            if b_child not in accounted_for:
183                result.added_files.extend(_traverse_tree(b_child, full_path))
184
185    return result
186
187
188def _traverse_tree(t, parent_path):
189    """Returns a list of all names in a tree."""
190    assert not parent_path or parent_path[-1] == "/"
191    full_node_name = parent_path + t.name
192    if t.is_file():
193        result = [full_node_name]
194    else:
195        name_prefix = full_node_name + "/"
196        result = [name_prefix]
197        for i in t.children:
198            result.extend(_traverse_tree(i, name_prefix))
199    return result
200
201
202def _get_text(path):
203    """Return a string with the textual contents of a file at PATH."""
204    fp = open(path, 'rb')
205    try:
206        return fp.read()
207    finally:
208        fp.close()
209
210
211def _handle_dir(path):
212    """
213      Main recursive worker function for build_tree(). Returns a newly created
214    tree node representing the given normalized folder path as well as the
215    maximum file/folder modification time detected under the same path.
216
217    """
218    files = []
219    dirs = []
220    node = TreeNode(os.path.basename(path), children=[])
221    max_mtime = node.mtime = os.stat(path).st_mtime
222
223    # List files & folders.
224    for f in os.listdir(path):
225        f = os.path.join(path, f)
226        if os.path.isdir(f):
227            dirs.append(f)
228        elif os.path.isfile(f):
229            files.append(f)
230
231    # Add a child node for each file.
232    for f in files:
233        fcontents = _get_text(f)
234        new_file_node = TreeNode(os.path.basename(f), contents=fcontents)
235        new_file_node.mtime = os.stat(f).st_mtime
236        max_mtime = max(max_mtime, new_file_node.mtime)
237        node.add_child(new_file_node)
238
239    # For each subdir, create a node, walk its tree, add it as a child.
240    for d in dirs:
241        new_dir_node, new_max_mtime = _handle_dir(d)
242        max_mtime = max(max_mtime, new_max_mtime)
243        node.add_child(new_dir_node)
244
245    return node, max_mtime
246