1# Copyright 2003 Dave Abrahams
2# Copyright 2001, 2002 Vladimir Prus
3# Copyright 2012 Jurko Gospodnetic
4# Distributed under the Boost Software License, Version 1.0.
5# (See accompanying file LICENSE_1_0.txt or copy at
6# http://www.boost.org/LICENSE_1_0.txt)
7
8###############################################################################
9#
10# Based in part on an old Subversion tree.py source file (tools for comparing
11# directory trees). See http://subversion.tigris.org for more information.
12#
13# Copyright (c) 2001 Sam Tobin-Hochstadt.  All rights reserved.
14#
15# This software is licensed as described in the file COPYING, which you should
16# have received as part of this distribution. The terms are also available at
17# http://subversion.tigris.org/license-1.html. If newer versions of this
18# license are posted there, you may use a newer version instead, at your
19# option.
20#
21###############################################################################
22
23import os
24import os.path
25import stat
26import sys
27
28
29class TreeNode:
30    """
31      Fundamental data type used to build file system tree structures.
32
33      If CHILDREN is None, then the node represents a file. Otherwise, CHILDREN
34    is a list of the nodes representing that directory's children.
35
36      NAME is simply the name of the file or directory. CONTENTS is a string
37    holding the file's contents (if a file).
38
39    """
40
41    def __init__(self, name, children=None, contents=None):
42        assert children is None or contents is None
43        self.name = name
44        self.mtime = 0
45        self.children = children
46        self.contents = contents
47        self.path = name
48
49    def add_child(self, newchild):
50        assert not self.is_file()
51        for a in self.children:
52            if a.name == newchild.name:
53                if newchild.is_file():
54                    a.contents = newchild.contents
55                    a.path = os.path.join(self.path, newchild.name)
56                else:
57                    for i in newchild.children:
58                        a.add_child(i)
59                break
60        else:
61            self.children.append(newchild)
62            newchild.path = os.path.join(self.path, newchild.name)
63
64    def get_child(self, name):
65        """
66          If the given TreeNode directory NODE contains a child named NAME,
67        return the child; else, return None.
68
69        """
70        for n in self.children:
71            if n.name == name:
72                return n
73
74    def is_file(self):
75        return self.children is None
76
77    def pprint(self):
78        print(" * Node name: %s" % self.name)
79        print("    Path:     %s" % self.path)
80        print("    Contents: %s" % self.contents)
81        if self.is_file():
82            print("    Children: is a file.")
83        else:
84            print("    Children: %d" % len(self.children))
85
86
87class TreeDifference:
88    def __init__(self):
89        self.added_files = []
90        self.removed_files = []
91        self.modified_files = []
92        self.touched_files = []
93
94    def append(self, other):
95        self.added_files.extend(other.added_files)
96        self.removed_files.extend(other.removed_files)
97        self.modified_files.extend(other.modified_files)
98        self.touched_files.extend(other.touched_files)
99
100    def ignore_directories(self):
101        """Removes directories from our lists of found differences."""
102        not_dir = lambda x : x[-1] != "/"
103        self.added_files = filter(not_dir, self.added_files)
104        self.removed_files = filter(not_dir, self.removed_files)
105        self.modified_files = filter(not_dir, self.modified_files)
106        self.touched_files = filter(not_dir, self.touched_files)
107
108    def pprint(self, file=sys.stdout):
109        file.write("Added files   : %s\n" % self.added_files)
110        file.write("Removed files : %s\n" % self.removed_files)
111        file.write("Modified files: %s\n" % self.modified_files)
112        file.write("Touched files : %s\n" % self.touched_files)
113
114    def empty(self):
115        return not (self.added_files or self.removed_files or
116            self.modified_files or self.touched_files)
117
118
119def build_tree(path):
120    """
121      Takes PATH as the folder path, walks the file system below that path, and
122    creates a tree structure based on any files and folders found there.
123    Returns the prepared tree structure plus the maximum file modification
124    timestamp under the given folder.
125
126    """
127    return _handle_dir(os.path.normpath(path))
128
129
130def tree_difference(a, b):
131    """Compare TreeNodes A and B, and create a TreeDifference instance."""
132    return _do_tree_difference(a, b, "", True)
133
134
135def _do_tree_difference(a, b, parent_path, root=False):
136    """Internal recursive worker function for tree_difference()."""
137
138    # We do not want to list root node names.
139    if root:
140        assert not parent_path
141        assert not a.is_file()
142        assert not b.is_file()
143        full_path = ""
144    else:
145        assert a.name == b.name
146        full_path = parent_path + a.name
147    result = TreeDifference()
148
149    # A and B are both files.
150    if a.is_file() and b.is_file():
151        if a.contents != b.contents:
152            result.modified_files.append(full_path)
153        elif a.mtime != b.mtime:
154            result.touched_files.append(full_path)
155        return result
156
157    # Directory converted to file.
158    if not a.is_file() and b.is_file():
159        result.removed_files.extend(_traverse_tree(a, parent_path))
160        result.added_files.append(full_path)
161
162    # File converted to directory.
163    elif a.is_file() and not b.is_file():
164        result.removed_files.append(full_path)
165        result.added_files.extend(_traverse_tree(b, parent_path))
166
167    # A and B are both directories.
168    else:
169        if full_path:
170            full_path += "/"
171        accounted_for = []  # Children present in both trees.
172        for a_child in a.children:
173            b_child = b.get_child(a_child.name)
174            if b_child:
175                accounted_for.append(b_child)
176                result.append(_do_tree_difference(a_child, b_child, full_path))
177            else:
178                result.removed_files.append(full_path + a_child.name)
179        for b_child in b.children:
180            if b_child not in accounted_for:
181                result.added_files.extend(_traverse_tree(b_child, full_path))
182
183    return result
184
185
186def _traverse_tree(t, parent_path):
187    """Returns a list of all names in a tree."""
188    assert not parent_path or parent_path[-1] == "/"
189    full_node_name = parent_path + t.name
190    if t.is_file():
191        result = [full_node_name]
192    else:
193        name_prefix = full_node_name + "/"
194        result = [name_prefix]
195        for i in t.children:
196            result.extend(_traverse_tree(i, name_prefix))
197    return result
198
199
200def _get_text(path):
201    """Return a string with the textual contents of a file at PATH."""
202    fp = open(path, 'r')
203    try:
204        return fp.read()
205    finally:
206        fp.close()
207
208
209def _handle_dir(path):
210    """
211      Main recursive worker function for build_tree(). Returns a newly created
212    tree node representing the given normalized folder path as well as the
213    maximum file/folder modification time detected under the same path.
214
215    """
216    files = []
217    dirs = []
218    node = TreeNode(os.path.basename(path), children=[])
219    max_mtime = node.mtime = os.stat(path).st_mtime
220
221    # List files & folders.
222    for f in os.listdir(path):
223        f = os.path.join(path, f)
224        if os.path.isdir(f):
225            dirs.append(f)
226        elif os.path.isfile(f):
227            files.append(f)
228
229    # Add a child node for each file.
230    for f in files:
231        fcontents = _get_text(f)
232        new_file_node = TreeNode(os.path.basename(f), contents=fcontents)
233        new_file_node.mtime = os.stat(f).st_mtime
234        max_mtime = max(max_mtime, new_file_node.mtime)
235        node.add_child(new_file_node)
236
237    # For each subdir, create a node, walk its tree, add it as a child.
238    for d in dirs:
239        new_dir_node, new_max_mtime = _handle_dir(d)
240        max_mtime = max(max_mtime, new_max_mtime)
241        node.add_child(new_dir_node)
242
243    return node, max_mtime
244