1# Copyright 2003 Dave Abrahams 2# Copyright 2001, 2002 Vladimir Prus 3# Copyright 2012 Jurko Gospodnetic 4# Distributed under the Boost Software License, Version 1.0. 5# (See accompanying file LICENSE_1_0.txt or copy at 6# http://www.boost.org/LICENSE_1_0.txt) 7 8############################################################################### 9# 10# Based in part on an old Subversion tree.py source file (tools for comparing 11# directory trees). See http://subversion.tigris.org for more information. 12# 13# Copyright (c) 2001 Sam Tobin-Hochstadt. All rights reserved. 14# 15# This software is licensed as described in the file COPYING, which you should 16# have received as part of this distribution. The terms are also available at 17# http://subversion.tigris.org/license-1.html. If newer versions of this 18# license are posted there, you may use a newer version instead, at your 19# option. 20# 21############################################################################### 22 23import os 24import os.path 25import stat 26import sys 27 28 29class TreeNode: 30 """ 31 Fundamental data type used to build file system tree structures. 32 33 If CHILDREN is None, then the node represents a file. Otherwise, CHILDREN 34 is a list of the nodes representing that directory's children. 35 36 NAME is simply the name of the file or directory. CONTENTS is a string 37 holding the file's contents (if a file). 38 39 """ 40 41 def __init__(self, name, children=None, contents=None): 42 assert children is None or contents is None 43 self.name = name 44 self.mtime = 0 45 self.children = children 46 self.contents = contents 47 self.path = name 48 49 def add_child(self, newchild): 50 assert not self.is_file() 51 for a in self.children: 52 if a.name == newchild.name: 53 if newchild.is_file(): 54 a.contents = newchild.contents 55 a.path = os.path.join(self.path, newchild.name) 56 else: 57 for i in newchild.children: 58 a.add_child(i) 59 break 60 else: 61 self.children.append(newchild) 62 newchild.path = os.path.join(self.path, newchild.name) 63 64 def get_child(self, name): 65 """ 66 If the given TreeNode directory NODE contains a child named NAME, 67 return the child; else, return None. 68 69 """ 70 for n in self.children: 71 if n.name == name: 72 return n 73 74 def is_file(self): 75 return self.children is None 76 77 def pprint(self): 78 print(" * Node name: %s" % self.name) 79 print(" Path: %s" % self.path) 80 print(" Contents: %s" % self.contents) 81 if self.is_file(): 82 print(" Children: is a file.") 83 else: 84 print(" Children: %d" % len(self.children)) 85 86 87class TreeDifference: 88 def __init__(self): 89 self.added_files = [] 90 self.removed_files = [] 91 self.modified_files = [] 92 self.touched_files = [] 93 94 def append(self, other): 95 self.added_files.extend(other.added_files) 96 self.removed_files.extend(other.removed_files) 97 self.modified_files.extend(other.modified_files) 98 self.touched_files.extend(other.touched_files) 99 100 def ignore_directories(self): 101 """Removes directories from our lists of found differences.""" 102 not_dir = lambda x : x[-1] != "/" 103 self.added_files = filter(not_dir, self.added_files) 104 self.removed_files = filter(not_dir, self.removed_files) 105 self.modified_files = filter(not_dir, self.modified_files) 106 self.touched_files = filter(not_dir, self.touched_files) 107 108 def pprint(self, file=sys.stdout): 109 file.write("Added files : %s\n" % self.added_files) 110 file.write("Removed files : %s\n" % self.removed_files) 111 file.write("Modified files: %s\n" % self.modified_files) 112 file.write("Touched files : %s\n" % self.touched_files) 113 114 def empty(self): 115 return not (self.added_files or self.removed_files or 116 self.modified_files or self.touched_files) 117 118 119def build_tree(path): 120 """ 121 Takes PATH as the folder path, walks the file system below that path, and 122 creates a tree structure based on any files and folders found there. 123 Returns the prepared tree structure plus the maximum file modification 124 timestamp under the given folder. 125 126 """ 127 return _handle_dir(os.path.normpath(path)) 128 129 130def tree_difference(a, b): 131 """Compare TreeNodes A and B, and create a TreeDifference instance.""" 132 return _do_tree_difference(a, b, "", True) 133 134 135def _do_tree_difference(a, b, parent_path, root=False): 136 """Internal recursive worker function for tree_difference().""" 137 138 # We do not want to list root node names. 139 if root: 140 assert not parent_path 141 assert not a.is_file() 142 assert not b.is_file() 143 full_path = "" 144 else: 145 assert a.name == b.name 146 full_path = parent_path + a.name 147 result = TreeDifference() 148 149 # A and B are both files. 150 if a.is_file() and b.is_file(): 151 if a.contents != b.contents: 152 result.modified_files.append(full_path) 153 elif a.mtime != b.mtime: 154 result.touched_files.append(full_path) 155 return result 156 157 # Directory converted to file. 158 if not a.is_file() and b.is_file(): 159 result.removed_files.extend(_traverse_tree(a, parent_path)) 160 result.added_files.append(full_path) 161 162 # File converted to directory. 163 elif a.is_file() and not b.is_file(): 164 result.removed_files.append(full_path) 165 result.added_files.extend(_traverse_tree(b, parent_path)) 166 167 # A and B are both directories. 168 else: 169 if full_path: 170 full_path += "/" 171 accounted_for = [] # Children present in both trees. 172 for a_child in a.children: 173 b_child = b.get_child(a_child.name) 174 if b_child: 175 accounted_for.append(b_child) 176 result.append(_do_tree_difference(a_child, b_child, full_path)) 177 else: 178 result.removed_files.append(full_path + a_child.name) 179 for b_child in b.children: 180 if b_child not in accounted_for: 181 result.added_files.extend(_traverse_tree(b_child, full_path)) 182 183 return result 184 185 186def _traverse_tree(t, parent_path): 187 """Returns a list of all names in a tree.""" 188 assert not parent_path or parent_path[-1] == "/" 189 full_node_name = parent_path + t.name 190 if t.is_file(): 191 result = [full_node_name] 192 else: 193 name_prefix = full_node_name + "/" 194 result = [name_prefix] 195 for i in t.children: 196 result.extend(_traverse_tree(i, name_prefix)) 197 return result 198 199 200def _get_text(path): 201 """Return a string with the textual contents of a file at PATH.""" 202 fp = open(path, 'r') 203 try: 204 return fp.read() 205 finally: 206 fp.close() 207 208 209def _handle_dir(path): 210 """ 211 Main recursive worker function for build_tree(). Returns a newly created 212 tree node representing the given normalized folder path as well as the 213 maximum file/folder modification time detected under the same path. 214 215 """ 216 files = [] 217 dirs = [] 218 node = TreeNode(os.path.basename(path), children=[]) 219 max_mtime = node.mtime = os.stat(path).st_mtime 220 221 # List files & folders. 222 for f in os.listdir(path): 223 f = os.path.join(path, f) 224 if os.path.isdir(f): 225 dirs.append(f) 226 elif os.path.isfile(f): 227 files.append(f) 228 229 # Add a child node for each file. 230 for f in files: 231 fcontents = _get_text(f) 232 new_file_node = TreeNode(os.path.basename(f), contents=fcontents) 233 new_file_node.mtime = os.stat(f).st_mtime 234 max_mtime = max(max_mtime, new_file_node.mtime) 235 node.add_child(new_file_node) 236 237 # For each subdir, create a node, walk its tree, add it as a child. 238 for d in dirs: 239 new_dir_node, new_max_mtime = _handle_dir(d) 240 max_mtime = max(max_mtime, new_max_mtime) 241 node.add_child(new_dir_node) 242 243 return node, max_mtime 244