# coding: UTF-8

from collections import deque
import os

from seafobj import fs_mgr

ZERO_OBJ_ID = '0000000000000000000000000000000000000000'


class DiffEntry(object):
    """One changed path reported by CommitDiffer.

    Attributes:
        path: path of the entry; for renamed/moved entries this is the
            path before the change.
        new_path: path after a rename/move, otherwise None.
        obj_id: object id of the file or directory.
        size: file size; -1 when not supplied (directory entries built in
            this module never supply one).
    """

    def __init__(self, path, obj_id, size=-1, new_path=None):
        self.path = path
        self.new_path = new_path
        self.obj_id = obj_id
        self.size = size

    def __repr__(self):
        return '%s(path=%r, obj_id=%r, size=%r, new_path=%r)' % (
            type(self).__name__, self.path, self.obj_id, self.size,
            self.new_path)


class CommitDiffer(object):
    """Compute the differences between two directory trees of one repo.

    Args:
        repo_id: repository id passed to fs_mgr.load_seafdir.
        version: repository version passed to fs_mgr.load_seafdir.
        root1: root directory id of the old tree.
        root2: root directory id of the new tree.
        handle_rename: when true, added/deleted pairs sharing an object id
            are reported as renames or moves instead.
        fold_dirs: when true, added/deleted directories are reported once
            and their contents are not expanded.
    """

    def __init__(self, repo_id, version, root1, root2, handle_rename=False, fold_dirs=False):
        self.repo_id = repo_id
        self.version = version
        self.root1 = root1
        self.root2 = root2
        self.handle_rename = handle_rename
        self.fold_dirs = fold_dirs

    def diff_to_unicode(self):
        """Run diff() and decode every bytes attribute of each entry as UTF-8.

        Entries are modified in place. Returns the same nine lists that
        diff() returns, in the same order, as a tuple.
        """
        res = []
        for dirents in self.diff():
            for dirent in dirents:
                # Snapshot the items because attributes are reassigned
                # while looping.
                for key, value in list(vars(dirent).items()):
                    # Only bytes can be decoded. On Python 3 `str` has no
                    # .decode(); on Python 2 `bytes` is the same type as
                    # `str`, so this check is correct on both.
                    if isinstance(value, bytes):
                        setattr(dirent, key, value.decode('utf8'))
            res.append(dirents)
        return tuple(res)

    def diff(self):
        """Compare the tree rooted at root1 against the tree rooted at root2.

        Returns:
            A 9-tuple of lists of DiffEntry:
            (added_files, deleted_files, added_dirs, deleted_dirs,
             modified_files, renamed_files, moved_files,
             renamed_dirs, moved_dirs).
            The rename/move lists are only populated when handle_rename
            is true.
        """
        added_files = []
        deleted_files = []
        modified_files = []
        renamed_files = []
        renamed_dirs = []
        moved_files = []
        moved_dirs = []

        if self.root1 == self.root2:
            # Identical roots: nothing changed.
            return ([], [], [], [],
                    modified_files, renamed_files, moved_files,
                    renamed_dirs, moved_dirs)

        new_dirs = []
        del_dirs = []
        # Directories present in both trees under the same path but with
        # different ids, as (path, old_dir_id, new_dir_id), walked
        # breadth-first.
        queued_dirs = deque([('/', self.root1, self.root2)])

        while queued_dirs:
            path, old_id, new_id = queued_dirs.popleft()

            dir1 = fs_mgr.load_seafdir(self.repo_id, self.version, old_id)
            dir2 = fs_mgr.load_seafdir(self.repo_id, self.version, new_id)
            # NOTE(review): matched entries are removed from dir2 below so
            # that whatever it still lists afterwards is treated as newly
            # added. This mutates the object returned by load_seafdir;
            # confirm fs_mgr does not hand out shared/cached instances.

            for dent in dir1.get_files_list():
                new_dent = dir2.lookup_dent(dent.name)
                if not new_dent or new_dent.type != dent.type:
                    deleted_files.append(
                        DiffEntry(make_path(path, dent.name), dent.id, dent.size))
                    continue
                dir2.remove_entry(dent.name)
                if new_dent.id != dent.id:
                    modified_files.append(
                        DiffEntry(make_path(path, dent.name), new_dent.id, new_dent.size))

            added_files.extend(
                DiffEntry(make_path(path, dent.name), dent.id, dent.size)
                for dent in dir2.get_files_list())

            for dent in dir1.get_subdirs_list():
                new_dent = dir2.lookup_dent(dent.name)
                if not new_dent or new_dent.type != dent.type:
                    del_dirs.append(DiffEntry(make_path(path, dent.name), dent.id))
                    continue
                dir2.remove_entry(dent.name)
                if new_dent.id != dent.id:
                    # Same name, different content: descend later.
                    queued_dirs.append(
                        (make_path(path, dent.name), dent.id, new_dent.id))

            new_dirs.extend(
                DiffEntry(make_path(path, dent.name), dent.id)
                for dent in dir2.get_subdirs_list())

        if self.fold_dirs:
            added_dirs = new_dirs
            deleted_dirs = del_dirs
        else:
            # Every file below an added (deleted) directory is itself
            # reported as added (deleted).
            added_dirs = self._expand_dirs(new_dirs, added_files)
            deleted_dirs = self._expand_dirs(del_dirs, deleted_files)

        if self.handle_rename:
            ret_added_files, ret_deleted_files, renamed_files, moved_files = \
                _match_renames(added_files, deleted_files)
            ret_added_dirs, ret_deleted_dirs, renamed_dirs, moved_dirs = \
                _match_renames(added_dirs, deleted_dirs)
        else:
            ret_added_files = added_files
            ret_deleted_files = deleted_files
            ret_added_dirs = added_dirs
            ret_deleted_dirs = deleted_dirs

        return (ret_added_files, ret_deleted_files, ret_added_dirs, ret_deleted_dirs,
                modified_files, renamed_files, moved_files,
                renamed_dirs, moved_dirs)

    def _expand_dirs(self, top_dirs, files_out):
        """Walk top_dirs and all their sub-directories breadth-first.

        Appends a DiffEntry for every file found to files_out and returns
        the list of DiffEntry for every directory visited (top_dirs
        included), in visiting order.
        """
        visited = []
        pending = deque(top_dirs)
        while pending:
            dir_dent = pending.popleft()
            visited.append(DiffEntry(dir_dent.path, dir_dent.obj_id))
            d = fs_mgr.load_seafdir(self.repo_id, self.version, dir_dent.obj_id)
            files_out.extend(
                DiffEntry(make_path(dir_dent.path, dent.name), dent.id, dent.size)
                for dent in d.get_files_list())
            pending.extend(
                DiffEntry(make_path(dir_dent.path, dent.name), dent.id)
                for dent in d.get_subdirs_list())
        return visited


def _match_renames(added, deleted):
    """Pair added and deleted entries that share an object id.

    An added entry whose obj_id equals that of a not-yet-paired deleted
    entry is reported as renamed when both paths have the same parent
    directory, and as moved otherwise. Entries whose id is ZERO_OBJ_ID
    (empty file or dir) are never paired, because their origin cannot be
    determined; they stay in the added and deleted lists.

    Each deleted entry is paired at most once, and deleted entries that
    share an obj_id are tracked individually so that none of them is
    dropped from the result.

    Returns:
        (still_added, still_deleted, renamed, moved) lists of DiffEntry.
        still_deleted holds the unpaired non-empty entries in their
        original order, followed by the ZERO_OBJ_ID entries.
    """
    # obj_id -> positions in `deleted` that are still available for pairing.
    candidates = {}
    for idx, de in enumerate(deleted):
        if de.obj_id != ZERO_OBJ_ID:
            candidates.setdefault(de.obj_id, []).append(idx)

    still_added = []
    renamed = []
    moved = []
    paired = set()
    for de in added:
        positions = candidates.get(de.obj_id)
        if not positions:
            still_added.append(de)
            continue
        idx = positions.pop()
        paired.add(idx)
        del_de = deleted[idx]
        # Directory entries built by CommitDiffer carry the default size
        # of -1, so de.size is right for both files and dirs.
        entry = DiffEntry(del_de.path, de.obj_id, de.size, de.path)
        if os.path.dirname(de.path) == os.path.dirname(del_de.path):
            # Same parent directory: a rename rather than a move.
            renamed.append(entry)
        else:
            moved.append(entry)

    still_deleted = [de for idx, de in enumerate(deleted)
                     if de.obj_id != ZERO_OBJ_ID and idx not in paired]
    still_deleted.extend(de for de in deleted if de.obj_id == ZERO_OBJ_ID)
    return still_added, still_deleted, renamed, moved


def make_path(dirname, filename):
    """Join dirname and filename with '/', without doubling the root slash."""
    if dirname == '/':
        return dirname + filename
    else:
        return '/'.join((dirname, filename))