1# coding: UTF-8
2
3from seafobj import fs_mgr
4import os
5
# Object ID made of forty '0' characters. CommitDiffer.diff() never uses
# entries carrying this ID for rename/move matching; per the comment there,
# it stands for an empty file or dir whose origin cannot be determined.
ZERO_OBJ_ID = '0000000000000000000000000000000000000000'
7
8
class DiffEntry(object):
    """A single changed path reported by a tree diff.

    Values are stored as plain instance attributes (no ``__slots__``) because
    ``CommitDiffer.diff_to_unicode`` rewrites them through ``__dict__``.

    Attributes:
        path: Path of the entry. For renamed/moved entries this is the old
            path.
        new_path: Destination path for renamed/moved entries, else ``None``.
        obj_id: Object ID of the file or directory.
        size: Size of the file; ``-1`` when no size was supplied (the
            default, which is what directory entries are created with).
    """

    def __init__(self, path, obj_id, size=-1, new_path=None):
        self.path = path
        self.new_path = new_path
        self.obj_id = obj_id
        self.size = size

    def __repr__(self):
        # Readable form for logs and debugging sessions; the default
        # "<DiffEntry object at 0x...>" hides everything that matters.
        return '%s(path=%r, obj_id=%r, size=%r, new_path=%r)' % (
            type(self).__name__, self.path, self.obj_id, self.size,
            self.new_path)
15
class CommitDiffer(object):
    """Compute the differences between two directory trees of one repo.

    The trees are identified by their root directory object IDs (``root1`` is
    the old tree, ``root2`` the new one) and are loaded through
    ``fs_mgr.load_seafdir``.

    Args:
        repo_id: Repository ID passed to ``fs_mgr.load_seafdir``.
        version: Repository version passed to ``fs_mgr.load_seafdir``.
        root1: Root directory object ID of the old tree.
        root2: Root directory object ID of the new tree.
        handle_rename: When true, added/deleted pairs with the same object ID
            are reported as renamed (same parent directory) or moved.
        fold_dirs: When true, added/deleted directories are reported as-is
            and their contents are not expanded into individual entries.
    """

    def __init__(self, repo_id, version, root1, root2, handle_rename=False, fold_dirs=False):
        self.repo_id = repo_id
        self.version = version
        self.root1 = root1
        self.root2 = root2
        self.handle_rename = handle_rename
        self.fold_dirs = fold_dirs

    def diff_to_unicode(self):
        """Run ``diff()`` and decode byte-string attributes as UTF-8.

        Returns:
            The same 9-tuple as ``diff()``; every attribute of every entry
            that is a byte string has been replaced, in place, by its UTF-8
            decoded text. Attributes that are already text are left alone.
        """
        # you can also do this by overwriting key points
        diff_res = self.diff()
        for dirents in diff_res:
            for dirent in dirents:
                attrs = vars(dirent)
                for key in list(attrs.keys()):
                    v = attrs[key]
                    # Only byte strings can be decoded. On Python 3 a text
                    # ``str`` has no ``decode`` method, so testing for ``str``
                    # here raised AttributeError; on Python 2 ``bytes`` is
                    # ``str``, so the check is unchanged there.
                    if isinstance(v, bytes):
                        attrs[key] = v.decode('utf8')
        return tuple(diff_res)

    def diff(self):
        """Diff the old tree (``root1``) against the new tree (``root2``).

        Returns:
            A 9-tuple of lists of ``DiffEntry``:
            ``(added_files, deleted_files, added_dirs, deleted_dirs,
            modified_files, renamed_files, moved_files, renamed_dirs,
            moved_dirs)``. The four rename/move lists are only populated
            when ``handle_rename`` is set.
        """
        if self.root1 == self.root2:
            # Identical roots: nothing changed, nothing needs to be loaded.
            return ([], [], [], [], [], [], [], [], [])

        (added_files, deleted_files, modified_files,
         new_dirs, del_dirs) = self._diff_changed_dirs()

        if self.fold_dirs:
            added_dirs = new_dirs
            deleted_dirs = del_dirs
        else:
            # Every file under a newly added (resp. deleted) directory is
            # itself reported as added (resp. deleted).
            added_dirs, nested_added = self._expand_dirs(new_dirs)
            added_files.extend(nested_added)
            deleted_dirs, nested_deleted = self._expand_dirs(del_dirs)
            deleted_files.extend(nested_deleted)

        renamed_files = []
        moved_files = []
        renamed_dirs = []
        moved_dirs = []
        if self.handle_rename:
            (added_files, deleted_files,
             renamed_files, moved_files) = self._detect_renames(added_files, deleted_files)
            (added_dirs, deleted_dirs,
             renamed_dirs, moved_dirs) = self._detect_renames(added_dirs, deleted_dirs)

        return (added_files, deleted_files, added_dirs, deleted_dirs,
                modified_files, renamed_files, moved_files,
                renamed_dirs, moved_dirs)

    def _diff_changed_dirs(self):
        """Walk directory pairs present in both trees, breadth first.

        Returns:
            ``(added_files, deleted_files, modified_files, new_dirs,
            del_dirs)``; ``new_dirs``/``del_dirs`` hold the top-most added
            and deleted directories, not yet expanded.
        """
        from collections import deque

        added_files = []
        deleted_files = []
        modified_files = []
        new_dirs = []
        del_dirs = []

        # Items are (path, old_dir_id, new_dir_id).
        queue = deque([('/', self.root1, self.root2)])
        while queue:
            path, old_id, new_id = queue.popleft()
            dir1 = fs_mgr.load_seafdir(self.repo_id, self.version, old_id)
            dir2 = fs_mgr.load_seafdir(self.repo_id, self.version, new_id)

            for dent in dir1.get_files_list():
                new_dent = dir2.lookup_dent(dent.name)
                if not new_dent or new_dent.type != dent.type:
                    deleted_files.append(DiffEntry(make_path(path, dent.name), dent.id, dent.size))
                    continue
                # Matched entries are removed from dir2 so that whatever is
                # left in it afterwards is exactly the set of additions.
                dir2.remove_entry(dent.name)
                if new_dent.id != dent.id:
                    modified_files.append(DiffEntry(make_path(path, dent.name), new_dent.id, new_dent.size))

            added_files.extend([DiffEntry(make_path(path, dent.name), dent.id, dent.size)
                                for dent in dir2.get_files_list()])

            for dent in dir1.get_subdirs_list():
                new_dent = dir2.lookup_dent(dent.name)
                if not new_dent or new_dent.type != dent.type:
                    del_dirs.append(DiffEntry(make_path(path, dent.name), dent.id))
                    continue
                dir2.remove_entry(dent.name)
                if new_dent.id != dent.id:
                    # Same name, different content: descend into the pair.
                    queue.append((make_path(path, dent.name), dent.id, new_dent.id))

            new_dirs.extend([DiffEntry(make_path(path, dent.name), dent.id)
                             for dent in dir2.get_subdirs_list()])

        return added_files, deleted_files, modified_files, new_dirs, del_dirs

    def _expand_dirs(self, top_dirs):
        """List ``top_dirs`` plus every directory and file beneath them.

        Args:
            top_dirs: ``DiffEntry`` list of directories to expand.

        Returns:
            ``(dirs, files)``: entries for each directory visited (breadth
            first, starting with ``top_dirs``) and for each file found.
        """
        from collections import deque

        dirs = []
        files = []
        queue = deque(top_dirs)
        while queue:
            dir_dent = queue.popleft()
            dirs.append(DiffEntry(dir_dent.path, dir_dent.obj_id))
            d = fs_mgr.load_seafdir(self.repo_id, self.version, dir_dent.obj_id)
            files.extend([DiffEntry(make_path(dir_dent.path, dent.name), dent.id, dent.size)
                          for dent in d.get_files_list()])
            queue.extend([DiffEntry(make_path(dir_dent.path, dent.name), dent.id)
                          for dent in d.get_subdirs_list()])
        return dirs, files

    @staticmethod
    def _detect_renames(added, deleted):
        """Pair added and deleted entries that share an object ID.

        If an empty file or dir (``ZERO_OBJ_ID``) is generated from renaming
        or moving, it is left in both the added and the deleted list, because
        we can't know where it actually came from.

        Args:
            added: ``DiffEntry`` list of added files or dirs.
            deleted: ``DiffEntry`` list of deleted files or dirs.

        Returns:
            ``(ret_added, ret_deleted, renamed, moved)``. A pair in the same
            parent directory is a rename, otherwise a move; the resulting
            entry has ``path`` = old path and ``new_path`` = new path.
        """
        # obj_id -> deleted entries with that ID. A list per ID is required:
        # keeping a single entry per ID silently dropped every other deleted
        # entry with identical content from the result.
        candidates = {}
        for de in deleted:
            if de.obj_id != ZERO_OBJ_ID:
                candidates.setdefault(de.obj_id, []).append(de)

        ret_added = []
        renamed = []
        moved = []
        for de in added:
            pool = candidates.get(de.obj_id)
            if not pool:
                ret_added.append(de)
                continue
            # Take the latest deletion with this ID, which is the one a
            # single-slot mapping would have paired.
            del_de = pool.pop()
            # Directory entries are created without a size, so de.size is
            # the default -1 for them.
            entry = DiffEntry(del_de.path, de.obj_id, de.size, de.path)
            if os.path.dirname(de.path) == os.path.dirname(del_de.path):
                # it's a rename operation if add and del are in the same dir
                renamed.append(entry)
            else:
                moved.append(entry)

        ret_deleted = []
        for pool in candidates.values():
            ret_deleted.extend(pool)
        ret_deleted.extend([de for de in deleted if de.obj_id == ZERO_OBJ_ID])
        return ret_added, ret_deleted, renamed, moved
181
def make_path(dirname, filename):
    """Join a directory path and an entry name with a single '/'.

    The root directory '/' already ends with the separator, so nothing is
    inserted between it and the name.
    """
    separator = '' if dirname == '/' else '/'
    return dirname + separator + filename
187