1# test_missing_obj_finder.py -- tests for MissingObjectFinder
2# Copyright (C) 2012 syntevo GmbH
3#
4# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
5# General Public License as public by the Free Software Foundation; version 2.0
6# or (at your option) any later version. You can redistribute it and/or
7# modify it under the terms of either of these two licenses.
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14#
15# You should have received a copy of the licenses; if not, see
16# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
17# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
18# License, Version 2.0.
19#
20
21from dulwich.object_store import (
22    MemoryObjectStore,
23    )
24from dulwich.objects import (
25    Blob,
26    )
27from dulwich.tests import TestCase
28from dulwich.tests.utils import (
29    make_object,
30    make_tag,
31    build_commit_graph,
32    )
33
34
35class MissingObjectFinderTest(TestCase):
36
37    def setUp(self):
38        super(MissingObjectFinderTest, self).setUp()
39        self.store = MemoryObjectStore()
40        self.commits = []
41
42    def cmt(self, n):
43        return self.commits[n-1]
44
45    def assertMissingMatch(self, haves, wants, expected):
46        for sha, path in self.store.find_missing_objects(haves, wants):
47            self.assertTrue(
48                    sha in expected,
49                    "(%s,%s) erroneously reported as missing" % (sha, path))
50            expected.remove(sha)
51
52        self.assertEqual(
53                len(expected), 0,
54                "some objects are not reported as missing: %s" % (expected, ))
55
56
57class MOFLinearRepoTest(MissingObjectFinderTest):
58
59    def setUp(self):
60        super(MOFLinearRepoTest, self).setUp()
61        # present in 1, removed in 3
62        f1_1 = make_object(Blob, data=b'f1')
63        # present in all revisions, changed in 2 and 3
64        f2_1 = make_object(Blob, data=b'f2')
65        f2_2 = make_object(Blob, data=b'f2-changed')
66        f2_3 = make_object(Blob, data=b'f2-changed-again')
67        # added in 2, left unmodified in 3
68        f3_2 = make_object(Blob, data=b'f3')
69
70        commit_spec = [[1], [2, 1], [3, 2]]
71        trees = {1: [(b'f1', f1_1), (b'f2', f2_1)],
72                 2: [(b'f1', f1_1), (b'f2', f2_2), (b'f3', f3_2)],
73                 3: [(b'f2', f2_3), (b'f3', f3_2)]}
74        # commit 1: f1 and f2
75        # commit 2: f3 added, f2 changed. Missing shall report commit id and a
76        # tree referenced by commit
77        # commit 3: f1 removed, f2 changed. Commit sha and root tree sha shall
78        # be reported as modified
79        self.commits = build_commit_graph(self.store, commit_spec, trees)
80        self.missing_1_2 = [self.cmt(2).id, self.cmt(2).tree, f2_2.id, f3_2.id]
81        self.missing_2_3 = [self.cmt(3).id, self.cmt(3).tree, f2_3.id]
82        self.missing_1_3 = [
83            self.cmt(2).id, self.cmt(3).id,
84            self.cmt(2).tree, self.cmt(3).tree,
85            f2_2.id, f3_2.id, f2_3.id]
86
87    def test_1_to_2(self):
88        self.assertMissingMatch(
89                [self.cmt(1).id], [self.cmt(2).id],
90                self.missing_1_2)
91
92    def test_2_to_3(self):
93        self.assertMissingMatch(
94                [self.cmt(2).id], [self.cmt(3).id],
95                self.missing_2_3)
96
97    def test_1_to_3(self):
98        self.assertMissingMatch(
99                [self.cmt(1).id], [self.cmt(3).id],
100                self.missing_1_3)
101
102    def test_bogus_haves(self):
103        """Ensure non-existent SHA in haves are tolerated"""
104        bogus_sha = self.cmt(2).id[::-1]
105        haves = [self.cmt(1).id, bogus_sha]
106        wants = [self.cmt(3).id]
107        self.assertMissingMatch(haves, wants, self.missing_1_3)
108
109    def test_bogus_wants_failure(self):
110        """Ensure non-existent SHA in wants are not tolerated"""
111        bogus_sha = self.cmt(2).id[::-1]
112        haves = [self.cmt(1).id]
113        wants = [self.cmt(3).id, bogus_sha]
114        self.assertRaises(
115                KeyError, self.store.find_missing_objects, haves, wants)
116
117    def test_no_changes(self):
118        self.assertMissingMatch([self.cmt(3).id], [self.cmt(3).id], [])
119
120
121class MOFMergeForkRepoTest(MissingObjectFinderTest):
122    # 1 --- 2 --- 4 --- 6 --- 7
123    #          \        /
124    #           3  ---
125    #            \
126    #             5
127
128    def setUp(self):
129        super(MOFMergeForkRepoTest, self).setUp()
130        f1_1 = make_object(Blob, data=b'f1')
131        f1_2 = make_object(Blob, data=b'f1-2')
132        f1_4 = make_object(Blob, data=b'f1-4')
133        f1_7 = make_object(Blob, data=b'f1-2')  # same data as in rev 2
134        f2_1 = make_object(Blob, data=b'f2')
135        f2_3 = make_object(Blob, data=b'f2-3')
136        f3_3 = make_object(Blob, data=b'f3')
137        f3_5 = make_object(Blob, data=b'f3-5')
138        commit_spec = [[1], [2, 1], [3, 2], [4, 2], [5, 3], [6, 3, 4], [7, 6]]
139        trees = {1: [(b'f1', f1_1), (b'f2', f2_1)],
140                 2: [(b'f1', f1_2), (b'f2', f2_1)],  # f1 changed
141                 # f3 added, f2 changed
142                 3: [(b'f1', f1_2), (b'f2', f2_3), (b'f3', f3_3)],
143                 4: [(b'f1', f1_4), (b'f2', f2_1)],  # f1 changed
144                 5: [(b'f1', f1_2), (b'f3', f3_5)],  # f2 removed, f3 changed
145                 # merged 3 and 4
146                 6: [(b'f1', f1_4), (b'f2', f2_3), (b'f3', f3_3)],
147                 # f1 changed to match rev2. f3 removed
148                 7: [(b'f1', f1_7), (b'f2', f2_3)]}
149        self.commits = build_commit_graph(self.store, commit_spec, trees)
150
151        self.f1_2_id = f1_2.id
152        self.f1_4_id = f1_4.id
153        self.f1_7_id = f1_7.id
154        self.f2_3_id = f2_3.id
155        self.f3_3_id = f3_3.id
156
157        self.assertEqual(f1_2.id, f1_7.id, "[sanity]")
158
159    def test_have6_want7(self):
160        # have 6, want 7. Ideally, shall not report f1_7 as it's the same as
161        # f1_2, however, to do so, MissingObjectFinder shall not record trees
162        # of common commits only, but also all parent trees and tree items,
163        # which is an overkill (i.e. in sha_done it records f1_4 as known, and
164        # doesn't record f1_2 was known prior to that, hence can't detect f1_7
165        # is in fact f1_2 and shall not be reported)
166        self.assertMissingMatch(
167                [self.cmt(6).id], [self.cmt(7).id],
168                [self.cmt(7).id, self.cmt(7).tree, self.f1_7_id])
169
170    def test_have4_want7(self):
171        # have 4, want 7. Shall not include rev5 as it is not in the tree
172        # between 4 and 7 (well, it is, but its SHA's are irrelevant for 4..7
173        # commit hierarchy)
174        self.assertMissingMatch([self.cmt(4).id], [self.cmt(7).id], [
175            self.cmt(7).id, self.cmt(6).id, self.cmt(3).id,
176            self.cmt(7).tree, self.cmt(6).tree, self.cmt(3).tree,
177            self.f2_3_id, self.f3_3_id])
178
179    def test_have1_want6(self):
180        # have 1, want 6. Shall not include rev5
181        self.assertMissingMatch([self.cmt(1).id], [self.cmt(6).id], [
182            self.cmt(6).id, self.cmt(4).id, self.cmt(3).id, self.cmt(2).id,
183            self.cmt(6).tree, self.cmt(4).tree, self.cmt(3).tree,
184            self.cmt(2).tree, self.f1_2_id, self.f1_4_id, self.f2_3_id,
185            self.f3_3_id])
186
187    def test_have3_want6(self):
188        # have 3, want 7. Shall not report rev2 and its tree, because
189        # haves(3) means has parents, i.e. rev2, too
190        # BUT shall report any changes descending rev2 (excluding rev3)
191        # Shall NOT report f1_7 as it's techically == f1_2
192        self.assertMissingMatch([self.cmt(3).id], [self.cmt(7).id], [
193              self.cmt(7).id, self.cmt(6).id, self.cmt(4).id,
194              self.cmt(7).tree, self.cmt(6).tree, self.cmt(4).tree,
195              self.f1_4_id])
196
197    def test_have5_want7(self):
198        # have 5, want 7. Common parent is rev2, hence children of rev2 from
199        # a descent line other than rev5 shall be reported
200        # expects f1_4 from rev6. f3_5 is known in rev5;
201        # f1_7 shall be the same as f1_2 (known, too)
202        self.assertMissingMatch([self.cmt(5).id], [self.cmt(7).id], [
203              self.cmt(7).id, self.cmt(6).id, self.cmt(4).id,
204              self.cmt(7).tree, self.cmt(6).tree, self.cmt(4).tree,
205              self.f1_4_id])
206
207
208class MOFTagsTest(MissingObjectFinderTest):
209
210    def setUp(self):
211        super(MOFTagsTest, self).setUp()
212        f1_1 = make_object(Blob, data=b'f1')
213        commit_spec = [[1]]
214        trees = {1: [(b'f1', f1_1)]}
215        self.commits = build_commit_graph(self.store, commit_spec, trees)
216
217        self._normal_tag = make_tag(self.cmt(1))
218        self.store.add_object(self._normal_tag)
219
220        self._tag_of_tag = make_tag(self._normal_tag)
221        self.store.add_object(self._tag_of_tag)
222
223        self._tag_of_tree = make_tag(self.store[self.cmt(1).tree])
224        self.store.add_object(self._tag_of_tree)
225
226        self._tag_of_blob = make_tag(f1_1)
227        self.store.add_object(self._tag_of_blob)
228
229        self._tag_of_tag_of_blob = make_tag(self._tag_of_blob)
230        self.store.add_object(self._tag_of_tag_of_blob)
231
232        self.f1_1_id = f1_1.id
233
234    def test_tagged_commit(self):
235        # The user already has the tagged commit, all they want is the tag,
236        # so send them only the tag object.
237        self.assertMissingMatch([self.cmt(1).id], [self._normal_tag.id],
238                                [self._normal_tag.id])
239
240    # The remaining cases are unusual, but do happen in the wild.
241    def test_tagged_tag(self):
242        # User already has tagged tag, send only tag of tag
243        self.assertMissingMatch([self._normal_tag.id], [self._tag_of_tag.id],
244                                [self._tag_of_tag.id])
245        # User needs both tags, but already has commit
246        self.assertMissingMatch([self.cmt(1).id], [self._tag_of_tag.id],
247                                [self._normal_tag.id, self._tag_of_tag.id])
248
249    def test_tagged_tree(self):
250        self.assertMissingMatch(
251            [], [self._tag_of_tree.id],
252            [self._tag_of_tree.id, self.cmt(1).tree, self.f1_1_id])
253
254    def test_tagged_blob(self):
255        self.assertMissingMatch([], [self._tag_of_blob.id],
256                                [self._tag_of_blob.id, self.f1_1_id])
257
258    def test_tagged_tagged_blob(self):
259        self.assertMissingMatch([], [self._tag_of_tag_of_blob.id],
260                                [self._tag_of_tag_of_blob.id,
261                                 self._tag_of_blob.id, self.f1_1_id])
262