1# Copyright (C) 2006-2011 Canonical Ltd
2#
3# This program is free software; you can redistribute it and/or modify
4# it under the terms of the GNU General Public License as published by
5# the Free Software Foundation; either version 2 of the License, or
6# (at your option) any later version.
7#
8# This program is distributed in the hope that it will be useful,
9# but WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11# GNU General Public License for more details.
12#
13# You should have received a copy of the GNU General Public License
14# along with this program; if not, write to the Free Software
15# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17"""Tests for Knit data structure"""
18
19import gzip
20from io import BytesIO
21from patiencediff import PatienceSequenceMatcher
22import sys
23
24from ... import (
25    errors,
26    multiparent,
27    osutils,
28    tests,
29    transport,
30    )
31from .. import (
32    knit,
33    pack,
34    )
35from ..index import *
36from ..knit import (
37    AnnotatedKnitContent,
38    KnitContent,
39    KnitCorrupt,
40    KnitDataStreamIncompatible,
41    KnitDataStreamUnknown,
42    KnitHeaderError,
43    KnitIndexUnknownMethod,
44    KnitVersionedFiles,
45    PlainKnitContent,
46    _VFContentMapGenerator,
47    _KndxIndex,
48    _KnitGraphIndex,
49    _KnitKeyAccess,
50    make_file_factory,
51    )
52from .. import (
53    knitpack_repo,
54    pack_repo,
55    )
56from ...tests import (
57    TestCase,
58    TestCaseWithMemoryTransport,
59    TestCaseWithTransport,
60    TestNotApplicable,
61    )
62from ..versionedfile import (
63    AbsentContentFactory,
64    ConstantMapper,
65    network_bytes_to_kind_and_offset,
66    RecordingVersionedFilesDecorator,
67    )
68from ...tests import (
69    features,
70    )
71
72
73compiled_knit_feature = features.ModuleAvailableFeature(
74    'breezy.bzr._knit_load_data_pyx')
75
76
77class ErrorTests(TestCase):
78
79    def test_knit_data_stream_incompatible(self):
80        error = KnitDataStreamIncompatible(
81            'stream format', 'target format')
82        self.assertEqual('Cannot insert knit data stream of format '
83                         '"stream format" into knit of format '
84                         '"target format".', str(error))
85
86    def test_knit_data_stream_unknown(self):
87        error = KnitDataStreamUnknown(
88            'stream format')
89        self.assertEqual('Cannot parse knit data stream of format '
90                         '"stream format".', str(error))
91
92    def test_knit_header_error(self):
93        error = KnitHeaderError('line foo\n', 'path/to/file')
94        self.assertEqual("Knit header error: 'line foo\\n' unexpected"
95                         " for file \"path/to/file\".", str(error))
96
97    def test_knit_index_unknown_method(self):
98        error = KnitIndexUnknownMethod('http://host/foo.kndx',
99                                       ['bad', 'no-eol'])
100        self.assertEqual("Knit index http://host/foo.kndx does not have a"
101                         " known method in options: ['bad', 'no-eol']",
102                         str(error))
103
104
105class KnitContentTestsMixin(object):
106
107    def test_constructor(self):
108        content = self._make_content([])
109
110    def test_text(self):
111        content = self._make_content([])
112        self.assertEqual(content.text(), [])
113
114        content = self._make_content(
115            [(b"origin1", b"text1"), (b"origin2", b"text2")])
116        self.assertEqual(content.text(), [b"text1", b"text2"])
117
118    def test_copy(self):
119        content = self._make_content(
120            [(b"origin1", b"text1"), (b"origin2", b"text2")])
121        copy = content.copy()
122        self.assertIsInstance(copy, content.__class__)
123        self.assertEqual(copy.annotate(), content.annotate())
124
125    def assertDerivedBlocksEqual(self, source, target, noeol=False):
126        """Assert that the derived matching blocks match real output"""
127        source_lines = source.splitlines(True)
128        target_lines = target.splitlines(True)
129
130        def nl(line):
131            if noeol and not line.endswith('\n'):
132                return line + '\n'
133            else:
134                return line
135        source_content = self._make_content(
136            [(None, nl(l)) for l in source_lines])
137        target_content = self._make_content(
138            [(None, nl(l)) for l in target_lines])
139        line_delta = source_content.line_delta(target_content)
140        delta_blocks = list(KnitContent.get_line_delta_blocks(line_delta,
141                                                              source_lines, target_lines))
142        matcher = PatienceSequenceMatcher(None, source_lines, target_lines)
143        matcher_blocks = list(matcher.get_matching_blocks())
144        self.assertEqual(matcher_blocks, delta_blocks)
145
146    def test_get_line_delta_blocks(self):
147        self.assertDerivedBlocksEqual('a\nb\nc\n', 'q\nc\n')
148        self.assertDerivedBlocksEqual(TEXT_1, TEXT_1)
149        self.assertDerivedBlocksEqual(TEXT_1, TEXT_1A)
150        self.assertDerivedBlocksEqual(TEXT_1, TEXT_1B)
151        self.assertDerivedBlocksEqual(TEXT_1B, TEXT_1A)
152        self.assertDerivedBlocksEqual(TEXT_1A, TEXT_1B)
153        self.assertDerivedBlocksEqual(TEXT_1A, '')
154        self.assertDerivedBlocksEqual('', TEXT_1A)
155        self.assertDerivedBlocksEqual('', '')
156        self.assertDerivedBlocksEqual('a\nb\nc', 'a\nb\nc\nd')
157
158    def test_get_line_delta_blocks_noeol(self):
159        """Handle historical knit deltas safely
160
161        Some existing knit deltas don't consider the last line to differ
162        when the only difference whether it has a final newline.
163
164        New knit deltas appear to always consider the last line to differ
165        in this case.
166        """
167        self.assertDerivedBlocksEqual('a\nb\nc', 'a\nb\nc\nd\n', noeol=True)
168        self.assertDerivedBlocksEqual('a\nb\nc\nd\n', 'a\nb\nc', noeol=True)
169        self.assertDerivedBlocksEqual('a\nb\nc\n', 'a\nb\nc', noeol=True)
170        self.assertDerivedBlocksEqual('a\nb\nc', 'a\nb\nc\n', noeol=True)
171
172
173TEXT_1 = """\
174Banana cup cakes:
175
176- bananas
177- eggs
178- broken tea cups
179"""
180
181TEXT_1A = """\
182Banana cup cake recipe
183(serves 6)
184
185- bananas
186- eggs
187- broken tea cups
188- self-raising flour
189"""
190
191TEXT_1B = """\
192Banana cup cake recipe
193
194- bananas (do not use plantains!!!)
195- broken tea cups
196- flour
197"""
198
199delta_1_1a = """\
2000,1,2
201Banana cup cake recipe
202(serves 6)
2035,5,1
204- self-raising flour
205"""
206
207TEXT_2 = """\
208Boeuf bourguignon
209
210- beef
211- red wine
212- small onions
213- carrot
214- mushrooms
215"""
216
217
218class TestPlainKnitContent(TestCase, KnitContentTestsMixin):
219
220    def _make_content(self, lines):
221        annotated_content = AnnotatedKnitContent(lines)
222        return PlainKnitContent(annotated_content.text(), 'bogus')
223
224    def test_annotate(self):
225        content = self._make_content([])
226        self.assertEqual(content.annotate(), [])
227
228        content = self._make_content(
229            [("origin1", "text1"), ("origin2", "text2")])
230        self.assertEqual(content.annotate(),
231                         [("bogus", "text1"), ("bogus", "text2")])
232
233    def test_line_delta(self):
234        content1 = self._make_content([("", "a"), ("", "b")])
235        content2 = self._make_content([("", "a"), ("", "a"), ("", "c")])
236        self.assertEqual(content1.line_delta(content2),
237                         [(1, 2, 2, ["a", "c"])])
238
239    def test_line_delta_iter(self):
240        content1 = self._make_content([("", "a"), ("", "b")])
241        content2 = self._make_content([("", "a"), ("", "a"), ("", "c")])
242        it = content1.line_delta_iter(content2)
243        self.assertEqual(next(it), (1, 2, 2, ["a", "c"]))
244        self.assertRaises(StopIteration, next, it)
245
246
247class TestAnnotatedKnitContent(TestCase, KnitContentTestsMixin):
248
249    def _make_content(self, lines):
250        return AnnotatedKnitContent(lines)
251
252    def test_annotate(self):
253        content = self._make_content([])
254        self.assertEqual(content.annotate(), [])
255
256        content = self._make_content(
257            [(b"origin1", b"text1"), (b"origin2", b"text2")])
258        self.assertEqual(content.annotate(),
259                         [(b"origin1", b"text1"), (b"origin2", b"text2")])
260
261    def test_line_delta(self):
262        content1 = self._make_content([("", "a"), ("", "b")])
263        content2 = self._make_content([("", "a"), ("", "a"), ("", "c")])
264        self.assertEqual(content1.line_delta(content2),
265                         [(1, 2, 2, [("", "a"), ("", "c")])])
266
267    def test_line_delta_iter(self):
268        content1 = self._make_content([("", "a"), ("", "b")])
269        content2 = self._make_content([("", "a"), ("", "a"), ("", "c")])
270        it = content1.line_delta_iter(content2)
271        self.assertEqual(next(it), (1, 2, 2, [("", "a"), ("", "c")]))
272        self.assertRaises(StopIteration, next, it)
273
274
275class MockTransport(object):
276
277    def __init__(self, file_lines=None):
278        self.file_lines = file_lines
279        self.calls = []
280        # We have no base directory for the MockTransport
281        self.base = ''
282
283    def get(self, filename):
284        if self.file_lines is None:
285            raise errors.NoSuchFile(filename)
286        else:
287            return BytesIO(b"\n".join(self.file_lines))
288
289    def readv(self, relpath, offsets):
290        fp = self.get(relpath)
291        for offset, size in offsets:
292            fp.seek(offset)
293            yield offset, fp.read(size)
294
295    def __getattr__(self, name):
296        def queue_call(*args, **kwargs):
297            self.calls.append((name, args, kwargs))
298        return queue_call
299
300
301class MockReadvFailingTransport(MockTransport):
302    """Fail in the middle of a readv() result.
303
304    This Transport will successfully yield the first two requested hunks, but
305    raise NoSuchFile for the rest.
306    """
307
308    def readv(self, relpath, offsets):
309        count = 0
310        for result in MockTransport.readv(self, relpath, offsets):
311            count += 1
312            # we use 2 because the first offset is the pack header, the second
313            # is the first actual content requset
314            if count > 2:
315                raise errors.NoSuchFile(relpath)
316            yield result
317
318
319class KnitRecordAccessTestsMixin(object):
320    """Tests for getting and putting knit records."""
321
322    def test_add_raw_records(self):
323        """add_raw_records adds records retrievable later."""
324        access = self.get_access()
325        memos = access.add_raw_records([(b'key', 10)], [b'1234567890'])
326        self.assertEqual([b'1234567890'], list(access.get_raw_records(memos)))
327
328    def test_add_raw_record(self):
329        """add_raw_record adds records retrievable later."""
330        access = self.get_access()
331        memos = access.add_raw_record(b'key', 10, [b'1234567890'])
332        self.assertEqual([b'1234567890'], list(access.get_raw_records([memos])))
333
334    def test_add_several_raw_records(self):
335        """add_raw_records with many records and read some back."""
336        access = self.get_access()
337        memos = access.add_raw_records([(b'key', 10), (b'key2', 2), (b'key3', 5)],
338                                       [b'12345678901234567'])
339        self.assertEqual([b'1234567890', b'12', b'34567'],
340                         list(access.get_raw_records(memos)))
341        self.assertEqual([b'1234567890'],
342                         list(access.get_raw_records(memos[0:1])))
343        self.assertEqual([b'12'],
344                         list(access.get_raw_records(memos[1:2])))
345        self.assertEqual([b'34567'],
346                         list(access.get_raw_records(memos[2:3])))
347        self.assertEqual([b'1234567890', b'34567'],
348                         list(access.get_raw_records(memos[0:1] + memos[2:3])))
349
350
351class TestKnitKnitAccess(TestCaseWithMemoryTransport, KnitRecordAccessTestsMixin):
352    """Tests for the .kndx implementation."""
353
354    def get_access(self):
355        """Get a .knit style access instance."""
356        mapper = ConstantMapper("foo")
357        access = _KnitKeyAccess(self.get_transport(), mapper)
358        return access
359
360
361class _TestException(Exception):
362    """Just an exception for local tests to use."""
363
364
365class TestPackKnitAccess(TestCaseWithMemoryTransport, KnitRecordAccessTestsMixin):
366    """Tests for the pack based access."""
367
368    def get_access(self):
369        return self._get_access()[0]
370
371    def _get_access(self, packname='packfile', index='FOO'):
372        transport = self.get_transport()
373
374        def write_data(bytes):
375            transport.append_bytes(packname, bytes)
376        writer = pack.ContainerWriter(write_data)
377        writer.begin()
378        access = pack_repo._DirectPackAccess({})
379        access.set_writer(writer, index, (transport, packname))
380        return access, writer
381
382    def make_pack_file(self):
383        """Create a pack file with 2 records."""
384        access, writer = self._get_access(packname='packname', index='foo')
385        memos = []
386        memos.extend(access.add_raw_records([(b'key1', 10)], [b'1234567890']))
387        memos.extend(access.add_raw_records([(b'key2', 5)], [b'12345']))
388        writer.end()
389        return memos
390
391    def test_pack_collection_pack_retries(self):
392        """An explicit pack of a pack collection succeeds even when a
393        concurrent pack happens.
394        """
395        builder = self.make_branch_builder('.')
396        builder.start_series()
397        builder.build_snapshot(None, [
398            ('add', ('', b'root-id', 'directory', None)),
399            ('add', ('file', b'file-id', 'file', b'content\nrev 1\n')),
400            ], revision_id=b'rev-1')
401        builder.build_snapshot([b'rev-1'], [
402            ('modify', ('file', b'content\nrev 2\n')),
403            ], revision_id=b'rev-2')
404        builder.build_snapshot([b'rev-2'], [
405            ('modify', ('file', b'content\nrev 3\n')),
406            ], revision_id=b'rev-3')
407        self.addCleanup(builder.finish_series)
408        b = builder.get_branch()
409        self.addCleanup(b.lock_write().unlock)
410        repo = b.repository
411        collection = repo._pack_collection
412        # Concurrently repack the repo.
413        reopened_repo = repo.controldir.open_repository()
414        reopened_repo.pack()
415        # Pack the new pack.
416        collection.pack()
417
418    def make_vf_for_retrying(self):
419        """Create 3 packs and a reload function.
420
421        Originally, 2 pack files will have the data, but one will be missing.
422        And then the third will be used in place of the first two if reload()
423        is called.
424
425        :return: (versioned_file, reload_counter)
426            versioned_file  a KnitVersionedFiles using the packs for access
427        """
428        builder = self.make_branch_builder('.', format="1.9")
429        builder.start_series()
430        builder.build_snapshot(None, [
431            ('add', ('', b'root-id', 'directory', None)),
432            ('add', ('file', b'file-id', 'file', b'content\nrev 1\n')),
433            ], revision_id=b'rev-1')
434        builder.build_snapshot([b'rev-1'], [
435            ('modify', ('file', b'content\nrev 2\n')),
436            ], revision_id=b'rev-2')
437        builder.build_snapshot([b'rev-2'], [
438            ('modify', ('file', b'content\nrev 3\n')),
439            ], revision_id=b'rev-3')
440        builder.finish_series()
441        b = builder.get_branch()
442        b.lock_write()
443        self.addCleanup(b.unlock)
444        # Pack these three revisions into another pack file, but don't remove
445        # the originals
446        repo = b.repository
447        collection = repo._pack_collection
448        collection.ensure_loaded()
449        orig_packs = collection.packs
450        packer = knitpack_repo.KnitPacker(collection, orig_packs, '.testpack')
451        new_pack = packer.pack()
452        # forget about the new pack
453        collection.reset()
454        repo.refresh_data()
455        vf = repo.revisions
456        # Set up a reload() function that switches to using the new pack file
457        new_index = new_pack.revision_index
458        access_tuple = new_pack.access_tuple()
459        reload_counter = [0, 0, 0]
460
461        def reload():
462            reload_counter[0] += 1
463            if reload_counter[1] > 0:
464                # We already reloaded, nothing more to do
465                reload_counter[2] += 1
466                return False
467            reload_counter[1] += 1
468            vf._index._graph_index._indices[:] = [new_index]
469            vf._access._indices.clear()
470            vf._access._indices[new_index] = access_tuple
471            return True
472        # Delete one of the pack files so the data will need to be reloaded. We
473        # will delete the file with 'rev-2' in it
474        trans, name = orig_packs[1].access_tuple()
475        trans.delete(name)
476        # We don't have the index trigger reloading because we want to test
477        # that we reload when the .pack disappears
478        vf._access._reload_func = reload
479        return vf, reload_counter
480
481    def make_reload_func(self, return_val=True):
482        reload_called = [0]
483
484        def reload():
485            reload_called[0] += 1
486            return return_val
487        return reload_called, reload
488
489    def make_retry_exception(self):
490        # We raise a real exception so that sys.exc_info() is properly
491        # populated
492        try:
493            raise _TestException('foobar')
494        except _TestException as e:
495            retry_exc = errors.RetryWithNewPacks(None, reload_occurred=False,
496                                                 exc_info=sys.exc_info())
497        # GZ 2010-08-10: Cycle with exc_info affects 3 tests
498        return retry_exc
499
500    def test_read_from_several_packs(self):
501        access, writer = self._get_access()
502        memos = []
503        memos.extend(access.add_raw_records([(b'key', 10)], [b'1234567890']))
504        writer.end()
505        access, writer = self._get_access('pack2', 'FOOBAR')
506        memos.extend(access.add_raw_records([(b'key', 5)], [b'12345']))
507        writer.end()
508        access, writer = self._get_access('pack3', 'BAZ')
509        memos.extend(access.add_raw_records([(b'key', 5)], [b'alpha']))
510        writer.end()
511        transport = self.get_transport()
512        access = pack_repo._DirectPackAccess({"FOO": (transport, 'packfile'),
513                                              "FOOBAR": (transport, 'pack2'),
514                                              "BAZ": (transport, 'pack3')})
515        self.assertEqual([b'1234567890', b'12345', b'alpha'],
516                         list(access.get_raw_records(memos)))
517        self.assertEqual([b'1234567890'],
518                         list(access.get_raw_records(memos[0:1])))
519        self.assertEqual([b'12345'],
520                         list(access.get_raw_records(memos[1:2])))
521        self.assertEqual([b'alpha'],
522                         list(access.get_raw_records(memos[2:3])))
523        self.assertEqual([b'1234567890', b'alpha'],
524                         list(access.get_raw_records(memos[0:1] + memos[2:3])))
525
526    def test_set_writer(self):
527        """The writer should be settable post construction."""
528        access = pack_repo._DirectPackAccess({})
529        transport = self.get_transport()
530        packname = 'packfile'
531        index = 'foo'
532
533        def write_data(bytes):
534            transport.append_bytes(packname, bytes)
535        writer = pack.ContainerWriter(write_data)
536        writer.begin()
537        access.set_writer(writer, index, (transport, packname))
538        memos = access.add_raw_records([(b'key', 10)], [b'1234567890'])
539        writer.end()
540        self.assertEqual([b'1234567890'], list(access.get_raw_records(memos)))
541
542    def test_missing_index_raises_retry(self):
543        memos = self.make_pack_file()
544        transport = self.get_transport()
545        reload_called, reload_func = self.make_reload_func()
546        # Note that the index key has changed from 'foo' to 'bar'
547        access = pack_repo._DirectPackAccess({'bar': (transport, 'packname')},
548                                             reload_func=reload_func)
549        e = self.assertListRaises(errors.RetryWithNewPacks,
550                                  access.get_raw_records, memos)
551        # Because a key was passed in which does not match our index list, we
552        # assume that the listing was already reloaded
553        self.assertTrue(e.reload_occurred)
554        self.assertIsInstance(e.exc_info, tuple)
555        self.assertIs(e.exc_info[0], KeyError)
556        self.assertIsInstance(e.exc_info[1], KeyError)
557
558    def test_missing_index_raises_key_error_with_no_reload(self):
559        memos = self.make_pack_file()
560        transport = self.get_transport()
561        # Note that the index key has changed from 'foo' to 'bar'
562        access = pack_repo._DirectPackAccess({'bar': (transport, 'packname')})
563        e = self.assertListRaises(KeyError, access.get_raw_records, memos)
564
565    def test_missing_file_raises_retry(self):
566        memos = self.make_pack_file()
567        transport = self.get_transport()
568        reload_called, reload_func = self.make_reload_func()
569        # Note that the 'filename' has been changed to 'different-packname'
570        access = pack_repo._DirectPackAccess(
571            {'foo': (transport, 'different-packname')},
572            reload_func=reload_func)
573        e = self.assertListRaises(errors.RetryWithNewPacks,
574                                  access.get_raw_records, memos)
575        # The file has gone missing, so we assume we need to reload
576        self.assertFalse(e.reload_occurred)
577        self.assertIsInstance(e.exc_info, tuple)
578        self.assertIs(e.exc_info[0], errors.NoSuchFile)
579        self.assertIsInstance(e.exc_info[1], errors.NoSuchFile)
580        self.assertEqual('different-packname', e.exc_info[1].path)
581
582    def test_missing_file_raises_no_such_file_with_no_reload(self):
583        memos = self.make_pack_file()
584        transport = self.get_transport()
585        # Note that the 'filename' has been changed to 'different-packname'
586        access = pack_repo._DirectPackAccess(
587            {'foo': (transport, 'different-packname')})
588        e = self.assertListRaises(errors.NoSuchFile,
589                                  access.get_raw_records, memos)
590
591    def test_failing_readv_raises_retry(self):
592        memos = self.make_pack_file()
593        transport = self.get_transport()
594        failing_transport = MockReadvFailingTransport(
595            [transport.get_bytes('packname')])
596        reload_called, reload_func = self.make_reload_func()
597        access = pack_repo._DirectPackAccess(
598            {'foo': (failing_transport, 'packname')},
599            reload_func=reload_func)
600        # Asking for a single record will not trigger the Mock failure
601        self.assertEqual([b'1234567890'],
602                         list(access.get_raw_records(memos[:1])))
603        self.assertEqual([b'12345'],
604                         list(access.get_raw_records(memos[1:2])))
605        # A multiple offset readv() will fail mid-way through
606        e = self.assertListRaises(errors.RetryWithNewPacks,
607                                  access.get_raw_records, memos)
608        # The file has gone missing, so we assume we need to reload
609        self.assertFalse(e.reload_occurred)
610        self.assertIsInstance(e.exc_info, tuple)
611        self.assertIs(e.exc_info[0], errors.NoSuchFile)
612        self.assertIsInstance(e.exc_info[1], errors.NoSuchFile)
613        self.assertEqual('packname', e.exc_info[1].path)
614
615    def test_failing_readv_raises_no_such_file_with_no_reload(self):
616        memos = self.make_pack_file()
617        transport = self.get_transport()
618        failing_transport = MockReadvFailingTransport(
619            [transport.get_bytes('packname')])
620        reload_called, reload_func = self.make_reload_func()
621        access = pack_repo._DirectPackAccess(
622            {'foo': (failing_transport, 'packname')})
623        # Asking for a single record will not trigger the Mock failure
624        self.assertEqual([b'1234567890'],
625                         list(access.get_raw_records(memos[:1])))
626        self.assertEqual([b'12345'],
627                         list(access.get_raw_records(memos[1:2])))
628        # A multiple offset readv() will fail mid-way through
629        e = self.assertListRaises(errors.NoSuchFile,
630                                  access.get_raw_records, memos)
631
632    def test_reload_or_raise_no_reload(self):
633        access = pack_repo._DirectPackAccess({}, reload_func=None)
634        retry_exc = self.make_retry_exception()
635        # Without a reload_func, we will just re-raise the original exception
636        self.assertRaises(_TestException, access.reload_or_raise, retry_exc)
637
638    def test_reload_or_raise_reload_changed(self):
639        reload_called, reload_func = self.make_reload_func(return_val=True)
640        access = pack_repo._DirectPackAccess({}, reload_func=reload_func)
641        retry_exc = self.make_retry_exception()
642        access.reload_or_raise(retry_exc)
643        self.assertEqual([1], reload_called)
644        retry_exc.reload_occurred = True
645        access.reload_or_raise(retry_exc)
646        self.assertEqual([2], reload_called)
647
648    def test_reload_or_raise_reload_no_change(self):
649        reload_called, reload_func = self.make_reload_func(return_val=False)
650        access = pack_repo._DirectPackAccess({}, reload_func=reload_func)
651        retry_exc = self.make_retry_exception()
652        # If reload_occurred is False, then we consider it an error to have
653        # reload_func() return False (no changes).
654        self.assertRaises(_TestException, access.reload_or_raise, retry_exc)
655        self.assertEqual([1], reload_called)
656        retry_exc.reload_occurred = True
657        # If reload_occurred is True, then we assume nothing changed because
658        # it had changed earlier, but didn't change again
659        access.reload_or_raise(retry_exc)
660        self.assertEqual([2], reload_called)
661
662    def test_annotate_retries(self):
663        vf, reload_counter = self.make_vf_for_retrying()
664        # It is a little bit bogus to annotate the Revision VF, but it works,
665        # as we have ancestry stored there
666        key = (b'rev-3',)
667        reload_lines = vf.annotate(key)
668        self.assertEqual([1, 1, 0], reload_counter)
669        plain_lines = vf.annotate(key)
670        self.assertEqual([1, 1, 0], reload_counter)  # No extra reloading
671        if reload_lines != plain_lines:
672            self.fail('Annotation was not identical with reloading.')
673        # Now delete the packs-in-use, which should trigger another reload, but
674        # this time we just raise an exception because we can't recover
675        for trans, name in vf._access._indices.values():
676            trans.delete(name)
677        self.assertRaises(errors.NoSuchFile, vf.annotate, key)
678        self.assertEqual([2, 1, 1], reload_counter)
679
680    def test__get_record_map_retries(self):
681        vf, reload_counter = self.make_vf_for_retrying()
682        keys = [(b'rev-1',), (b'rev-2',), (b'rev-3',)]
683        records = vf._get_record_map(keys)
684        self.assertEqual(keys, sorted(records.keys()))
685        self.assertEqual([1, 1, 0], reload_counter)
686        # Now delete the packs-in-use, which should trigger another reload, but
687        # this time we just raise an exception because we can't recover
688        for trans, name in vf._access._indices.values():
689            trans.delete(name)
690        self.assertRaises(errors.NoSuchFile, vf._get_record_map, keys)
691        self.assertEqual([2, 1, 1], reload_counter)
692
693    def test_get_record_stream_retries(self):
694        vf, reload_counter = self.make_vf_for_retrying()
695        keys = [(b'rev-1',), (b'rev-2',), (b'rev-3',)]
696        record_stream = vf.get_record_stream(keys, 'topological', False)
697        record = next(record_stream)
698        self.assertEqual((b'rev-1',), record.key)
699        self.assertEqual([0, 0, 0], reload_counter)
700        record = next(record_stream)
701        self.assertEqual((b'rev-2',), record.key)
702        self.assertEqual([1, 1, 0], reload_counter)
703        record = next(record_stream)
704        self.assertEqual((b'rev-3',), record.key)
705        self.assertEqual([1, 1, 0], reload_counter)
706        # Now delete all pack files, and see that we raise the right error
707        for trans, name in vf._access._indices.values():
708            trans.delete(name)
709        self.assertListRaises(errors.NoSuchFile,
710                              vf.get_record_stream, keys, 'topological', False)
711
712    def test_iter_lines_added_or_present_in_keys_retries(self):
713        vf, reload_counter = self.make_vf_for_retrying()
714        keys = [(b'rev-1',), (b'rev-2',), (b'rev-3',)]
715        # Unfortunately, iter_lines_added_or_present_in_keys iterates the
716        # result in random order (determined by the iteration order from a
717        # set()), so we don't have any solid way to trigger whether data is
718        # read before or after. However we tried to delete the middle node to
719        # exercise the code well.
720        # What we care about is that all lines are always yielded, but not
721        # duplicated
722        count = 0
723        reload_lines = sorted(vf.iter_lines_added_or_present_in_keys(keys))
724        self.assertEqual([1, 1, 0], reload_counter)
725        # Now do it again, to make sure the result is equivalent
726        plain_lines = sorted(vf.iter_lines_added_or_present_in_keys(keys))
727        self.assertEqual([1, 1, 0], reload_counter)  # No extra reloading
728        self.assertEqual(plain_lines, reload_lines)
729        self.assertEqual(21, len(plain_lines))
730        # Now delete all pack files, and see that we raise the right error
731        for trans, name in vf._access._indices.values():
732            trans.delete(name)
733        self.assertListRaises(errors.NoSuchFile,
734                              vf.iter_lines_added_or_present_in_keys, keys)
735        self.assertEqual([2, 1, 1], reload_counter)
736
737    def test_get_record_stream_yields_disk_sorted_order(self):
738        # if we get 'unordered' pick a semi-optimal order for reading. The
739        # order should be grouped by pack file, and then by position in file
740        repo = self.make_repository('test', format='pack-0.92')
741        repo.lock_write()
742        self.addCleanup(repo.unlock)
743        repo.start_write_group()
744        vf = repo.texts
745        vf.add_lines((b'f-id', b'rev-5'), [(b'f-id', b'rev-4')], [b'lines\n'])
746        vf.add_lines((b'f-id', b'rev-1'), [], [b'lines\n'])
747        vf.add_lines((b'f-id', b'rev-2'), [(b'f-id', b'rev-1')], [b'lines\n'])
748        repo.commit_write_group()
749        # We inserted them as rev-5, rev-1, rev-2, we should get them back in
750        # the same order
751        stream = vf.get_record_stream([(b'f-id', b'rev-1'), (b'f-id', b'rev-5'),
752                                       (b'f-id', b'rev-2')], 'unordered', False)
753        keys = [r.key for r in stream]
754        self.assertEqual([(b'f-id', b'rev-5'), (b'f-id', b'rev-1'),
755                          (b'f-id', b'rev-2')], keys)
756        repo.start_write_group()
757        vf.add_lines((b'f-id', b'rev-4'), [(b'f-id', b'rev-3')], [b'lines\n'])
758        vf.add_lines((b'f-id', b'rev-3'), [(b'f-id', b'rev-2')], [b'lines\n'])
759        vf.add_lines((b'f-id', b'rev-6'), [(b'f-id', b'rev-5')], [b'lines\n'])
760        repo.commit_write_group()
761        # Request in random order, to make sure the output order isn't based on
762        # the request
763        request_keys = set((b'f-id', b'rev-%d' % i) for i in range(1, 7))
764        stream = vf.get_record_stream(request_keys, 'unordered', False)
765        keys = [r.key for r in stream]
766        # We want to get the keys back in disk order, but it doesn't matter
767        # which pack we read from first. So this can come back in 2 orders
768        alt1 = [(b'f-id', b'rev-%d' % i) for i in [4, 3, 6, 5, 1, 2]]
769        alt2 = [(b'f-id', b'rev-%d' % i) for i in [5, 1, 2, 4, 3, 6]]
770        if keys != alt1 and keys != alt2:
771            self.fail('Returned key order did not match either expected order.'
772                      ' expected %s or %s, not %s'
773                      % (alt1, alt2, keys))
774
775
776class LowLevelKnitDataTests(TestCase):
777
778    def create_gz_content(self, text):
779        sio = BytesIO()
780        with gzip.GzipFile(mode='wb', fileobj=sio) as gz_file:
781            gz_file.write(text)
782        return sio.getvalue()
783
784    def make_multiple_records(self):
785        """Create the content for multiple records."""
786        sha1sum = osutils.sha_string(b'foo\nbar\n')
787        total_txt = []
788        gz_txt = self.create_gz_content(b'version rev-id-1 2 %s\n'
789                                        b'foo\n'
790                                        b'bar\n'
791                                        b'end rev-id-1\n'
792                                        % (sha1sum,))
793        record_1 = (0, len(gz_txt), sha1sum)
794        total_txt.append(gz_txt)
795        sha1sum = osutils.sha_string(b'baz\n')
796        gz_txt = self.create_gz_content(b'version rev-id-2 1 %s\n'
797                                        b'baz\n'
798                                        b'end rev-id-2\n'
799                                        % (sha1sum,))
800        record_2 = (record_1[1], len(gz_txt), sha1sum)
801        total_txt.append(gz_txt)
802        return total_txt, record_1, record_2
803
804    def test_valid_knit_data(self):
805        sha1sum = osutils.sha_string(b'foo\nbar\n')
806        gz_txt = self.create_gz_content(b'version rev-id-1 2 %s\n'
807                                        b'foo\n'
808                                        b'bar\n'
809                                        b'end rev-id-1\n'
810                                        % (sha1sum,))
811        transport = MockTransport([gz_txt])
812        access = _KnitKeyAccess(transport, ConstantMapper('filename'))
813        knit = KnitVersionedFiles(None, access)
814        records = [((b'rev-id-1',), ((b'rev-id-1',), 0, len(gz_txt)))]
815
816        contents = list(knit._read_records_iter(records))
817        self.assertEqual([((b'rev-id-1',), [b'foo\n', b'bar\n'],
818                           b'4e48e2c9a3d2ca8a708cb0cc545700544efb5021')], contents)
819
820        raw_contents = list(knit._read_records_iter_raw(records))
821        self.assertEqual([((b'rev-id-1',), gz_txt, sha1sum)], raw_contents)
822
823    def test_multiple_records_valid(self):
824        total_txt, record_1, record_2 = self.make_multiple_records()
825        transport = MockTransport([b''.join(total_txt)])
826        access = _KnitKeyAccess(transport, ConstantMapper('filename'))
827        knit = KnitVersionedFiles(None, access)
828        records = [((b'rev-id-1',), ((b'rev-id-1',), record_1[0], record_1[1])),
829                   ((b'rev-id-2',), ((b'rev-id-2',), record_2[0], record_2[1]))]
830
831        contents = list(knit._read_records_iter(records))
832        self.assertEqual([((b'rev-id-1',), [b'foo\n', b'bar\n'], record_1[2]),
833                          ((b'rev-id-2',), [b'baz\n'], record_2[2])],
834                         contents)
835
836        raw_contents = list(knit._read_records_iter_raw(records))
837        self.assertEqual([((b'rev-id-1',), total_txt[0], record_1[2]),
838                          ((b'rev-id-2',), total_txt[1], record_2[2])],
839                         raw_contents)
840
841    def test_not_enough_lines(self):
842        sha1sum = osutils.sha_string(b'foo\n')
843        # record says 2 lines data says 1
844        gz_txt = self.create_gz_content(b'version rev-id-1 2 %s\n'
845                                        b'foo\n'
846                                        b'end rev-id-1\n'
847                                        % (sha1sum,))
848        transport = MockTransport([gz_txt])
849        access = _KnitKeyAccess(transport, ConstantMapper('filename'))
850        knit = KnitVersionedFiles(None, access)
851        records = [((b'rev-id-1',), ((b'rev-id-1',), 0, len(gz_txt)))]
852        self.assertRaises(KnitCorrupt, list,
853                          knit._read_records_iter(records))
854
855        # read_records_iter_raw won't detect that sort of mismatch/corruption
856        raw_contents = list(knit._read_records_iter_raw(records))
857        self.assertEqual([((b'rev-id-1',), gz_txt, sha1sum)], raw_contents)
858
859    def test_too_many_lines(self):
860        sha1sum = osutils.sha_string(b'foo\nbar\n')
861        # record says 1 lines data says 2
862        gz_txt = self.create_gz_content(b'version rev-id-1 1 %s\n'
863                                        b'foo\n'
864                                        b'bar\n'
865                                        b'end rev-id-1\n'
866                                        % (sha1sum,))
867        transport = MockTransport([gz_txt])
868        access = _KnitKeyAccess(transport, ConstantMapper('filename'))
869        knit = KnitVersionedFiles(None, access)
870        records = [((b'rev-id-1',), ((b'rev-id-1',), 0, len(gz_txt)))]
871        self.assertRaises(KnitCorrupt, list,
872                          knit._read_records_iter(records))
873
874        # read_records_iter_raw won't detect that sort of mismatch/corruption
875        raw_contents = list(knit._read_records_iter_raw(records))
876        self.assertEqual([((b'rev-id-1',), gz_txt, sha1sum)], raw_contents)
877
878    def test_mismatched_version_id(self):
879        sha1sum = osutils.sha_string(b'foo\nbar\n')
880        gz_txt = self.create_gz_content(b'version rev-id-1 2 %s\n'
881                                        b'foo\n'
882                                        b'bar\n'
883                                        b'end rev-id-1\n'
884                                        % (sha1sum,))
885        transport = MockTransport([gz_txt])
886        access = _KnitKeyAccess(transport, ConstantMapper('filename'))
887        knit = KnitVersionedFiles(None, access)
888        # We are asking for rev-id-2, but the data is rev-id-1
889        records = [((b'rev-id-2',), ((b'rev-id-2',), 0, len(gz_txt)))]
890        self.assertRaises(KnitCorrupt, list,
891                          knit._read_records_iter(records))
892
893        # read_records_iter_raw detects mismatches in the header
894        self.assertRaises(KnitCorrupt, list,
895                          knit._read_records_iter_raw(records))
896
897    def test_uncompressed_data(self):
898        sha1sum = osutils.sha_string(b'foo\nbar\n')
899        txt = (b'version rev-id-1 2 %s\n'
900               b'foo\n'
901               b'bar\n'
902               b'end rev-id-1\n'
903               % (sha1sum,))
904        transport = MockTransport([txt])
905        access = _KnitKeyAccess(transport, ConstantMapper('filename'))
906        knit = KnitVersionedFiles(None, access)
907        records = [((b'rev-id-1',), ((b'rev-id-1',), 0, len(txt)))]
908
909        # We don't have valid gzip data ==> corrupt
910        self.assertRaises(KnitCorrupt, list,
911                          knit._read_records_iter(records))
912
913        # read_records_iter_raw will notice the bad data
914        self.assertRaises(KnitCorrupt, list,
915                          knit._read_records_iter_raw(records))
916
917    def test_corrupted_data(self):
918        sha1sum = osutils.sha_string(b'foo\nbar\n')
919        gz_txt = self.create_gz_content(b'version rev-id-1 2 %s\n'
920                                        b'foo\n'
921                                        b'bar\n'
922                                        b'end rev-id-1\n'
923                                        % (sha1sum,))
924        # Change 2 bytes in the middle to \xff
925        gz_txt = gz_txt[:10] + b'\xff\xff' + gz_txt[12:]
926        transport = MockTransport([gz_txt])
927        access = _KnitKeyAccess(transport, ConstantMapper('filename'))
928        knit = KnitVersionedFiles(None, access)
929        records = [((b'rev-id-1',), ((b'rev-id-1',), 0, len(gz_txt)))]
930        self.assertRaises(KnitCorrupt, list,
931                          knit._read_records_iter(records))
932        # read_records_iter_raw will barf on bad gz data
933        self.assertRaises(KnitCorrupt, list,
934                          knit._read_records_iter_raw(records))
935
936
937class LowLevelKnitIndexTests(TestCase):
938
939    @property
940    def _load_data(self):
941        from .._knit_load_data_py import _load_data_py
942        return _load_data_py
943
944    def get_knit_index(self, transport, name, mode):
945        mapper = ConstantMapper(name)
946        self.overrideAttr(knit, '_load_data', self._load_data)
947
948        def allow_writes():
949            return 'w' in mode
950        return _KndxIndex(transport, mapper, lambda: None, allow_writes, lambda: True)
951
952    def test_create_file(self):
953        transport = MockTransport()
954        index = self.get_knit_index(transport, "filename", "w")
955        index.keys()
956        call = transport.calls.pop(0)
957        # call[1][1] is a BytesIO - we can't test it by simple equality.
958        self.assertEqual('put_file_non_atomic', call[0])
959        self.assertEqual('filename.kndx', call[1][0])
960        # With no history, _KndxIndex writes a new index:
961        self.assertEqual(_KndxIndex.HEADER,
962                         call[1][1].getvalue())
963        self.assertEqual({'create_parent_dir': True}, call[2])
964
965    def test_read_utf8_version_id(self):
966        unicode_revision_id = u"version-\N{CYRILLIC CAPITAL LETTER A}"
967        utf8_revision_id = unicode_revision_id.encode('utf-8')
968        transport = MockTransport([
969            _KndxIndex.HEADER,
970            b'%s option 0 1 :' % (utf8_revision_id,)
971            ])
972        index = self.get_knit_index(transport, "filename", "r")
973        # _KndxIndex is a private class, and deals in utf8 revision_ids, not
974        # Unicode revision_ids.
975        self.assertEqual({(utf8_revision_id,): ()},
976                         index.get_parent_map(index.keys()))
977        self.assertFalse((unicode_revision_id,) in index.keys())
978
979    def test_read_utf8_parents(self):
980        unicode_revision_id = u"version-\N{CYRILLIC CAPITAL LETTER A}"
981        utf8_revision_id = unicode_revision_id.encode('utf-8')
982        transport = MockTransport([
983            _KndxIndex.HEADER,
984            b"version option 0 1 .%s :" % (utf8_revision_id,)
985            ])
986        index = self.get_knit_index(transport, "filename", "r")
987        self.assertEqual({(b"version",): ((utf8_revision_id,),)},
988                         index.get_parent_map(index.keys()))
989
990    def test_read_ignore_corrupted_lines(self):
991        transport = MockTransport([
992            _KndxIndex.HEADER,
993            b"corrupted",
994            b"corrupted options 0 1 .b .c ",
995            b"version options 0 1 :"
996            ])
997        index = self.get_knit_index(transport, "filename", "r")
998        self.assertEqual(1, len(index.keys()))
999        self.assertEqual({(b"version",)}, index.keys())
1000
1001    def test_read_corrupted_header(self):
1002        transport = MockTransport([b'not a bzr knit index header\n'])
1003        index = self.get_knit_index(transport, "filename", "r")
1004        self.assertRaises(KnitHeaderError, index.keys)
1005
1006    def test_read_duplicate_entries(self):
1007        transport = MockTransport([
1008            _KndxIndex.HEADER,
1009            b"parent options 0 1 :",
1010            b"version options1 0 1 0 :",
1011            b"version options2 1 2 .other :",
1012            b"version options3 3 4 0 .other :"
1013            ])
1014        index = self.get_knit_index(transport, "filename", "r")
1015        self.assertEqual(2, len(index.keys()))
1016        # check that the index used is the first one written. (Specific
1017        # to KnitIndex style indices.
1018        self.assertEqual(b"1", index._dictionary_compress([(b"version",)]))
1019        self.assertEqual(((b"version",), 3, 4),
1020                         index.get_position((b"version",)))
1021        self.assertEqual([b"options3"], index.get_options((b"version",)))
1022        self.assertEqual({(b"version",): ((b"parent",), (b"other",))},
1023                         index.get_parent_map([(b"version",)]))
1024
1025    def test_read_compressed_parents(self):
1026        transport = MockTransport([
1027            _KndxIndex.HEADER,
1028            b"a option 0 1 :",
1029            b"b option 0 1 0 :",
1030            b"c option 0 1 1 0 :",
1031            ])
1032        index = self.get_knit_index(transport, "filename", "r")
1033        self.assertEqual({(b"b",): ((b"a",),), (b"c",): ((b"b",), (b"a",))},
1034                         index.get_parent_map([(b"b",), (b"c",)]))
1035
1036    def test_write_utf8_version_id(self):
1037        unicode_revision_id = u"version-\N{CYRILLIC CAPITAL LETTER A}"
1038        utf8_revision_id = unicode_revision_id.encode('utf-8')
1039        transport = MockTransport([
1040            _KndxIndex.HEADER
1041            ])
1042        index = self.get_knit_index(transport, "filename", "r")
1043        index.add_records([
1044            ((utf8_revision_id,), [b"option"], ((utf8_revision_id,), 0, 1), [])])
1045        call = transport.calls.pop(0)
1046        # call[1][1] is a BytesIO - we can't test it by simple equality.
1047        self.assertEqual('put_file_non_atomic', call[0])
1048        self.assertEqual('filename.kndx', call[1][0])
1049        # With no history, _KndxIndex writes a new index:
1050        self.assertEqual(_KndxIndex.HEADER +
1051                         b"\n%s option 0 1  :" % (utf8_revision_id,),
1052                         call[1][1].getvalue())
1053        self.assertEqual({'create_parent_dir': True}, call[2])
1054
1055    def test_write_utf8_parents(self):
1056        unicode_revision_id = u"version-\N{CYRILLIC CAPITAL LETTER A}"
1057        utf8_revision_id = unicode_revision_id.encode('utf-8')
1058        transport = MockTransport([
1059            _KndxIndex.HEADER
1060            ])
1061        index = self.get_knit_index(transport, "filename", "r")
1062        index.add_records([
1063            ((b"version",), [b"option"], ((b"version",), 0, 1), [(utf8_revision_id,)])])
1064        call = transport.calls.pop(0)
1065        # call[1][1] is a BytesIO - we can't test it by simple equality.
1066        self.assertEqual('put_file_non_atomic', call[0])
1067        self.assertEqual('filename.kndx', call[1][0])
1068        # With no history, _KndxIndex writes a new index:
1069        self.assertEqual(_KndxIndex.HEADER +
1070                         b"\nversion option 0 1 .%s :" % (utf8_revision_id,),
1071                         call[1][1].getvalue())
1072        self.assertEqual({'create_parent_dir': True}, call[2])
1073
1074    def test_keys(self):
1075        transport = MockTransport([
1076            _KndxIndex.HEADER
1077            ])
1078        index = self.get_knit_index(transport, "filename", "r")
1079
1080        self.assertEqual(set(), index.keys())
1081
1082        index.add_records([((b"a",), [b"option"], ((b"a",), 0, 1), [])])
1083        self.assertEqual({(b"a",)}, index.keys())
1084
1085        index.add_records([((b"a",), [b"option"], ((b"a",), 0, 1), [])])
1086        self.assertEqual({(b"a",)}, index.keys())
1087
1088        index.add_records([((b"b",), [b"option"], ((b"b",), 0, 1), [])])
1089        self.assertEqual({(b"a",), (b"b",)}, index.keys())
1090
1091    def add_a_b(self, index, random_id=None):
1092        kwargs = {}
1093        if random_id is not None:
1094            kwargs["random_id"] = random_id
1095        index.add_records([
1096            ((b"a",), [b"option"], ((b"a",), 0, 1), [(b"b",)]),
1097            ((b"a",), [b"opt"], ((b"a",), 1, 2), [(b"c",)]),
1098            ((b"b",), [b"option"], ((b"b",), 2, 3), [(b"a",)])
1099            ], **kwargs)
1100
1101    def assertIndexIsAB(self, index):
1102        self.assertEqual({
1103            (b'a',): ((b'c',),),
1104            (b'b',): ((b'a',),),
1105            },
1106            index.get_parent_map(index.keys()))
1107        self.assertEqual(((b"a",), 1, 2), index.get_position((b"a",)))
1108        self.assertEqual(((b"b",), 2, 3), index.get_position((b"b",)))
1109        self.assertEqual([b"opt"], index.get_options((b"a",)))
1110
1111    def test_add_versions(self):
1112        transport = MockTransport([
1113            _KndxIndex.HEADER
1114            ])
1115        index = self.get_knit_index(transport, "filename", "r")
1116
1117        self.add_a_b(index)
1118        call = transport.calls.pop(0)
1119        # call[1][1] is a BytesIO - we can't test it by simple equality.
1120        self.assertEqual('put_file_non_atomic', call[0])
1121        self.assertEqual('filename.kndx', call[1][0])
1122        # With no history, _KndxIndex writes a new index:
1123        self.assertEqual(
1124            _KndxIndex.HEADER +
1125            b"\na option 0 1 .b :"
1126            b"\na opt 1 2 .c :"
1127            b"\nb option 2 3 0 :",
1128            call[1][1].getvalue())
1129        self.assertEqual({'create_parent_dir': True}, call[2])
1130        self.assertIndexIsAB(index)
1131
1132    def test_add_versions_random_id_is_accepted(self):
1133        transport = MockTransport([
1134            _KndxIndex.HEADER
1135            ])
1136        index = self.get_knit_index(transport, "filename", "r")
1137        self.add_a_b(index, random_id=True)
1138
1139    def test_delay_create_and_add_versions(self):
1140        transport = MockTransport()
1141
1142        index = self.get_knit_index(transport, "filename", "w")
1143        # dir_mode=0777)
1144        self.assertEqual([], transport.calls)
1145        self.add_a_b(index)
1146        # self.assertEqual(
1147        # [    {"dir_mode": 0777, "create_parent_dir": True, "mode": "wb"},
1148        #    kwargs)
1149        # Two calls: one during which we load the existing index (and when its
1150        # missing create it), then a second where we write the contents out.
1151        self.assertEqual(2, len(transport.calls))
1152        call = transport.calls.pop(0)
1153        self.assertEqual('put_file_non_atomic', call[0])
1154        self.assertEqual('filename.kndx', call[1][0])
1155        # With no history, _KndxIndex writes a new index:
1156        self.assertEqual(_KndxIndex.HEADER, call[1][1].getvalue())
1157        self.assertEqual({'create_parent_dir': True}, call[2])
1158        call = transport.calls.pop(0)
1159        # call[1][1] is a BytesIO - we can't test it by simple equality.
1160        self.assertEqual('put_file_non_atomic', call[0])
1161        self.assertEqual('filename.kndx', call[1][0])
1162        # With no history, _KndxIndex writes a new index:
1163        self.assertEqual(
1164            _KndxIndex.HEADER +
1165            b"\na option 0 1 .b :"
1166            b"\na opt 1 2 .c :"
1167            b"\nb option 2 3 0 :",
1168            call[1][1].getvalue())
1169        self.assertEqual({'create_parent_dir': True}, call[2])
1170
1171    def assertTotalBuildSize(self, size, keys, positions):
1172        self.assertEqual(size,
1173                         knit._get_total_build_size(None, keys, positions))
1174
1175    def test__get_total_build_size(self):
1176        positions = {
1177            (b'a',): (('fulltext', False), ((b'a',), 0, 100), None),
1178            (b'b',): (('line-delta', False), ((b'b',), 100, 21), (b'a',)),
1179            (b'c',): (('line-delta', False), ((b'c',), 121, 35), (b'b',)),
1180            (b'd',): (('line-delta', False), ((b'd',), 156, 12), (b'b',)),
1181            }
1182        self.assertTotalBuildSize(100, [(b'a',)], positions)
1183        self.assertTotalBuildSize(121, [(b'b',)], positions)
1184        # c needs both a & b
1185        self.assertTotalBuildSize(156, [(b'c',)], positions)
1186        # we shouldn't count 'b' twice
1187        self.assertTotalBuildSize(156, [(b'b',), (b'c',)], positions)
1188        self.assertTotalBuildSize(133, [(b'd',)], positions)
1189        self.assertTotalBuildSize(168, [(b'c',), (b'd',)], positions)
1190
1191    def test_get_position(self):
1192        transport = MockTransport([
1193            _KndxIndex.HEADER,
1194            b"a option 0 1 :",
1195            b"b option 1 2 :"
1196            ])
1197        index = self.get_knit_index(transport, "filename", "r")
1198
1199        self.assertEqual(((b"a",), 0, 1), index.get_position((b"a",)))
1200        self.assertEqual(((b"b",), 1, 2), index.get_position((b"b",)))
1201
1202    def test_get_method(self):
1203        transport = MockTransport([
1204            _KndxIndex.HEADER,
1205            b"a fulltext,unknown 0 1 :",
1206            b"b unknown,line-delta 1 2 :",
1207            b"c bad 3 4 :"
1208            ])
1209        index = self.get_knit_index(transport, "filename", "r")
1210
1211        self.assertEqual("fulltext", index.get_method(b"a"))
1212        self.assertEqual("line-delta", index.get_method(b"b"))
1213        self.assertRaises(knit.KnitIndexUnknownMethod, index.get_method, b"c")
1214
1215    def test_get_options(self):
1216        transport = MockTransport([
1217            _KndxIndex.HEADER,
1218            b"a opt1 0 1 :",
1219            b"b opt2,opt3 1 2 :"
1220            ])
1221        index = self.get_knit_index(transport, "filename", "r")
1222
1223        self.assertEqual([b"opt1"], index.get_options(b"a"))
1224        self.assertEqual([b"opt2", b"opt3"], index.get_options(b"b"))
1225
1226    def test_get_parent_map(self):
1227        transport = MockTransport([
1228            _KndxIndex.HEADER,
1229            b"a option 0 1 :",
1230            b"b option 1 2 0 .c :",
1231            b"c option 1 2 1 0 .e :"
1232            ])
1233        index = self.get_knit_index(transport, "filename", "r")
1234
1235        self.assertEqual({
1236            (b"a",): (),
1237            (b"b",): ((b"a",), (b"c",)),
1238            (b"c",): ((b"b",), (b"a",), (b"e",)),
1239            }, index.get_parent_map(index.keys()))
1240
1241    def test_impossible_parent(self):
1242        """Test we get KnitCorrupt if the parent couldn't possibly exist."""
1243        transport = MockTransport([
1244            _KndxIndex.HEADER,
1245            b"a option 0 1 :",
1246            b"b option 0 1 4 :"  # We don't have a 4th record
1247            ])
1248        index = self.get_knit_index(transport, 'filename', 'r')
1249        self.assertRaises(KnitCorrupt, index.keys)
1250
1251    def test_corrupted_parent(self):
1252        transport = MockTransport([
1253            _KndxIndex.HEADER,
1254            b"a option 0 1 :",
1255            b"b option 0 1 :",
1256            b"c option 0 1 1v :",  # Can't have a parent of '1v'
1257            ])
1258        index = self.get_knit_index(transport, 'filename', 'r')
1259        self.assertRaises(KnitCorrupt, index.keys)
1260
1261    def test_corrupted_parent_in_list(self):
1262        transport = MockTransport([
1263            _KndxIndex.HEADER,
1264            b"a option 0 1 :",
1265            b"b option 0 1 :",
1266            b"c option 0 1 1 v :",  # Can't have a parent of 'v'
1267            ])
1268        index = self.get_knit_index(transport, 'filename', 'r')
1269        self.assertRaises(KnitCorrupt, index.keys)
1270
1271    def test_invalid_position(self):
1272        transport = MockTransport([
1273            _KndxIndex.HEADER,
1274            b"a option 1v 1 :",
1275            ])
1276        index = self.get_knit_index(transport, 'filename', 'r')
1277        self.assertRaises(KnitCorrupt, index.keys)
1278
1279    def test_invalid_size(self):
1280        transport = MockTransport([
1281            _KndxIndex.HEADER,
1282            b"a option 1 1v :",
1283            ])
1284        index = self.get_knit_index(transport, 'filename', 'r')
1285        self.assertRaises(KnitCorrupt, index.keys)
1286
1287    def test_scan_unvalidated_index_not_implemented(self):
1288        transport = MockTransport()
1289        index = self.get_knit_index(transport, 'filename', 'r')
1290        self.assertRaises(
1291            NotImplementedError, index.scan_unvalidated_index,
1292            'dummy graph_index')
1293        self.assertRaises(
1294            NotImplementedError, index.get_missing_compression_parents)
1295
1296    def test_short_line(self):
1297        transport = MockTransport([
1298            _KndxIndex.HEADER,
1299            b"a option 0 10  :",
1300            b"b option 10 10 0",  # This line isn't terminated, ignored
1301            ])
1302        index = self.get_knit_index(transport, "filename", "r")
1303        self.assertEqual({(b'a',)}, index.keys())
1304
1305    def test_skip_incomplete_record(self):
1306        # A line with bogus data should just be skipped
1307        transport = MockTransport([
1308            _KndxIndex.HEADER,
1309            b"a option 0 10  :",
1310            b"b option 10 10 0",  # This line isn't terminated, ignored
1311            b"c option 20 10 0 :",  # Properly terminated, and starts with '\n'
1312            ])
1313        index = self.get_knit_index(transport, "filename", "r")
1314        self.assertEqual({(b'a',), (b'c',)}, index.keys())
1315
1316    def test_trailing_characters(self):
1317        # A line with bogus data should just be skipped
1318        transport = MockTransport([
1319            _KndxIndex.HEADER,
1320            b"a option 0 10  :",
1321            b"b option 10 10 0 :a",  # This line has extra trailing characters
1322            b"c option 20 10 0 :",  # Properly terminated, and starts with '\n'
1323            ])
1324        index = self.get_knit_index(transport, "filename", "r")
1325        self.assertEqual({(b'a',), (b'c',)}, index.keys())
1326
1327
1328class LowLevelKnitIndexTests_c(LowLevelKnitIndexTests):
1329
1330    _test_needs_features = [compiled_knit_feature]
1331
1332    @property
1333    def _load_data(self):
1334        from .._knit_load_data_pyx import _load_data_c
1335        return _load_data_c
1336
1337
1338class Test_KnitAnnotator(TestCaseWithMemoryTransport):
1339
1340    def make_annotator(self):
1341        factory = knit.make_pack_factory(True, True, 1)
1342        vf = factory(self.get_transport())
1343        return knit._KnitAnnotator(vf)
1344
1345    def test__expand_fulltext(self):
1346        ann = self.make_annotator()
1347        rev_key = (b'rev-id',)
1348        ann._num_compression_children[rev_key] = 1
1349        res = ann._expand_record(rev_key, ((b'parent-id',),), None,
1350                                 [b'line1\n', b'line2\n'], ('fulltext', True))
1351        # The content object and text lines should be cached appropriately
1352        self.assertEqual([b'line1\n', b'line2'], res)
1353        content_obj = ann._content_objects[rev_key]
1354        self.assertEqual([b'line1\n', b'line2\n'], content_obj._lines)
1355        self.assertEqual(res, content_obj.text())
1356        self.assertEqual(res, ann._text_cache[rev_key])
1357
1358    def test__expand_delta_comp_parent_not_available(self):
1359        # Parent isn't available yet, so we return nothing, but queue up this
1360        # node for later processing
1361        ann = self.make_annotator()
1362        rev_key = (b'rev-id',)
1363        parent_key = (b'parent-id',)
1364        record = [b'0,1,1\n', b'new-line\n']
1365        details = ('line-delta', False)
1366        res = ann._expand_record(rev_key, (parent_key,), parent_key,
1367                                 record, details)
1368        self.assertEqual(None, res)
1369        self.assertTrue(parent_key in ann._pending_deltas)
1370        pending = ann._pending_deltas[parent_key]
1371        self.assertEqual(1, len(pending))
1372        self.assertEqual((rev_key, (parent_key,), record, details), pending[0])
1373
1374    def test__expand_record_tracks_num_children(self):
1375        ann = self.make_annotator()
1376        rev_key = (b'rev-id',)
1377        rev2_key = (b'rev2-id',)
1378        parent_key = (b'parent-id',)
1379        record = [b'0,1,1\n', b'new-line\n']
1380        details = ('line-delta', False)
1381        ann._num_compression_children[parent_key] = 2
1382        ann._expand_record(parent_key, (), None, [b'line1\n', b'line2\n'],
1383                           ('fulltext', False))
1384        res = ann._expand_record(rev_key, (parent_key,), parent_key,
1385                                 record, details)
1386        self.assertEqual({parent_key: 1}, ann._num_compression_children)
1387        # Expanding the second child should remove the content object, and the
1388        # num_compression_children entry
1389        res = ann._expand_record(rev2_key, (parent_key,), parent_key,
1390                                 record, details)
1391        self.assertFalse(parent_key in ann._content_objects)
1392        self.assertEqual({}, ann._num_compression_children)
1393        # We should not cache the content_objects for rev2 and rev, because
1394        # they do not have compression children of their own.
1395        self.assertEqual({}, ann._content_objects)
1396
1397    def test__expand_delta_records_blocks(self):
1398        ann = self.make_annotator()
1399        rev_key = (b'rev-id',)
1400        parent_key = (b'parent-id',)
1401        record = [b'0,1,1\n', b'new-line\n']
1402        details = ('line-delta', True)
1403        ann._num_compression_children[parent_key] = 2
1404        ann._expand_record(parent_key, (), None,
1405                           [b'line1\n', b'line2\n', b'line3\n'],
1406                           ('fulltext', False))
1407        ann._expand_record(rev_key, (parent_key,), parent_key, record, details)
1408        self.assertEqual({(rev_key, parent_key): [(1, 1, 1), (3, 3, 0)]},
1409                         ann._matching_blocks)
1410        rev2_key = (b'rev2-id',)
1411        record = [b'0,1,1\n', b'new-line\n']
1412        details = ('line-delta', False)
1413        ann._expand_record(rev2_key, (parent_key,),
1414                           parent_key, record, details)
1415        self.assertEqual([(1, 1, 2), (3, 3, 0)],
1416                         ann._matching_blocks[(rev2_key, parent_key)])
1417
1418    def test__get_parent_ann_uses_matching_blocks(self):
1419        ann = self.make_annotator()
1420        rev_key = (b'rev-id',)
1421        parent_key = (b'parent-id',)
1422        parent_ann = [(parent_key,)] * 3
1423        block_key = (rev_key, parent_key)
1424        ann._annotations_cache[parent_key] = parent_ann
1425        ann._matching_blocks[block_key] = [(0, 1, 1), (3, 3, 0)]
1426        # We should not try to access any parent_lines content, because we know
1427        # we already have the matching blocks
1428        par_ann, blocks = ann._get_parent_annotations_and_matches(rev_key,
1429                                                                  [b'1\n', b'2\n', b'3\n'], parent_key)
1430        self.assertEqual(parent_ann, par_ann)
1431        self.assertEqual([(0, 1, 1), (3, 3, 0)], blocks)
1432        self.assertEqual({}, ann._matching_blocks)
1433
1434    def test__process_pending(self):
1435        ann = self.make_annotator()
1436        rev_key = (b'rev-id',)
1437        p1_key = (b'p1-id',)
1438        p2_key = (b'p2-id',)
1439        record = [b'0,1,1\n', b'new-line\n']
1440        details = ('line-delta', False)
1441        p1_record = [b'line1\n', b'line2\n']
1442        ann._num_compression_children[p1_key] = 1
1443        res = ann._expand_record(rev_key, (p1_key, p2_key), p1_key,
1444                                 record, details)
1445        self.assertEqual(None, res)
1446        # self.assertTrue(p1_key in ann._pending_deltas)
1447        self.assertEqual({}, ann._pending_annotation)
1448        # Now insert p1, and we should be able to expand the delta
1449        res = ann._expand_record(p1_key, (), None, p1_record,
1450                                 ('fulltext', False))
1451        self.assertEqual(p1_record, res)
1452        ann._annotations_cache[p1_key] = [(p1_key,)] * 2
1453        res = ann._process_pending(p1_key)
1454        self.assertEqual([], res)
1455        self.assertFalse(p1_key in ann._pending_deltas)
1456        self.assertTrue(p2_key in ann._pending_annotation)
1457        self.assertEqual({p2_key: [(rev_key, (p1_key, p2_key))]},
1458                         ann._pending_annotation)
1459        # Now fill in parent 2, and pending annotation should be satisfied
1460        res = ann._expand_record(p2_key, (), None, [], ('fulltext', False))
1461        ann._annotations_cache[p2_key] = []
1462        res = ann._process_pending(p2_key)
1463        self.assertEqual([rev_key], res)
1464        self.assertEqual({}, ann._pending_annotation)
1465        self.assertEqual({}, ann._pending_deltas)
1466
1467    def test_record_delta_removes_basis(self):
1468        ann = self.make_annotator()
1469        ann._expand_record((b'parent-id',), (), None,
1470                           [b'line1\n', b'line2\n'], ('fulltext', False))
1471        ann._num_compression_children[b'parent-id'] = 2
1472
1473    def test_annotate_special_text(self):
1474        ann = self.make_annotator()
1475        vf = ann._vf
1476        rev1_key = (b'rev-1',)
1477        rev2_key = (b'rev-2',)
1478        rev3_key = (b'rev-3',)
1479        spec_key = (b'special:',)
1480        vf.add_lines(rev1_key, [], [b'initial content\n'])
1481        vf.add_lines(rev2_key, [rev1_key], [b'initial content\n',
1482                                            b'common content\n',
1483                                            b'content in 2\n'])
1484        vf.add_lines(rev3_key, [rev1_key], [b'initial content\n',
1485                                            b'common content\n',
1486                                            b'content in 3\n'])
1487        spec_text = (b'initial content\n'
1488                     b'common content\n'
1489                     b'content in 2\n'
1490                     b'content in 3\n')
1491        ann.add_special_text(spec_key, [rev2_key, rev3_key], spec_text)
1492        anns, lines = ann.annotate(spec_key)
1493        self.assertEqual([(rev1_key,),
1494                          (rev2_key, rev3_key),
1495                          (rev2_key,),
1496                          (rev3_key,),
1497                          ], anns)
1498        self.assertEqualDiff(spec_text, b''.join(lines))
1499
1500
1501class KnitTests(TestCaseWithTransport):
1502    """Class containing knit test helper routines."""
1503
1504    def make_test_knit(self, annotate=False, name='test'):
1505        mapper = ConstantMapper(name)
1506        return make_file_factory(annotate, mapper)(self.get_transport())
1507
1508
1509class TestBadShaError(KnitTests):
1510    """Tests for handling of sha errors."""
1511
1512    def test_sha_exception_has_text(self):
1513        # having the failed text included in the error allows for recovery.
1514        source = self.make_test_knit()
1515        target = self.make_test_knit(name="target")
1516        if not source._max_delta_chain:
1517            raise TestNotApplicable(
1518                "cannot get delta-caused sha failures without deltas.")
1519        # create a basis
1520        basis = (b'basis',)
1521        broken = (b'broken',)
1522        source.add_lines(basis, (), [b'foo\n'])
1523        source.add_lines(broken, (basis,), [b'foo\n', b'bar\n'])
1524        # Seed target with a bad basis text
1525        target.add_lines(basis, (), [b'gam\n'])
1526        target.insert_record_stream(
1527            source.get_record_stream([broken], 'unordered', False))
1528        err = self.assertRaises(KnitCorrupt,
1529                                next(target.get_record_stream([broken], 'unordered', True
1530                                                              )).get_bytes_as, 'chunked')
1531        self.assertEqual([b'gam\n', b'bar\n'], err.content)
1532        # Test for formatting with live data
1533        self.assertStartsWith(str(err), "Knit ")
1534
1535
1536class TestKnitIndex(KnitTests):
1537
1538    def test_add_versions_dictionary_compresses(self):
1539        """Adding versions to the index should update the lookup dict"""
1540        knit = self.make_test_knit()
1541        idx = knit._index
1542        idx.add_records([((b'a-1',), [b'fulltext'], ((b'a-1',), 0, 0), [])])
1543        self.check_file_contents('test.kndx',
1544                                 b'# bzr knit index 8\n'
1545                                 b'\n'
1546                                 b'a-1 fulltext 0 0  :'
1547                                 )
1548        idx.add_records([
1549            ((b'a-2',), [b'fulltext'], ((b'a-2',), 0, 0), [(b'a-1',)]),
1550            ((b'a-3',), [b'fulltext'], ((b'a-3',), 0, 0), [(b'a-2',)]),
1551            ])
1552        self.check_file_contents('test.kndx',
1553                                 b'# bzr knit index 8\n'
1554                                 b'\n'
1555                                 b'a-1 fulltext 0 0  :\n'
1556                                 b'a-2 fulltext 0 0 0 :\n'
1557                                 b'a-3 fulltext 0 0 1 :'
1558                                 )
1559        self.assertEqual({(b'a-3',), (b'a-1',), (b'a-2',)}, idx.keys())
1560        self.assertEqual({
1561            (b'a-1',): (((b'a-1',), 0, 0), None, (), ('fulltext', False)),
1562            (b'a-2',): (((b'a-2',), 0, 0), None, ((b'a-1',),), ('fulltext', False)),
1563            (b'a-3',): (((b'a-3',), 0, 0), None, ((b'a-2',),), ('fulltext', False)),
1564            }, idx.get_build_details(idx.keys()))
1565        self.assertEqual({(b'a-1',): (),
1566                          (b'a-2',): ((b'a-1',),),
1567                          (b'a-3',): ((b'a-2',),), },
1568                         idx.get_parent_map(idx.keys()))
1569
1570    def test_add_versions_fails_clean(self):
1571        """If add_versions fails in the middle, it restores a pristine state.
1572
1573        Any modifications that are made to the index are reset if all versions
1574        cannot be added.
1575        """
1576        # This cheats a little bit by passing in a generator which will
1577        # raise an exception before the processing finishes
1578        # Other possibilities would be to have an version with the wrong number
1579        # of entries, or to make the backing transport unable to write any
1580        # files.
1581
1582        knit = self.make_test_knit()
1583        idx = knit._index
1584        idx.add_records([((b'a-1',), [b'fulltext'], ((b'a-1',), 0, 0), [])])
1585
1586        class StopEarly(Exception):
1587            pass
1588
1589        def generate_failure():
1590            """Add some entries and then raise an exception"""
1591            yield ((b'a-2',), [b'fulltext'], (None, 0, 0), (b'a-1',))
1592            yield ((b'a-3',), [b'fulltext'], (None, 0, 0), (b'a-2',))
1593            raise StopEarly()
1594
1595        # Assert the pre-condition
1596        def assertA1Only():
1597            self.assertEqual({(b'a-1',)}, set(idx.keys()))
1598            self.assertEqual(
1599                {(b'a-1',): (((b'a-1',), 0, 0), None, (), ('fulltext', False))},
1600                idx.get_build_details([(b'a-1',)]))
1601            self.assertEqual({(b'a-1',): ()}, idx.get_parent_map(idx.keys()))
1602
1603        assertA1Only()
1604        self.assertRaises(StopEarly, idx.add_records, generate_failure())
1605        # And it shouldn't be modified
1606        assertA1Only()
1607
1608    def test_knit_index_ignores_empty_files(self):
1609        # There was a race condition in older bzr, where a ^C at the right time
1610        # could leave an empty .kndx file, which bzr would later claim was a
1611        # corrupted file since the header was not present. In reality, the file
1612        # just wasn't created, so it should be ignored.
1613        t = transport.get_transport_from_path('.')
1614        t.put_bytes('test.kndx', b'')
1615
1616        knit = self.make_test_knit()
1617
1618    def test_knit_index_checks_header(self):
1619        t = transport.get_transport_from_path('.')
1620        t.put_bytes('test.kndx', b'# not really a knit header\n\n')
1621        k = self.make_test_knit()
1622        self.assertRaises(KnitHeaderError, k.keys)
1623
1624
1625class TestGraphIndexKnit(KnitTests):
1626    """Tests for knits using a GraphIndex rather than a KnitIndex."""
1627
1628    def make_g_index(self, name, ref_lists=0, nodes=[]):
1629        builder = GraphIndexBuilder(ref_lists)
1630        for node, references, value in nodes:
1631            builder.add_node(node, references, value)
1632        stream = builder.finish()
1633        trans = self.get_transport()
1634        size = trans.put_file(name, stream)
1635        return GraphIndex(trans, name, size)
1636
1637    def two_graph_index(self, deltas=False, catch_adds=False):
1638        """Build a two-graph index.
1639
1640        :param deltas: If true, use underlying indices with two node-ref
1641            lists and 'parent' set to a delta-compressed against tail.
1642        """
1643        # build a complex graph across several indices.
1644        if deltas:
1645            # delta compression inn the index
1646            index1 = self.make_g_index('1', 2, [
1647                ((b'tip', ), b'N0 100', ([(b'parent', )], [], )),
1648                ((b'tail', ), b'', ([], []))])
1649            index2 = self.make_g_index('2', 2, [
1650                ((b'parent', ), b' 100 78',
1651                 ([(b'tail', ), (b'ghost', )], [(b'tail', )])),
1652                ((b'separate', ), b'', ([], []))])
1653        else:
1654            # just blob location and graph in the index.
1655            index1 = self.make_g_index('1', 1, [
1656                ((b'tip', ), b'N0 100', ([(b'parent', )], )),
1657                ((b'tail', ), b'', ([], ))])
1658            index2 = self.make_g_index('2', 1, [
1659                ((b'parent', ), b' 100 78', ([(b'tail', ), (b'ghost', )], )),
1660                ((b'separate', ), b'', ([], ))])
1661        combined_index = CombinedGraphIndex([index1, index2])
1662        if catch_adds:
1663            self.combined_index = combined_index
1664            self.caught_entries = []
1665            add_callback = self.catch_add
1666        else:
1667            add_callback = None
1668        return _KnitGraphIndex(combined_index, lambda: True, deltas=deltas,
1669                               add_callback=add_callback)
1670
1671    def test_keys(self):
1672        index = self.two_graph_index()
1673        self.assertEqual({(b'tail',), (b'tip',), (b'parent',), (b'separate',)},
1674                         set(index.keys()))
1675
1676    def test_get_position(self):
1677        index = self.two_graph_index()
1678        self.assertEqual(
1679            (index._graph_index._indices[0], 0, 100), index.get_position((b'tip',)))
1680        self.assertEqual(
1681            (index._graph_index._indices[1], 100, 78), index.get_position((b'parent',)))
1682
1683    def test_get_method_deltas(self):
1684        index = self.two_graph_index(deltas=True)
1685        self.assertEqual('fulltext', index.get_method((b'tip',)))
1686        self.assertEqual('line-delta', index.get_method((b'parent',)))
1687
1688    def test_get_method_no_deltas(self):
1689        # check that the parent-history lookup is ignored with deltas=False.
1690        index = self.two_graph_index(deltas=False)
1691        self.assertEqual('fulltext', index.get_method((b'tip',)))
1692        self.assertEqual('fulltext', index.get_method((b'parent',)))
1693
1694    def test_get_options_deltas(self):
1695        index = self.two_graph_index(deltas=True)
1696        self.assertEqual([b'fulltext', b'no-eol'],
1697                         index.get_options((b'tip',)))
1698        self.assertEqual([b'line-delta'], index.get_options((b'parent',)))
1699
1700    def test_get_options_no_deltas(self):
1701        # check that the parent-history lookup is ignored with deltas=False.
1702        index = self.two_graph_index(deltas=False)
1703        self.assertEqual([b'fulltext', b'no-eol'],
1704                         index.get_options((b'tip',)))
1705        self.assertEqual([b'fulltext'], index.get_options((b'parent',)))
1706
1707    def test_get_parent_map(self):
1708        index = self.two_graph_index()
1709        self.assertEqual({(b'parent',): ((b'tail',), (b'ghost',))},
1710                         index.get_parent_map([(b'parent',), (b'ghost',)]))
1711
1712    def catch_add(self, entries):
1713        self.caught_entries.append(entries)
1714
1715    def test_add_no_callback_errors(self):
1716        index = self.two_graph_index()
1717        self.assertRaises(errors.ReadOnlyError, index.add_records,
1718                          [((b'new',), b'fulltext,no-eol', (None, 50, 60), [b'separate'])])
1719
1720    def test_add_version_smoke(self):
1721        index = self.two_graph_index(catch_adds=True)
1722        index.add_records([((b'new',), b'fulltext,no-eol', (None, 50, 60),
1723                            [(b'separate',)])])
1724        self.assertEqual([[((b'new', ), b'N50 60', (((b'separate',),),))]],
1725                         self.caught_entries)
1726
1727    def test_add_version_delta_not_delta_index(self):
1728        index = self.two_graph_index(catch_adds=True)
1729        self.assertRaises(KnitCorrupt, index.add_records,
1730                          [((b'new',), b'no-eol,line-delta', (None, 0, 100), [(b'parent',)])])
1731        self.assertEqual([], self.caught_entries)
1732
1733    def test_add_version_same_dup(self):
1734        index = self.two_graph_index(catch_adds=True)
1735        # options can be spelt two different ways
1736        index.add_records(
1737            [((b'tip',), b'fulltext,no-eol', (None, 0, 100), [(b'parent',)])])
1738        index.add_records(
1739            [((b'tip',), b'no-eol,fulltext', (None, 0, 100), [(b'parent',)])])
1740        # position/length are ignored (because each pack could have fulltext or
1741        # delta, and be at a different position.
1742        index.add_records([((b'tip',), b'fulltext,no-eol', (None, 50, 100),
1743                            [(b'parent',)])])
1744        index.add_records([((b'tip',), b'fulltext,no-eol', (None, 0, 1000),
1745                            [(b'parent',)])])
1746        # but neither should have added data:
1747        self.assertEqual([[], [], [], []], self.caught_entries)
1748
1749    def test_add_version_different_dup(self):
1750        index = self.two_graph_index(deltas=True, catch_adds=True)
1751        # change options
1752        self.assertRaises(KnitCorrupt, index.add_records,
1753                          [((b'tip',), b'line-delta', (None, 0, 100), [(b'parent',)])])
1754        self.assertRaises(KnitCorrupt, index.add_records,
1755                          [((b'tip',), b'fulltext', (None, 0, 100), [(b'parent',)])])
1756        # parents
1757        self.assertRaises(KnitCorrupt, index.add_records,
1758                          [((b'tip',), b'fulltext,no-eol', (None, 0, 100), [])])
1759        self.assertEqual([], self.caught_entries)
1760
1761    def test_add_versions_nodeltas(self):
1762        index = self.two_graph_index(catch_adds=True)
1763        index.add_records([
1764            ((b'new',), b'fulltext,no-eol', (None, 50, 60), [(b'separate',)]),
1765            ((b'new2',), b'fulltext', (None, 0, 6), [(b'new',)]),
1766            ])
1767        self.assertEqual([((b'new', ), b'N50 60', (((b'separate',),),)),
1768                          ((b'new2', ), b' 0 6', (((b'new',),),))],
1769                         sorted(self.caught_entries[0]))
1770        self.assertEqual(1, len(self.caught_entries))
1771
1772    def test_add_versions_deltas(self):
1773        index = self.two_graph_index(deltas=True, catch_adds=True)
1774        index.add_records([
1775            ((b'new',), b'fulltext,no-eol', (None, 50, 60), [(b'separate',)]),
1776            ((b'new2',), b'line-delta', (None, 0, 6), [(b'new',)]),
1777            ])
1778        self.assertEqual([((b'new', ), b'N50 60', (((b'separate',),), ())),
1779                          ((b'new2', ), b' 0 6', (((b'new',),), ((b'new',),), ))],
1780                         sorted(self.caught_entries[0]))
1781        self.assertEqual(1, len(self.caught_entries))
1782
1783    def test_add_versions_delta_not_delta_index(self):
1784        index = self.two_graph_index(catch_adds=True)
1785        self.assertRaises(KnitCorrupt, index.add_records,
1786                          [((b'new',), b'no-eol,line-delta', (None, 0, 100), [(b'parent',)])])
1787        self.assertEqual([], self.caught_entries)
1788
1789    def test_add_versions_random_id_accepted(self):
1790        index = self.two_graph_index(catch_adds=True)
1791        index.add_records([], random_id=True)
1792
1793    def test_add_versions_same_dup(self):
1794        index = self.two_graph_index(catch_adds=True)
1795        # options can be spelt two different ways
1796        index.add_records([((b'tip',), b'fulltext,no-eol', (None, 0, 100),
1797                            [(b'parent',)])])
1798        index.add_records([((b'tip',), b'no-eol,fulltext', (None, 0, 100),
1799                            [(b'parent',)])])
1800        # position/length are ignored (because each pack could have fulltext or
1801        # delta, and be at a different position.
1802        index.add_records([((b'tip',), b'fulltext,no-eol', (None, 50, 100),
1803                            [(b'parent',)])])
1804        index.add_records([((b'tip',), b'fulltext,no-eol', (None, 0, 1000),
1805                            [(b'parent',)])])
1806        # but neither should have added data.
1807        self.assertEqual([[], [], [], []], self.caught_entries)
1808
1809    def test_add_versions_different_dup(self):
1810        index = self.two_graph_index(deltas=True, catch_adds=True)
1811        # change options
1812        self.assertRaises(KnitCorrupt, index.add_records,
1813                          [((b'tip',), b'line-delta', (None, 0, 100), [(b'parent',)])])
1814        self.assertRaises(KnitCorrupt, index.add_records,
1815                          [((b'tip',), b'fulltext', (None, 0, 100), [(b'parent',)])])
1816        # parents
1817        self.assertRaises(KnitCorrupt, index.add_records,
1818                          [((b'tip',), b'fulltext,no-eol', (None, 0, 100), [])])
1819        # change options in the second record
1820        self.assertRaises(KnitCorrupt, index.add_records,
1821                          [((b'tip',), b'fulltext,no-eol', (None, 0, 100), [(b'parent',)]),
1822                           ((b'tip',), b'line-delta', (None, 0, 100), [(b'parent',)])])
1823        self.assertEqual([], self.caught_entries)
1824
1825    def make_g_index_missing_compression_parent(self):
1826        graph_index = self.make_g_index('missing_comp', 2,
1827                                        [((b'tip', ), b' 100 78',
1828                                          ([(b'missing-parent', ), (b'ghost', )], [(b'missing-parent', )]))])
1829        return graph_index
1830
1831    def make_g_index_missing_parent(self):
1832        graph_index = self.make_g_index('missing_parent', 2,
1833                                        [((b'parent', ), b' 100 78', ([], [])),
1834                                         ((b'tip', ), b' 100 78',
1835                                            ([(b'parent', ), (b'missing-parent', )], [(b'parent', )])),
1836                                         ])
1837        return graph_index
1838
1839    def make_g_index_no_external_refs(self):
1840        graph_index = self.make_g_index('no_external_refs', 2,
1841                                        [((b'rev', ), b' 100 78',
1842                                          ([(b'parent', ), (b'ghost', )], []))])
1843        return graph_index
1844
1845    def test_add_good_unvalidated_index(self):
1846        unvalidated = self.make_g_index_no_external_refs()
1847        combined = CombinedGraphIndex([unvalidated])
1848        index = _KnitGraphIndex(combined, lambda: True, deltas=True)
1849        index.scan_unvalidated_index(unvalidated)
1850        self.assertEqual(frozenset(), index.get_missing_compression_parents())
1851
1852    def test_add_missing_compression_parent_unvalidated_index(self):
1853        unvalidated = self.make_g_index_missing_compression_parent()
1854        combined = CombinedGraphIndex([unvalidated])
1855        index = _KnitGraphIndex(combined, lambda: True, deltas=True)
1856        index.scan_unvalidated_index(unvalidated)
1857        # This also checks that its only the compression parent that is
1858        # examined, otherwise 'ghost' would also be reported as a missing
1859        # parent.
1860        self.assertEqual(
1861            frozenset([(b'missing-parent',)]),
1862            index.get_missing_compression_parents())
1863
1864    def test_add_missing_noncompression_parent_unvalidated_index(self):
1865        unvalidated = self.make_g_index_missing_parent()
1866        combined = CombinedGraphIndex([unvalidated])
1867        index = _KnitGraphIndex(combined, lambda: True, deltas=True,
1868                                track_external_parent_refs=True)
1869        index.scan_unvalidated_index(unvalidated)
1870        self.assertEqual(
1871            frozenset([(b'missing-parent',)]), index.get_missing_parents())
1872
1873    def test_track_external_parent_refs(self):
1874        g_index = self.make_g_index('empty', 2, [])
1875        combined = CombinedGraphIndex([g_index])
1876        index = _KnitGraphIndex(combined, lambda: True, deltas=True,
1877                                add_callback=self.catch_add, track_external_parent_refs=True)
1878        self.caught_entries = []
1879        index.add_records([
1880            ((b'new-key',), b'fulltext,no-eol', (None, 50, 60),
1881             [(b'parent-1',), (b'parent-2',)])])
1882        self.assertEqual(
1883            frozenset([(b'parent-1',), (b'parent-2',)]),
1884            index.get_missing_parents())
1885
1886    def test_add_unvalidated_index_with_present_external_references(self):
1887        index = self.two_graph_index(deltas=True)
1888        # Ugly hack to get at one of the underlying GraphIndex objects that
1889        # two_graph_index built.
1890        unvalidated = index._graph_index._indices[1]
1891        # 'parent' is an external ref of _indices[1] (unvalidated), but is
1892        # present in _indices[0].
1893        index.scan_unvalidated_index(unvalidated)
1894        self.assertEqual(frozenset(), index.get_missing_compression_parents())
1895
1896    def make_new_missing_parent_g_index(self, name):
1897        missing_parent = name.encode('ascii') + b'-missing-parent'
1898        graph_index = self.make_g_index(name, 2,
1899                                        [((name.encode('ascii') + b'tip', ), b' 100 78',
1900                                          ([(missing_parent, ), (b'ghost', )], [(missing_parent, )]))])
1901        return graph_index
1902
1903    def test_add_mulitiple_unvalidated_indices_with_missing_parents(self):
1904        g_index_1 = self.make_new_missing_parent_g_index('one')
1905        g_index_2 = self.make_new_missing_parent_g_index('two')
1906        combined = CombinedGraphIndex([g_index_1, g_index_2])
1907        index = _KnitGraphIndex(combined, lambda: True, deltas=True)
1908        index.scan_unvalidated_index(g_index_1)
1909        index.scan_unvalidated_index(g_index_2)
1910        self.assertEqual(
1911            frozenset([(b'one-missing-parent',), (b'two-missing-parent',)]),
1912            index.get_missing_compression_parents())
1913
1914    def test_add_mulitiple_unvalidated_indices_with_mutual_dependencies(self):
1915        graph_index_a = self.make_g_index('one', 2,
1916                                          [((b'parent-one', ), b' 100 78', ([(b'non-compression-parent',)], [])),
1917                                           ((b'child-of-two', ), b' 100 78',
1918                                              ([(b'parent-two',)], [(b'parent-two',)]))])
1919        graph_index_b = self.make_g_index('two', 2,
1920                                          [((b'parent-two', ), b' 100 78', ([(b'non-compression-parent',)], [])),
1921                                           ((b'child-of-one', ), b' 100 78',
1922                                              ([(b'parent-one',)], [(b'parent-one',)]))])
1923        combined = CombinedGraphIndex([graph_index_a, graph_index_b])
1924        index = _KnitGraphIndex(combined, lambda: True, deltas=True)
1925        index.scan_unvalidated_index(graph_index_a)
1926        index.scan_unvalidated_index(graph_index_b)
1927        self.assertEqual(
1928            frozenset([]), index.get_missing_compression_parents())
1929
1930
1931class TestNoParentsGraphIndexKnit(KnitTests):
1932    """Tests for knits using _KnitGraphIndex with no parents."""
1933
1934    def make_g_index(self, name, ref_lists=0, nodes=[]):
1935        builder = GraphIndexBuilder(ref_lists)
1936        for node, references in nodes:
1937            builder.add_node(node, references)
1938        stream = builder.finish()
1939        trans = self.get_transport()
1940        size = trans.put_file(name, stream)
1941        return GraphIndex(trans, name, size)
1942
1943    def test_add_good_unvalidated_index(self):
1944        unvalidated = self.make_g_index('unvalidated')
1945        combined = CombinedGraphIndex([unvalidated])
1946        index = _KnitGraphIndex(combined, lambda: True, parents=False)
1947        index.scan_unvalidated_index(unvalidated)
1948        self.assertEqual(frozenset(),
1949                         index.get_missing_compression_parents())
1950
1951    def test_parents_deltas_incompatible(self):
1952        index = CombinedGraphIndex([])
1953        self.assertRaises(knit.KnitError, _KnitGraphIndex, lambda: True,
1954                          index, deltas=True, parents=False)
1955
1956    def two_graph_index(self, catch_adds=False):
1957        """Build a two-graph index.
1958
1959        :param deltas: If true, use underlying indices with two node-ref
1960            lists and 'parent' set to a delta-compressed against tail.
1961        """
1962        # put several versions in the index.
1963        index1 = self.make_g_index('1', 0, [
1964            ((b'tip', ), b'N0 100'),
1965            ((b'tail', ), b'')])
1966        index2 = self.make_g_index('2', 0, [
1967            ((b'parent', ), b' 100 78'),
1968            ((b'separate', ), b'')])
1969        combined_index = CombinedGraphIndex([index1, index2])
1970        if catch_adds:
1971            self.combined_index = combined_index
1972            self.caught_entries = []
1973            add_callback = self.catch_add
1974        else:
1975            add_callback = None
1976        return _KnitGraphIndex(combined_index, lambda: True, parents=False,
1977                               add_callback=add_callback)
1978
1979    def test_keys(self):
1980        index = self.two_graph_index()
1981        self.assertEqual({(b'tail',), (b'tip',), (b'parent',), (b'separate',)},
1982                         set(index.keys()))
1983
1984    def test_get_position(self):
1985        index = self.two_graph_index()
1986        self.assertEqual((index._graph_index._indices[0], 0, 100),
1987                         index.get_position((b'tip',)))
1988        self.assertEqual((index._graph_index._indices[1], 100, 78),
1989                         index.get_position((b'parent',)))
1990
1991    def test_get_method(self):
1992        index = self.two_graph_index()
1993        self.assertEqual('fulltext', index.get_method((b'tip',)))
1994        self.assertEqual([b'fulltext'], index.get_options((b'parent',)))
1995
1996    def test_get_options(self):
1997        index = self.two_graph_index()
1998        self.assertEqual([b'fulltext', b'no-eol'],
1999                         index.get_options((b'tip',)))
2000        self.assertEqual([b'fulltext'], index.get_options((b'parent',)))
2001
2002    def test_get_parent_map(self):
2003        index = self.two_graph_index()
2004        self.assertEqual({(b'parent',): None},
2005                         index.get_parent_map([(b'parent',), (b'ghost',)]))
2006
2007    def catch_add(self, entries):
2008        self.caught_entries.append(entries)
2009
2010    def test_add_no_callback_errors(self):
2011        index = self.two_graph_index()
2012        self.assertRaises(errors.ReadOnlyError, index.add_records,
2013                          [((b'new',), b'fulltext,no-eol', (None, 50, 60), [(b'separate',)])])
2014
2015    def test_add_version_smoke(self):
2016        index = self.two_graph_index(catch_adds=True)
2017        index.add_records(
2018            [((b'new',), b'fulltext,no-eol', (None, 50, 60), [])])
2019        self.assertEqual([[((b'new', ), b'N50 60')]],
2020                         self.caught_entries)
2021
2022    def test_add_version_delta_not_delta_index(self):
2023        index = self.two_graph_index(catch_adds=True)
2024        self.assertRaises(KnitCorrupt, index.add_records,
2025                          [((b'new',), b'no-eol,line-delta', (None, 0, 100), [])])
2026        self.assertEqual([], self.caught_entries)
2027
2028    def test_add_version_same_dup(self):
2029        index = self.two_graph_index(catch_adds=True)
2030        # options can be spelt two different ways
2031        index.add_records(
2032            [((b'tip',), b'fulltext,no-eol', (None, 0, 100), [])])
2033        index.add_records(
2034            [((b'tip',), b'no-eol,fulltext', (None, 0, 100), [])])
2035        # position/length are ignored (because each pack could have fulltext or
2036        # delta, and be at a different position.
2037        index.add_records(
2038            [((b'tip',), b'fulltext,no-eol', (None, 50, 100), [])])
2039        index.add_records(
2040            [((b'tip',), b'fulltext,no-eol', (None, 0, 1000), [])])
2041        # but neither should have added data.
2042        self.assertEqual([[], [], [], []], self.caught_entries)
2043
2044    def test_add_version_different_dup(self):
2045        index = self.two_graph_index(catch_adds=True)
2046        # change options
2047        self.assertRaises(KnitCorrupt, index.add_records,
2048                          [((b'tip',), b'no-eol,line-delta', (None, 0, 100), [])])
2049        self.assertRaises(KnitCorrupt, index.add_records,
2050                          [((b'tip',), b'line-delta,no-eol', (None, 0, 100), [])])
2051        self.assertRaises(KnitCorrupt, index.add_records,
2052                          [((b'tip',), b'fulltext', (None, 0, 100), [])])
2053        # parents
2054        self.assertRaises(KnitCorrupt, index.add_records,
2055                          [((b'tip',), b'fulltext,no-eol', (None, 0, 100), [(b'parent',)])])
2056        self.assertEqual([], self.caught_entries)
2057
2058    def test_add_versions(self):
2059        index = self.two_graph_index(catch_adds=True)
2060        index.add_records([
2061            ((b'new',), b'fulltext,no-eol', (None, 50, 60), []),
2062            ((b'new2',), b'fulltext', (None, 0, 6), []),
2063            ])
2064        self.assertEqual([((b'new', ), b'N50 60'), ((b'new2', ), b' 0 6')],
2065                         sorted(self.caught_entries[0]))
2066        self.assertEqual(1, len(self.caught_entries))
2067
2068    def test_add_versions_delta_not_delta_index(self):
2069        index = self.two_graph_index(catch_adds=True)
2070        self.assertRaises(KnitCorrupt, index.add_records,
2071                          [((b'new',), b'no-eol,line-delta', (None, 0, 100), [(b'parent',)])])
2072        self.assertEqual([], self.caught_entries)
2073
2074    def test_add_versions_parents_not_parents_index(self):
2075        index = self.two_graph_index(catch_adds=True)
2076        self.assertRaises(KnitCorrupt, index.add_records,
2077                          [((b'new',), b'no-eol,fulltext', (None, 0, 100), [(b'parent',)])])
2078        self.assertEqual([], self.caught_entries)
2079
2080    def test_add_versions_random_id_accepted(self):
2081        index = self.two_graph_index(catch_adds=True)
2082        index.add_records([], random_id=True)
2083
2084    def test_add_versions_same_dup(self):
2085        index = self.two_graph_index(catch_adds=True)
2086        # options can be spelt two different ways
2087        index.add_records(
2088            [((b'tip',), b'fulltext,no-eol', (None, 0, 100), [])])
2089        index.add_records(
2090            [((b'tip',), b'no-eol,fulltext', (None, 0, 100), [])])
2091        # position/length are ignored (because each pack could have fulltext or
2092        # delta, and be at a different position.
2093        index.add_records(
2094            [((b'tip',), b'fulltext,no-eol', (None, 50, 100), [])])
2095        index.add_records(
2096            [((b'tip',), b'fulltext,no-eol', (None, 0, 1000), [])])
2097        # but neither should have added data.
2098        self.assertEqual([[], [], [], []], self.caught_entries)
2099
2100    def test_add_versions_different_dup(self):
2101        index = self.two_graph_index(catch_adds=True)
2102        # change options
2103        self.assertRaises(KnitCorrupt, index.add_records,
2104                          [((b'tip',), b'no-eol,line-delta', (None, 0, 100), [])])
2105        self.assertRaises(KnitCorrupt, index.add_records,
2106                          [((b'tip',), b'line-delta,no-eol', (None, 0, 100), [])])
2107        self.assertRaises(KnitCorrupt, index.add_records,
2108                          [((b'tip',), b'fulltext', (None, 0, 100), [])])
2109        # parents
2110        self.assertRaises(KnitCorrupt, index.add_records,
2111                          [((b'tip',), b'fulltext,no-eol', (None, 0, 100), [(b'parent',)])])
2112        # change options in the second record
2113        self.assertRaises(KnitCorrupt, index.add_records,
2114                          [((b'tip',), b'fulltext,no-eol', (None, 0, 100), []),
2115                           ((b'tip',), b'no-eol,line-delta', (None, 0, 100), [])])
2116        self.assertEqual([], self.caught_entries)
2117
2118
2119class TestKnitVersionedFiles(KnitTests):
2120
2121    def assertGroupKeysForIo(self, exp_groups, keys, non_local_keys,
2122                             positions, _min_buffer_size=None):
2123        kvf = self.make_test_knit()
2124        if _min_buffer_size is None:
2125            _min_buffer_size = knit._STREAM_MIN_BUFFER_SIZE
2126        self.assertEqual(exp_groups, kvf._group_keys_for_io(keys,
2127                                                            non_local_keys, positions,
2128                                                            _min_buffer_size=_min_buffer_size))
2129
2130    def assertSplitByPrefix(self, expected_map, expected_prefix_order,
2131                            keys):
2132        split, prefix_order = KnitVersionedFiles._split_by_prefix(keys)
2133        self.assertEqual(expected_map, split)
2134        self.assertEqual(expected_prefix_order, prefix_order)
2135
2136    def test__group_keys_for_io(self):
2137        ft_detail = ('fulltext', False)
2138        ld_detail = ('line-delta', False)
2139        f_a = (b'f', b'a')
2140        f_b = (b'f', b'b')
2141        f_c = (b'f', b'c')
2142        g_a = (b'g', b'a')
2143        g_b = (b'g', b'b')
2144        g_c = (b'g', b'c')
2145        positions = {
2146            f_a: (ft_detail, (f_a, 0, 100), None),
2147            f_b: (ld_detail, (f_b, 100, 21), f_a),
2148            f_c: (ld_detail, (f_c, 180, 15), f_b),
2149            g_a: (ft_detail, (g_a, 121, 35), None),
2150            g_b: (ld_detail, (g_b, 156, 12), g_a),
2151            g_c: (ld_detail, (g_c, 195, 13), g_a),
2152            }
2153        self.assertGroupKeysForIo([([f_a], set())],
2154                                  [f_a], [], positions)
2155        self.assertGroupKeysForIo([([f_a], {f_a})],
2156                                  [f_a], [f_a], positions)
2157        self.assertGroupKeysForIo([([f_a, f_b], set([]))],
2158                                  [f_a, f_b], [], positions)
2159        self.assertGroupKeysForIo([([f_a, f_b], {f_b})],
2160                                  [f_a, f_b], [f_b], positions)
2161        self.assertGroupKeysForIo([([f_a, f_b, g_a, g_b], set())],
2162                                  [f_a, g_a, f_b, g_b], [], positions)
2163        self.assertGroupKeysForIo([([f_a, f_b, g_a, g_b], set())],
2164                                  [f_a, g_a, f_b, g_b], [], positions,
2165                                  _min_buffer_size=150)
2166        self.assertGroupKeysForIo([([f_a, f_b], set()), ([g_a, g_b], set())],
2167                                  [f_a, g_a, f_b, g_b], [], positions,
2168                                  _min_buffer_size=100)
2169        self.assertGroupKeysForIo([([f_c], set()), ([g_b], set())],
2170                                  [f_c, g_b], [], positions,
2171                                  _min_buffer_size=125)
2172        self.assertGroupKeysForIo([([g_b, f_c], set())],
2173                                  [g_b, f_c], [], positions,
2174                                  _min_buffer_size=125)
2175
2176    def test__split_by_prefix(self):
2177        self.assertSplitByPrefix({b'f': [(b'f', b'a'), (b'f', b'b')],
2178                                  b'g': [(b'g', b'b'), (b'g', b'a')],
2179                                  }, [b'f', b'g'],
2180                                 [(b'f', b'a'), (b'g', b'b'),
2181                                  (b'g', b'a'), (b'f', b'b')])
2182
2183        self.assertSplitByPrefix({b'f': [(b'f', b'a'), (b'f', b'b')],
2184                                  b'g': [(b'g', b'b'), (b'g', b'a')],
2185                                  }, [b'f', b'g'],
2186                                 [(b'f', b'a'), (b'f', b'b'),
2187                                  (b'g', b'b'), (b'g', b'a')])
2188
2189        self.assertSplitByPrefix({b'f': [(b'f', b'a'), (b'f', b'b')],
2190                                  b'g': [(b'g', b'b'), (b'g', b'a')],
2191                                  }, [b'f', b'g'],
2192                                 [(b'f', b'a'), (b'f', b'b'),
2193                                  (b'g', b'b'), (b'g', b'a')])
2194
2195        self.assertSplitByPrefix({b'f': [(b'f', b'a'), (b'f', b'b')],
2196                                  b'g': [(b'g', b'b'), (b'g', b'a')],
2197                                  b'': [(b'a',), (b'b',)]
2198                                  }, [b'f', b'g', b''],
2199                                 [(b'f', b'a'), (b'g', b'b'),
2200                                  (b'a',), (b'b',),
2201                                  (b'g', b'a'), (b'f', b'b')])
2202
2203
2204class TestStacking(KnitTests):
2205
2206    def get_basis_and_test_knit(self):
2207        basis = self.make_test_knit(name='basis')
2208        basis = RecordingVersionedFilesDecorator(basis)
2209        test = self.make_test_knit(name='test')
2210        test.add_fallback_versioned_files(basis)
2211        return basis, test
2212
2213    def test_add_fallback_versioned_files(self):
2214        basis = self.make_test_knit(name='basis')
2215        test = self.make_test_knit(name='test')
2216        # It must not error; other tests test that the fallback is referred to
2217        # when accessing data.
2218        test.add_fallback_versioned_files(basis)
2219
2220    def test_add_lines(self):
2221        # lines added to the test are not added to the basis
2222        basis, test = self.get_basis_and_test_knit()
2223        key = (b'foo',)
2224        key_basis = (b'bar',)
2225        key_cross_border = (b'quux',)
2226        key_delta = (b'zaphod',)
2227        test.add_lines(key, (), [b'foo\n'])
2228        self.assertEqual({}, basis.get_parent_map([key]))
2229        # lines added to the test that reference across the stack do a
2230        # fulltext.
2231        basis.add_lines(key_basis, (), [b'foo\n'])
2232        basis.calls = []
2233        test.add_lines(key_cross_border, (key_basis,), [b'foo\n'])
2234        self.assertEqual('fulltext', test._index.get_method(key_cross_border))
2235        # we don't even need to look at the basis to see that this should be
2236        # stored as a fulltext
2237        self.assertEqual([], basis.calls)
2238        # Subsequent adds do delta.
2239        basis.calls = []
2240        test.add_lines(key_delta, (key_cross_border,), [b'foo\n'])
2241        self.assertEqual('line-delta', test._index.get_method(key_delta))
2242        self.assertEqual([], basis.calls)
2243
2244    def test_annotate(self):
2245        # annotations from the test knit are answered without asking the basis
2246        basis, test = self.get_basis_and_test_knit()
2247        key = (b'foo',)
2248        key_basis = (b'bar',)
2249        test.add_lines(key, (), [b'foo\n'])
2250        details = test.annotate(key)
2251        self.assertEqual([(key, b'foo\n')], details)
2252        self.assertEqual([], basis.calls)
2253        # But texts that are not in the test knit are looked for in the basis
2254        # directly.
2255        basis.add_lines(key_basis, (), [b'foo\n', b'bar\n'])
2256        basis.calls = []
2257        details = test.annotate(key_basis)
2258        self.assertEqual(
2259            [(key_basis, b'foo\n'), (key_basis, b'bar\n')], details)
2260        # Not optimised to date:
2261        # self.assertEqual([("annotate", key_basis)], basis.calls)
2262        self.assertEqual([('get_parent_map', {key_basis}),
2263                          ('get_parent_map', {key_basis}),
2264                          ('get_record_stream', [key_basis], 'topological', True)],
2265                         basis.calls)
2266
2267    def test_check(self):
2268        # At the moment checking a stacked knit does implicitly check the
2269        # fallback files.
2270        basis, test = self.get_basis_and_test_knit()
2271        test.check()
2272
2273    def test_get_parent_map(self):
2274        # parents in the test knit are answered without asking the basis
2275        basis, test = self.get_basis_and_test_knit()
2276        key = (b'foo',)
2277        key_basis = (b'bar',)
2278        key_missing = (b'missing',)
2279        test.add_lines(key, (), [])
2280        parent_map = test.get_parent_map([key])
2281        self.assertEqual({key: ()}, parent_map)
2282        self.assertEqual([], basis.calls)
2283        # But parents that are not in the test knit are looked for in the basis
2284        basis.add_lines(key_basis, (), [])
2285        basis.calls = []
2286        parent_map = test.get_parent_map([key, key_basis, key_missing])
2287        self.assertEqual({key: (),
2288                          key_basis: ()}, parent_map)
2289        self.assertEqual([("get_parent_map", {key_basis, key_missing})],
2290                         basis.calls)
2291
2292    def test_get_record_stream_unordered_fulltexts(self):
2293        # records from the test knit are answered without asking the basis:
2294        basis, test = self.get_basis_and_test_knit()
2295        key = (b'foo',)
2296        key_basis = (b'bar',)
2297        key_missing = (b'missing',)
2298        test.add_lines(key, (), [b'foo\n'])
2299        records = list(test.get_record_stream([key], 'unordered', True))
2300        self.assertEqual(1, len(records))
2301        self.assertEqual([], basis.calls)
2302        # Missing (from test knit) objects are retrieved from the basis:
2303        basis.add_lines(key_basis, (), [b'foo\n', b'bar\n'])
2304        basis.calls = []
2305        records = list(test.get_record_stream([key_basis, key_missing],
2306                                              'unordered', True))
2307        self.assertEqual(2, len(records))
2308        calls = list(basis.calls)
2309        for record in records:
2310            self.assertSubset([record.key], (key_basis, key_missing))
2311            if record.key == key_missing:
2312                self.assertIsInstance(record, AbsentContentFactory)
2313            else:
2314                reference = list(basis.get_record_stream([key_basis],
2315                                                         'unordered', True))[0]
2316                self.assertEqual(reference.key, record.key)
2317                self.assertEqual(reference.sha1, record.sha1)
2318                self.assertEqual(reference.storage_kind, record.storage_kind)
2319                self.assertEqual(reference.get_bytes_as(reference.storage_kind),
2320                                 record.get_bytes_as(record.storage_kind))
2321                self.assertEqual(reference.get_bytes_as('fulltext'),
2322                                 record.get_bytes_as('fulltext'))
2323        # It's not strictly minimal, but it seems reasonable for now for it to
2324        # ask which fallbacks have which parents.
2325        self.assertEqual([
2326            ("get_parent_map", {key_basis, key_missing}),
2327            ("get_record_stream", [key_basis], 'unordered', True)],
2328            calls)
2329
2330    def test_get_record_stream_ordered_fulltexts(self):
2331        # ordering is preserved down into the fallback store.
2332        basis, test = self.get_basis_and_test_knit()
2333        key = (b'foo',)
2334        key_basis = (b'bar',)
2335        key_basis_2 = (b'quux',)
2336        key_missing = (b'missing',)
2337        test.add_lines(key, (key_basis,), [b'foo\n'])
2338        # Missing (from test knit) objects are retrieved from the basis:
2339        basis.add_lines(key_basis, (key_basis_2,), [b'foo\n', b'bar\n'])
2340        basis.add_lines(key_basis_2, (), [b'quux\n'])
2341        basis.calls = []
2342        # ask for in non-topological order
2343        records = list(test.get_record_stream(
2344            [key, key_basis, key_missing, key_basis_2], 'topological', True))
2345        self.assertEqual(4, len(records))
2346        results = []
2347        for record in records:
2348            self.assertSubset([record.key],
2349                              (key_basis, key_missing, key_basis_2, key))
2350            if record.key == key_missing:
2351                self.assertIsInstance(record, AbsentContentFactory)
2352            else:
2353                results.append((record.key, record.sha1, record.storage_kind,
2354                                record.get_bytes_as('fulltext')))
2355        calls = list(basis.calls)
2356        order = [record[0] for record in results]
2357        self.assertEqual([key_basis_2, key_basis, key], order)
2358        for result in results:
2359            if result[0] == key:
2360                source = test
2361            else:
2362                source = basis
2363            record = next(source.get_record_stream([result[0]], 'unordered',
2364                                                   True))
2365            self.assertEqual(record.key, result[0])
2366            self.assertEqual(record.sha1, result[1])
2367            # We used to check that the storage kind matched, but actually it
2368            # depends on whether it was sourced from the basis, or in a single
2369            # group, because asking for full texts returns proxy objects to a
2370            # _ContentMapGenerator object; so checking the kind is unneeded.
2371            self.assertEqual(record.get_bytes_as('fulltext'), result[3])
2372        # It's not strictly minimal, but it seems reasonable for now for it to
2373        # ask which fallbacks have which parents.
2374        self.assertEqual(2, len(calls))
2375        self.assertEqual(
2376            ("get_parent_map", {key_basis, key_basis_2, key_missing}),
2377            calls[0])
2378        # topological is requested from the fallback, because that is what
2379        # was requested at the top level.
2380        self.assertIn(
2381            calls[1], [
2382                ("get_record_stream", [key_basis_2,
2383                                       key_basis], 'topological', True),
2384                ("get_record_stream", [key_basis, key_basis_2], 'topological', True)])
2385
2386    def test_get_record_stream_unordered_deltas(self):
2387        # records from the test knit are answered without asking the basis:
2388        basis, test = self.get_basis_and_test_knit()
2389        key = (b'foo',)
2390        key_basis = (b'bar',)
2391        key_missing = (b'missing',)
2392        test.add_lines(key, (), [b'foo\n'])
2393        records = list(test.get_record_stream([key], 'unordered', False))
2394        self.assertEqual(1, len(records))
2395        self.assertEqual([], basis.calls)
2396        # Missing (from test knit) objects are retrieved from the basis:
2397        basis.add_lines(key_basis, (), [b'foo\n', b'bar\n'])
2398        basis.calls = []
2399        records = list(test.get_record_stream([key_basis, key_missing],
2400                                              'unordered', False))
2401        self.assertEqual(2, len(records))
2402        calls = list(basis.calls)
2403        for record in records:
2404            self.assertSubset([record.key], (key_basis, key_missing))
2405            if record.key == key_missing:
2406                self.assertIsInstance(record, AbsentContentFactory)
2407            else:
2408                reference = list(basis.get_record_stream([key_basis],
2409                                                         'unordered', False))[0]
2410                self.assertEqual(reference.key, record.key)
2411                self.assertEqual(reference.sha1, record.sha1)
2412                self.assertEqual(reference.storage_kind, record.storage_kind)
2413                self.assertEqual(reference.get_bytes_as(reference.storage_kind),
2414                                 record.get_bytes_as(record.storage_kind))
2415        # It's not strictly minimal, but it seems reasonable for now for it to
2416        # ask which fallbacks have which parents.
2417        self.assertEqual([
2418            ("get_parent_map", {key_basis, key_missing}),
2419            ("get_record_stream", [key_basis], 'unordered', False)],
2420            calls)
2421
2422    def test_get_record_stream_ordered_deltas(self):
2423        # ordering is preserved down into the fallback store.
2424        basis, test = self.get_basis_and_test_knit()
2425        key = (b'foo',)
2426        key_basis = (b'bar',)
2427        key_basis_2 = (b'quux',)
2428        key_missing = (b'missing',)
2429        test.add_lines(key, (key_basis,), [b'foo\n'])
2430        # Missing (from test knit) objects are retrieved from the basis:
2431        basis.add_lines(key_basis, (key_basis_2,), [b'foo\n', b'bar\n'])
2432        basis.add_lines(key_basis_2, (), [b'quux\n'])
2433        basis.calls = []
2434        # ask for in non-topological order
2435        records = list(test.get_record_stream(
2436            [key, key_basis, key_missing, key_basis_2], 'topological', False))
2437        self.assertEqual(4, len(records))
2438        results = []
2439        for record in records:
2440            self.assertSubset([record.key],
2441                              (key_basis, key_missing, key_basis_2, key))
2442            if record.key == key_missing:
2443                self.assertIsInstance(record, AbsentContentFactory)
2444            else:
2445                results.append((record.key, record.sha1, record.storage_kind,
2446                                record.get_bytes_as(record.storage_kind)))
2447        calls = list(basis.calls)
2448        order = [record[0] for record in results]
2449        self.assertEqual([key_basis_2, key_basis, key], order)
2450        for result in results:
2451            if result[0] == key:
2452                source = test
2453            else:
2454                source = basis
2455            record = next(source.get_record_stream([result[0]], 'unordered',
2456                                                   False))
2457            self.assertEqual(record.key, result[0])
2458            self.assertEqual(record.sha1, result[1])
2459            self.assertEqual(record.storage_kind, result[2])
2460            self.assertEqual(record.get_bytes_as(
2461                record.storage_kind), result[3])
2462        # It's not strictly minimal, but it seems reasonable for now for it to
2463        # ask which fallbacks have which parents.
2464        self.assertEqual([
2465            ("get_parent_map", {key_basis, key_basis_2, key_missing}),
2466            ("get_record_stream", [key_basis_2, key_basis], 'topological', False)],
2467            calls)
2468
2469    def test_get_sha1s(self):
2470        # sha1's in the test knit are answered without asking the basis
2471        basis, test = self.get_basis_and_test_knit()
2472        key = (b'foo',)
2473        key_basis = (b'bar',)
2474        key_missing = (b'missing',)
2475        test.add_lines(key, (), [b'foo\n'])
2476        key_sha1sum = osutils.sha_string(b'foo\n')
2477        sha1s = test.get_sha1s([key])
2478        self.assertEqual({key: key_sha1sum}, sha1s)
2479        self.assertEqual([], basis.calls)
2480        # But texts that are not in the test knit are looked for in the basis
2481        # directly (rather than via text reconstruction) so that remote servers
2482        # etc don't have to answer with full content.
2483        basis.add_lines(key_basis, (), [b'foo\n', b'bar\n'])
2484        basis_sha1sum = osutils.sha_string(b'foo\nbar\n')
2485        basis.calls = []
2486        sha1s = test.get_sha1s([key, key_missing, key_basis])
2487        self.assertEqual({key: key_sha1sum,
2488                          key_basis: basis_sha1sum}, sha1s)
2489        self.assertEqual([("get_sha1s", {key_basis, key_missing})],
2490                         basis.calls)
2491
2492    def test_insert_record_stream(self):
2493        # records are inserted as normal; insert_record_stream builds on
2494        # add_lines, so a smoke test should be all that's needed:
2495        key_basis = (b'bar',)
2496        key_delta = (b'zaphod',)
2497        basis, test = self.get_basis_and_test_knit()
2498        source = self.make_test_knit(name='source')
2499        basis.add_lines(key_basis, (), [b'foo\n'])
2500        basis.calls = []
2501        source.add_lines(key_basis, (), [b'foo\n'])
2502        source.add_lines(key_delta, (key_basis,), [b'bar\n'])
2503        stream = source.get_record_stream([key_delta], 'unordered', False)
2504        test.insert_record_stream(stream)
2505        # XXX: this does somewhat too many calls in making sure of whether it
2506        # has to recreate the full text.
2507        self.assertEqual([("get_parent_map", {key_basis}),
2508                          ('get_parent_map', {key_basis}),
2509                          ('get_record_stream', [key_basis], 'unordered', True)],
2510                         basis.calls)
2511        self.assertEqual({key_delta: (key_basis,)},
2512                         test.get_parent_map([key_delta]))
2513        self.assertEqual(b'bar\n', next(test.get_record_stream([key_delta],
2514                                                               'unordered', True)).get_bytes_as('fulltext'))
2515
2516    def test_iter_lines_added_or_present_in_keys(self):
2517        # Lines from the basis are returned, and lines for a given key are only
2518        # returned once.
2519        key1 = (b'foo1',)
2520        key2 = (b'foo2',)
2521        # all sources are asked for keys:
2522        basis, test = self.get_basis_and_test_knit()
2523        basis.add_lines(key1, (), [b"foo"])
2524        basis.calls = []
2525        lines = list(test.iter_lines_added_or_present_in_keys([key1]))
2526        self.assertEqual([(b"foo\n", key1)], lines)
2527        self.assertEqual([("iter_lines_added_or_present_in_keys", {key1})],
2528                         basis.calls)
2529        # keys in both are not duplicated:
2530        test.add_lines(key2, (), [b"bar\n"])
2531        basis.add_lines(key2, (), [b"bar\n"])
2532        basis.calls = []
2533        lines = list(test.iter_lines_added_or_present_in_keys([key2]))
2534        self.assertEqual([(b"bar\n", key2)], lines)
2535        self.assertEqual([], basis.calls)
2536
2537    def test_keys(self):
2538        key1 = (b'foo1',)
2539        key2 = (b'foo2',)
2540        # all sources are asked for keys:
2541        basis, test = self.get_basis_and_test_knit()
2542        keys = test.keys()
2543        self.assertEqual(set(), set(keys))
2544        self.assertEqual([("keys",)], basis.calls)
2545        # keys from a basis are returned:
2546        basis.add_lines(key1, (), [])
2547        basis.calls = []
2548        keys = test.keys()
2549        self.assertEqual({key1}, set(keys))
2550        self.assertEqual([("keys",)], basis.calls)
2551        # keys in both are not duplicated:
2552        test.add_lines(key2, (), [])
2553        basis.add_lines(key2, (), [])
2554        basis.calls = []
2555        keys = test.keys()
2556        self.assertEqual(2, len(keys))
2557        self.assertEqual({key1, key2}, set(keys))
2558        self.assertEqual([("keys",)], basis.calls)
2559
2560    def test_add_mpdiffs(self):
2561        # records are inserted as normal; add_mpdiff builds on
2562        # add_lines, so a smoke test should be all that's needed:
2563        key_basis = (b'bar',)
2564        key_delta = (b'zaphod',)
2565        basis, test = self.get_basis_and_test_knit()
2566        source = self.make_test_knit(name='source')
2567        basis.add_lines(key_basis, (), [b'foo\n'])
2568        basis.calls = []
2569        source.add_lines(key_basis, (), [b'foo\n'])
2570        source.add_lines(key_delta, (key_basis,), [b'bar\n'])
2571        diffs = source.make_mpdiffs([key_delta])
2572        test.add_mpdiffs([(key_delta, (key_basis,),
2573                           source.get_sha1s([key_delta])[key_delta], diffs[0])])
2574        self.assertEqual([("get_parent_map", {key_basis}),
2575                          ('get_record_stream', [key_basis], 'unordered', True), ],
2576                         basis.calls)
2577        self.assertEqual({key_delta: (key_basis,)},
2578                         test.get_parent_map([key_delta]))
2579        self.assertEqual(b'bar\n', next(test.get_record_stream([key_delta],
2580                                                               'unordered', True)).get_bytes_as('fulltext'))
2581
2582    def test_make_mpdiffs(self):
2583        # Generating an mpdiff across a stacking boundary should detect parent
2584        # texts regions.
2585        key = (b'foo',)
2586        key_left = (b'bar',)
2587        key_right = (b'zaphod',)
2588        basis, test = self.get_basis_and_test_knit()
2589        basis.add_lines(key_left, (), [b'bar\n'])
2590        basis.add_lines(key_right, (), [b'zaphod\n'])
2591        basis.calls = []
2592        test.add_lines(key, (key_left, key_right),
2593                       [b'bar\n', b'foo\n', b'zaphod\n'])
2594        diffs = test.make_mpdiffs([key])
2595        self.assertEqual([
2596            multiparent.MultiParent([multiparent.ParentText(0, 0, 0, 1),
2597                                     multiparent.NewText([b'foo\n']),
2598                                     multiparent.ParentText(1, 0, 2, 1)])],
2599                         diffs)
2600        self.assertEqual(3, len(basis.calls))
2601        self.assertEqual([
2602            ("get_parent_map", {key_left, key_right}),
2603            ("get_parent_map", {key_left, key_right}),
2604            ],
2605            basis.calls[:-1])
2606        last_call = basis.calls[-1]
2607        self.assertEqual('get_record_stream', last_call[0])
2608        self.assertEqual({key_left, key_right}, set(last_call[1]))
2609        self.assertEqual('topological', last_call[2])
2610        self.assertEqual(True, last_call[3])
2611
2612
2613class TestNetworkBehaviour(KnitTests):
2614    """Tests for getting data out of/into knits over the network."""
2615
2616    def test_include_delta_closure_generates_a_knit_delta_closure(self):
2617        vf = self.make_test_knit(name='test')
2618        # put in three texts, giving ft, delta, delta
2619        vf.add_lines((b'base',), (), [b'base\n', b'content\n'])
2620        vf.add_lines((b'd1',), ((b'base',),), [b'd1\n'])
2621        vf.add_lines((b'd2',), ((b'd1',),), [b'd2\n'])
2622        # But heuristics could interfere, so check what happened:
2623        self.assertEqual(['knit-ft-gz', 'knit-delta-gz', 'knit-delta-gz'],
2624                         [record.storage_kind for record in
2625                          vf.get_record_stream([(b'base',), (b'd1',), (b'd2',)],
2626                                               'topological', False)])
2627        # generate a stream of just the deltas include_delta_closure=True,
2628        # serialise to the network, and check that we get a delta closure on the wire.
2629        stream = vf.get_record_stream(
2630            [(b'd1',), (b'd2',)], 'topological', True)
2631        netb = [record.get_bytes_as(record.storage_kind) for record in stream]
2632        # The first bytes should be a memo from _ContentMapGenerator, and the
2633        # second bytes should be empty (because its a API proxy not something
2634        # for wire serialisation.
2635        self.assertEqual(b'', netb[1])
2636        bytes = netb[0]
2637        kind, line_end = network_bytes_to_kind_and_offset(bytes)
2638        self.assertEqual('knit-delta-closure', kind)
2639
2640
2641class TestContentMapGenerator(KnitTests):
2642    """Tests for ContentMapGenerator"""
2643
2644    def test_get_record_stream_gives_records(self):
2645        vf = self.make_test_knit(name='test')
2646        # put in three texts, giving ft, delta, delta
2647        vf.add_lines((b'base',), (), [b'base\n', b'content\n'])
2648        vf.add_lines((b'd1',), ((b'base',),), [b'd1\n'])
2649        vf.add_lines((b'd2',), ((b'd1',),), [b'd2\n'])
2650        keys = [(b'd1',), (b'd2',)]
2651        generator = _VFContentMapGenerator(vf, keys,
2652                                           global_map=vf.get_parent_map(keys))
2653        for record in generator.get_record_stream():
2654            if record.key == (b'd1',):
2655                self.assertEqual(b'd1\n', record.get_bytes_as('fulltext'))
2656            else:
2657                self.assertEqual(b'd2\n', record.get_bytes_as('fulltext'))
2658
2659    def test_get_record_stream_kinds_are_raw(self):
2660        vf = self.make_test_knit(name='test')
2661        # put in three texts, giving ft, delta, delta
2662        vf.add_lines((b'base',), (), [b'base\n', b'content\n'])
2663        vf.add_lines((b'd1',), ((b'base',),), [b'd1\n'])
2664        vf.add_lines((b'd2',), ((b'd1',),), [b'd2\n'])
2665        keys = [(b'base',), (b'd1',), (b'd2',)]
2666        generator = _VFContentMapGenerator(vf, keys,
2667                                           global_map=vf.get_parent_map(keys))
2668        kinds = {(b'base',): 'knit-delta-closure',
2669                 (b'd1',): 'knit-delta-closure-ref',
2670                 (b'd2',): 'knit-delta-closure-ref',
2671                 }
2672        for record in generator.get_record_stream():
2673            self.assertEqual(kinds[record.key], record.storage_kind)
2674