1# Copyright (C) 2006-2011 Canonical Ltd
2#
3# This program is free software; you can redistribute it and/or modify
4# it under the terms of the GNU General Public License as published by
5# the Free Software Foundation; either version 2 of the License, or
6# (at your option) any later version.
7#
8# This program is distributed in the hope that it will be useful,
9# but WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11# GNU General Public License for more details.
12#
13# You should have received a copy of the GNU General Public License
14# along with this program; if not, write to the Free Software
15# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17"""Versioned text file storage api."""
18
19from copy import copy
20from io import BytesIO
21import itertools
22import os
23import struct
24from zlib import adler32
25
26from ..lazy_import import lazy_import
27lazy_import(globals(), """
28from breezy import (
29    annotate,
30    bencode,
31    graph as _mod_graph,
32    osutils,
33    multiparent,
34    tsort,
35    revision,
36    urlutils,
37    )
38from breezy.bzr import (
39    groupcompress,
40    index,
41    knit,
42    )
43""")
44from .. import (
45    errors,
46    )
47from ..registry import Registry
48from ..textmerge import TextMerge
49
50
51adapter_registry = Registry()
52adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
53                               'breezy.bzr.knit', 'DeltaAnnotatedToUnannotated')
54adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
55                               'breezy.bzr.knit', 'FTAnnotatedToUnannotated')
56for target_storage_kind in ('fulltext', 'chunked', 'lines'):
57    adapter_registry.register_lazy(('knit-delta-gz', target_storage_kind), 'breezy.bzr.knit',
58                                   'DeltaPlainToFullText')
59    adapter_registry.register_lazy(('knit-ft-gz', target_storage_kind), 'breezy.bzr.knit',
60                                   'FTPlainToFullText')
61    adapter_registry.register_lazy(('knit-annotated-ft-gz', target_storage_kind),
62                                   'breezy.bzr.knit', 'FTAnnotatedToFullText')
63    adapter_registry.register_lazy(('knit-annotated-delta-gz', target_storage_kind),
64                                   'breezy.bzr.knit', 'DeltaAnnotatedToFullText')
65
66
67class UnavailableRepresentation(errors.InternalBzrError):
68
69    _fmt = ("The encoding '%(wanted)s' is not available for key %(key)s which "
70            "is encoded as '%(native)s'.")
71
72    def __init__(self, key, wanted, native):
73        errors.InternalBzrError.__init__(self)
74        self.wanted = wanted
75        self.native = native
76        self.key = key
77
78
79class ExistingContent(errors.BzrError):
80
81    _fmt = "The content being inserted is already present."
82
83
84class ContentFactory(object):
85    """Abstract interface for insertion and retrieval from a VersionedFile.
86
87    :ivar sha1: None, or the sha1 of the content fulltext.
88    :ivar size: None, or the size of the content fulltext.
89    :ivar storage_kind: The native storage kind of this factory. One of
90        'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
91        'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
92        'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
93    :ivar key: The key of this content. Each key is a tuple with a single
94        string in it.
95    :ivar parents: A tuple of parent keys for self.key. If the object has
96        no parent information, None (as opposed to () for an empty list of
97        parents).
98    """
99
100    def __init__(self):
101        """Create a ContentFactory."""
102        self.sha1 = None
103        self.size = None
104        self.storage_kind = None
105        self.key = None
106        self.parents = None
107
108
109class ChunkedContentFactory(ContentFactory):
110    """Static data content factory.
111
112    This takes a 'chunked' list of strings. The only requirement on 'chunked' is
113    that ''.join(lines) becomes a valid fulltext. A tuple of a single string
114    satisfies this, as does a list of lines.
115
116    :ivar sha1: None, or the sha1 of the content fulltext.
117    :ivar size: None, or the size of the content fulltext.
118    :ivar storage_kind: The native storage kind of this factory. Always
119        'chunked'
120    :ivar key: The key of this content. Each key is a tuple with a single
121        string in it.
122    :ivar parents: A tuple of parent keys for self.key. If the object has
123        no parent information, None (as opposed to () for an empty list of
124        parents).
125    :ivar chunks_are_lines: Whether chunks are lines.
126     """
127
128    def __init__(self, key, parents, sha1, chunks, chunks_are_lines=None):
129        """Create a ContentFactory."""
130        self.sha1 = sha1
131        self.size = sum(map(len, chunks))
132        self.storage_kind = 'chunked'
133        self.key = key
134        self.parents = parents
135        self._chunks = chunks
136        self._chunks_are_lines = chunks_are_lines
137
138    def get_bytes_as(self, storage_kind):
139        if storage_kind == 'chunked':
140            return self._chunks
141        elif storage_kind == 'fulltext':
142            return b''.join(self._chunks)
143        elif storage_kind == 'lines':
144            if self._chunks_are_lines:
145                return self._chunks
146            return list(osutils.chunks_to_lines(self._chunks))
147        raise UnavailableRepresentation(self.key, storage_kind,
148                                        self.storage_kind)
149
150    def iter_bytes_as(self, storage_kind):
151        if storage_kind == 'chunked':
152            return iter(self._chunks)
153        elif storage_kind == 'lines':
154            if self._chunks_are_lines:
155                return iter(self._chunks)
156            return iter(osutils.chunks_to_lines(self._chunks))
157        raise UnavailableRepresentation(self.key, storage_kind,
158                                        self.storage_kind)
159
160class FulltextContentFactory(ContentFactory):
161    """Static data content factory.
162
163    This takes a fulltext when created and just returns that during
164    get_bytes_as('fulltext').
165
166    :ivar sha1: None, or the sha1 of the content fulltext.
167    :ivar storage_kind: The native storage kind of this factory. Always
168        'fulltext'.
169    :ivar key: The key of this content. Each key is a tuple with a single
170        string in it.
171    :ivar parents: A tuple of parent keys for self.key. If the object has
172        no parent information, None (as opposed to () for an empty list of
173        parents).
174     """
175
176    def __init__(self, key, parents, sha1, text):
177        """Create a ContentFactory."""
178        self.sha1 = sha1
179        self.size = len(text)
180        self.storage_kind = 'fulltext'
181        self.key = key
182        self.parents = parents
183        if not isinstance(text, bytes):
184            raise TypeError(text)
185        self._text = text
186
187    def get_bytes_as(self, storage_kind):
188        if storage_kind == self.storage_kind:
189            return self._text
190        elif storage_kind == 'chunked':
191            return [self._text]
192        elif storage_kind == 'lines':
193            return osutils.split_lines(self._text)
194        raise UnavailableRepresentation(self.key, storage_kind,
195                                        self.storage_kind)
196
197    def iter_bytes_as(self, storage_kind):
198        if storage_kind == 'chunked':
199            return iter([self._text])
200        elif storage_kind == 'lines':
201            return iter(osutils.split_lines(self._text))
202        raise UnavailableRepresentation(self.key, storage_kind,
203                                        self.storage_kind)
204
205
206class FileContentFactory(ContentFactory):
207    """File-based content factory.
208    """
209
210    def __init__(self, key, parents, fileobj, sha1=None, size=None):
211        self.key = key
212        self.parents = parents
213        self.file = fileobj
214        self.storage_kind = 'file'
215        self.sha1 = sha1
216        self.size = size
217
218    def get_bytes_as(self, storage_kind):
219        self.file.seek(0)
220        if storage_kind == 'fulltext':
221            return self.file.read()
222        elif storage_kind == 'chunked':
223            return list(osutils.file_iterator(self.file))
224        elif storage_kind == 'lines':
225            return list(self.file.readlines())
226        raise UnavailableRepresentation(self.key, storage_kind,
227                                        self.storage_kind)
228
229    def iter_bytes_as(self, storage_kind):
230        self.file.seek(0)
231        if storage_kind == 'chunked':
232            return osutils.file_iterator(self.file)
233        elif storage_kind == 'lines':
234            return self.file
235        raise UnavailableRepresentation(self.key, storage_kind,
236                                        self.storage_kind)
237
238
239class AbsentContentFactory(ContentFactory):
240    """A placeholder content factory for unavailable texts.
241
242    :ivar sha1: None.
243    :ivar storage_kind: 'absent'.
244    :ivar key: The key of this content. Each key is a tuple with a single
245        string in it.
246    :ivar parents: None.
247    """
248
249    def __init__(self, key):
250        """Create a ContentFactory."""
251        self.sha1 = None
252        self.size = None
253        self.storage_kind = 'absent'
254        self.key = key
255        self.parents = None
256
257    def get_bytes_as(self, storage_kind):
258        raise ValueError('A request was made for key: %s, but that'
259                         ' content is not available, and the calling'
260                         ' code does not handle if it is missing.'
261                         % (self.key,))
262
263    def iter_bytes_as(self, storage_kind):
264        raise ValueError('A request was made for key: %s, but that'
265                         ' content is not available, and the calling'
266                         ' code does not handle if it is missing.'
267                         % (self.key,))
268
269
270class AdapterFactory(ContentFactory):
271    """A content factory to adapt between key prefix's."""
272
273    def __init__(self, key, parents, adapted):
274        """Create an adapter factory instance."""
275        self.key = key
276        self.parents = parents
277        self._adapted = adapted
278
279    def __getattr__(self, attr):
280        """Return a member from the adapted object."""
281        if attr in ('key', 'parents'):
282            return self.__dict__[attr]
283        else:
284            return getattr(self._adapted, attr)
285
286
287def filter_absent(record_stream):
288    """Adapt a record stream to remove absent records."""
289    for record in record_stream:
290        if record.storage_kind != 'absent':
291            yield record
292
293
294class _MPDiffGenerator(object):
295    """Pull out the functionality for generating mp_diffs."""
296
297    def __init__(self, vf, keys):
298        self.vf = vf
299        # This is the order the keys were requested in
300        self.ordered_keys = tuple(keys)
301        # keys + their parents, what we need to compute the diffs
302        self.needed_keys = ()
303        # Map from key: mp_diff
304        self.diffs = {}
305        # Map from key: parents_needed (may have ghosts)
306        self.parent_map = {}
307        # Parents that aren't present
308        self.ghost_parents = ()
309        # Map from parent_key => number of children for this text
310        self.refcounts = {}
311        # Content chunks that are cached while we still need them
312        self.chunks = {}
313
314    def _find_needed_keys(self):
315        """Find the set of keys we need to request.
316
317        This includes all the original keys passed in, and the non-ghost
318        parents of those keys.
319
320        :return: (needed_keys, refcounts)
321            needed_keys is the set of all texts we need to extract
322            refcounts is a dict of {key: num_children} letting us know when we
323                no longer need to cache a given parent text
324        """
325        # All the keys and their parents
326        needed_keys = set(self.ordered_keys)
327        parent_map = self.vf.get_parent_map(needed_keys)
328        self.parent_map = parent_map
329        # TODO: Should we be using a different construct here? I think this
330        #       uses difference_update internally, and we expect the result to
331        #       be tiny
332        missing_keys = needed_keys.difference(parent_map)
333        if missing_keys:
334            raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
335        # Parents that might be missing. They are allowed to be ghosts, but we
336        # should check for them
337        refcounts = {}
338        setdefault = refcounts.setdefault
339        just_parents = set()
340        for child_key, parent_keys in parent_map.items():
341            if not parent_keys:
342                # parent_keys may be None if a given VersionedFile claims to
343                # not support graph operations.
344                continue
345            just_parents.update(parent_keys)
346            needed_keys.update(parent_keys)
347            for p in parent_keys:
348                refcounts[p] = setdefault(p, 0) + 1
349        just_parents.difference_update(parent_map)
350        # Remove any parents that are actually ghosts from the needed set
351        self.present_parents = set(self.vf.get_parent_map(just_parents))
352        self.ghost_parents = just_parents.difference(self.present_parents)
353        needed_keys.difference_update(self.ghost_parents)
354        self.needed_keys = needed_keys
355        self.refcounts = refcounts
356        return needed_keys, refcounts
357
358    def _compute_diff(self, key, parent_lines, lines):
359        """Compute a single mp_diff, and store it in self._diffs"""
360        if len(parent_lines) > 0:
361            # XXX: _extract_blocks is not usefully defined anywhere...
362            #      It was meant to extract the left-parent diff without
363            #      having to recompute it for Knit content (pack-0.92,
364            #      etc). That seems to have regressed somewhere
365            left_parent_blocks = self.vf._extract_blocks(key,
366                                                         parent_lines[0], lines)
367        else:
368            left_parent_blocks = None
369        diff = multiparent.MultiParent.from_lines(lines,
370                                                  parent_lines, left_parent_blocks)
371        self.diffs[key] = diff
372
373    def _process_one_record(self, key, this_chunks):
374        parent_keys = None
375        if key in self.parent_map:
376            # This record should be ready to diff, since we requested
377            # content in 'topological' order
378            parent_keys = self.parent_map.pop(key)
379            # If a VersionedFile claims 'no-graph' support, then it may return
380            # None for any parent request, so we replace it with an empty tuple
381            if parent_keys is None:
382                parent_keys = ()
383            parent_lines = []
384            for p in parent_keys:
385                # Alternatively we could check p not in self.needed_keys, but
386                # ghost_parents should be tiny versus huge
387                if p in self.ghost_parents:
388                    continue
389                refcount = self.refcounts[p]
390                if refcount == 1:  # Last child reference
391                    self.refcounts.pop(p)
392                    parent_chunks = self.chunks.pop(p)
393                else:
394                    self.refcounts[p] = refcount - 1
395                    parent_chunks = self.chunks[p]
396                p_lines = osutils.chunks_to_lines(parent_chunks)
397                # TODO: Should we cache the line form? We did the
398                #       computation to get it, but storing it this way will
399                #       be less memory efficient...
400                parent_lines.append(p_lines)
401                del p_lines
402            lines = osutils.chunks_to_lines(this_chunks)
403            # Since we needed the lines, we'll go ahead and cache them this way
404            this_chunks = lines
405            self._compute_diff(key, parent_lines, lines)
406            del lines
407        # Is this content required for any more children?
408        if key in self.refcounts:
409            self.chunks[key] = this_chunks
410
411    def _extract_diffs(self):
412        needed_keys, refcounts = self._find_needed_keys()
413        for record in self.vf.get_record_stream(needed_keys,
414                                                'topological', True):
415            if record.storage_kind == 'absent':
416                raise errors.RevisionNotPresent(record.key, self.vf)
417            self._process_one_record(record.key,
418                                     record.get_bytes_as('chunked'))
419
420    def compute_diffs(self):
421        self._extract_diffs()
422        dpop = self.diffs.pop
423        return [dpop(k) for k in self.ordered_keys]
424
425
426class VersionedFile(object):
427    """Versioned text file storage.
428
429    A versioned file manages versions of line-based text files,
430    keeping track of the originating version for each line.
431
432    To clients the "lines" of the file are represented as a list of
433    strings. These strings will typically have terminal newline
434    characters, but this is not required.  In particular files commonly
435    do not have a newline at the end of the file.
436
437    Texts are identified by a version-id string.
438    """
439
440    @staticmethod
441    def check_not_reserved_id(version_id):
442        revision.check_not_reserved_id(version_id)
443
444    def copy_to(self, name, transport):
445        """Copy this versioned file to name on transport."""
446        raise NotImplementedError(self.copy_to)
447
448    def get_record_stream(self, versions, ordering, include_delta_closure):
449        """Get a stream of records for versions.
450
451        :param versions: The versions to include. Each version is a tuple
452            (version,).
453        :param ordering: Either 'unordered' or 'topological'. A topologically
454            sorted stream has compression parents strictly before their
455            children.
456        :param include_delta_closure: If True then the closure across any
457            compression parents will be included (in the data content of the
458            stream, not in the emitted records). This guarantees that
459            'fulltext' can be used successfully on every record.
460        :return: An iterator of ContentFactory objects, each of which is only
461            valid until the iterator is advanced.
462        """
463        raise NotImplementedError(self.get_record_stream)
464
465    def has_version(self, version_id):
466        """Returns whether version is present."""
467        raise NotImplementedError(self.has_version)
468
469    def insert_record_stream(self, stream):
470        """Insert a record stream into this versioned file.
471
472        :param stream: A stream of records to insert.
473        :return: None
474        :seealso VersionedFile.get_record_stream:
475        """
476        raise NotImplementedError
477
478    def add_lines(self, version_id, parents, lines, parent_texts=None,
479                  left_matching_blocks=None, nostore_sha=None, random_id=False,
480                  check_content=True):
481        """Add a single text on top of the versioned file.
482
483        Must raise RevisionAlreadyPresent if the new version is
484        already present in file history.
485
486        Must raise RevisionNotPresent if any of the given parents are
487        not present in file history.
488
489        :param lines: A list of lines. Each line must be a bytestring. And all
490            of them except the last must be terminated with \n and contain no
491            other \n's. The last line may either contain no \n's or a single
492            terminated \n. If the lines list does meet this constraint the add
493            routine may error or may succeed - but you will be unable to read
494            the data back accurately. (Checking the lines have been split
495            correctly is expensive and extremely unlikely to catch bugs so it
496            is not done at runtime unless check_content is True.)
497        :param parent_texts: An optional dictionary containing the opaque
498            representations of some or all of the parents of version_id to
499            allow delta optimisations.  VERY IMPORTANT: the texts must be those
500            returned by add_lines or data corruption can be caused.
501        :param left_matching_blocks: a hint about which areas are common
502            between the text and its left-hand-parent.  The format is
503            the SequenceMatcher.get_matching_blocks format.
504        :param nostore_sha: Raise ExistingContent and do not add the lines to
505            the versioned file if the digest of the lines matches this.
506        :param random_id: If True a random id has been selected rather than
507            an id determined by some deterministic process such as a converter
508            from a foreign VCS. When True the backend may choose not to check
509            for uniqueness of the resulting key within the versioned file, so
510            this should only be done when the result is expected to be unique
511            anyway.
512        :param check_content: If True, the lines supplied are verified to be
513            bytestrings that are correctly formed lines.
514        :return: The text sha1, the number of bytes in the text, and an opaque
515                 representation of the inserted version which can be provided
516                 back to future add_lines calls in the parent_texts dictionary.
517        """
518        self._check_write_ok()
519        return self._add_lines(version_id, parents, lines, parent_texts,
520                               left_matching_blocks, nostore_sha, random_id, check_content)
521
522    def _add_lines(self, version_id, parents, lines, parent_texts,
523                   left_matching_blocks, nostore_sha, random_id, check_content):
524        """Helper to do the class specific add_lines."""
525        raise NotImplementedError(self.add_lines)
526
527    def add_lines_with_ghosts(self, version_id, parents, lines,
528                              parent_texts=None, nostore_sha=None, random_id=False,
529                              check_content=True, left_matching_blocks=None):
530        """Add lines to the versioned file, allowing ghosts to be present.
531
532        This takes the same parameters as add_lines and returns the same.
533        """
534        self._check_write_ok()
535        return self._add_lines_with_ghosts(version_id, parents, lines,
536                                           parent_texts, nostore_sha, random_id, check_content, left_matching_blocks)
537
538    def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
539                               nostore_sha, random_id, check_content, left_matching_blocks):
540        """Helper to do class specific add_lines_with_ghosts."""
541        raise NotImplementedError(self.add_lines_with_ghosts)
542
543    def check(self, progress_bar=None):
544        """Check the versioned file for integrity."""
545        raise NotImplementedError(self.check)
546
547    def _check_lines_not_unicode(self, lines):
548        """Check that lines being added to a versioned file are not unicode."""
549        for line in lines:
550            if not isinstance(line, bytes):
551                raise errors.BzrBadParameterUnicode("lines")
552
553    def _check_lines_are_lines(self, lines):
554        """Check that the lines really are full lines without inline EOL."""
555        for line in lines:
556            if b'\n' in line[:-1]:
557                raise errors.BzrBadParameterContainsNewline("lines")
558
559    def get_format_signature(self):
560        """Get a text description of the data encoding in this file.
561
562        :since: 0.90
563        """
564        raise NotImplementedError(self.get_format_signature)
565
566    def make_mpdiffs(self, version_ids):
567        """Create multiparent diffs for specified versions."""
568        # XXX: Can't use _MPDiffGenerator just yet. This is because version_ids
569        #      is a list of strings, not keys. And while self.get_record_stream
570        #      is supported, it takes *keys*, while self.get_parent_map() takes
571        #      strings... *sigh*
572        knit_versions = set()
573        knit_versions.update(version_ids)
574        parent_map = self.get_parent_map(version_ids)
575        for version_id in version_ids:
576            try:
577                knit_versions.update(parent_map[version_id])
578            except KeyError:
579                raise errors.RevisionNotPresent(version_id, self)
580        # We need to filter out ghosts, because we can't diff against them.
581        knit_versions = set(self.get_parent_map(knit_versions))
582        lines = dict(zip(knit_versions,
583                         self._get_lf_split_line_list(knit_versions)))
584        diffs = []
585        for version_id in version_ids:
586            target = lines[version_id]
587            try:
588                parents = [lines[p] for p in parent_map[version_id] if p in
589                           knit_versions]
590            except KeyError:
591                # I don't know how this could ever trigger.
592                # parent_map[version_id] was already triggered in the previous
593                # for loop, and lines[p] has the 'if p in knit_versions' check,
594                # so we again won't have a KeyError.
595                raise errors.RevisionNotPresent(version_id, self)
596            if len(parents) > 0:
597                left_parent_blocks = self._extract_blocks(version_id,
598                                                          parents[0], target)
599            else:
600                left_parent_blocks = None
601            diffs.append(multiparent.MultiParent.from_lines(target, parents,
602                                                            left_parent_blocks))
603        return diffs
604
605    def _extract_blocks(self, version_id, source, target):
606        return None
607
608    def add_mpdiffs(self, records):
609        """Add mpdiffs to this VersionedFile.
610
611        Records should be iterables of version, parents, expected_sha1,
612        mpdiff. mpdiff should be a MultiParent instance.
613        """
614        # Does this need to call self._check_write_ok()? (IanC 20070919)
615        vf_parents = {}
616        mpvf = multiparent.MultiMemoryVersionedFile()
617        versions = []
618        for version, parent_ids, expected_sha1, mpdiff in records:
619            versions.append(version)
620            mpvf.add_diff(mpdiff, version, parent_ids)
621        needed_parents = set()
622        for version, parent_ids, expected_sha1, mpdiff in records:
623            needed_parents.update(p for p in parent_ids
624                                  if not mpvf.has_version(p))
625        present_parents = set(self.get_parent_map(needed_parents))
626        for parent_id, lines in zip(present_parents,
627                                    self._get_lf_split_line_list(present_parents)):
628            mpvf.add_version(lines, parent_id, [])
629        for (version, parent_ids, expected_sha1, mpdiff), lines in zip(
630                records, mpvf.get_line_list(versions)):
631            if len(parent_ids) == 1:
632                left_matching_blocks = list(mpdiff.get_matching_blocks(0,
633                                                                       mpvf.get_diff(parent_ids[0]).num_lines()))
634            else:
635                left_matching_blocks = None
636            try:
637                _, _, version_text = self.add_lines_with_ghosts(version,
638                                                                parent_ids, lines, vf_parents,
639                                                                left_matching_blocks=left_matching_blocks)
640            except NotImplementedError:
641                # The vf can't handle ghosts, so add lines normally, which will
642                # (reasonably) fail if there are ghosts in the data.
643                _, _, version_text = self.add_lines(version,
644                                                    parent_ids, lines, vf_parents,
645                                                    left_matching_blocks=left_matching_blocks)
646            vf_parents[version] = version_text
647        sha1s = self.get_sha1s(versions)
648        for version, parent_ids, expected_sha1, mpdiff in records:
649            if expected_sha1 != sha1s[version]:
650                raise errors.VersionedFileInvalidChecksum(version)
651
652    def get_text(self, version_id):
653        """Return version contents as a text string.
654
655        Raises RevisionNotPresent if version is not present in
656        file history.
657        """
658        return b''.join(self.get_lines(version_id))
659    get_string = get_text
660
661    def get_texts(self, version_ids):
662        """Return the texts of listed versions as a list of strings.
663
664        Raises RevisionNotPresent if version is not present in
665        file history.
666        """
667        return [b''.join(self.get_lines(v)) for v in version_ids]
668
669    def get_lines(self, version_id):
670        """Return version contents as a sequence of lines.
671
672        Raises RevisionNotPresent if version is not present in
673        file history.
674        """
675        raise NotImplementedError(self.get_lines)
676
677    def _get_lf_split_line_list(self, version_ids):
678        return [BytesIO(t).readlines() for t in self.get_texts(version_ids)]
679
680    def get_ancestry(self, version_ids):
681        """Return a list of all ancestors of given version(s). This
682        will not include the null revision.
683
684        Must raise RevisionNotPresent if any of the given versions are
685        not present in file history."""
686        raise NotImplementedError(self.get_ancestry)
687
688    def get_ancestry_with_ghosts(self, version_ids):
689        """Return a list of all ancestors of given version(s). This
690        will not include the null revision.
691
692        Must raise RevisionNotPresent if any of the given versions are
693        not present in file history.
694
695        Ghosts that are known about will be included in ancestry list,
696        but are not explicitly marked.
697        """
698        raise NotImplementedError(self.get_ancestry_with_ghosts)
699
700    def get_parent_map(self, version_ids):
701        """Get a map of the parents of version_ids.
702
703        :param version_ids: The version ids to look up parents for.
704        :return: A mapping from version id to parents.
705        """
706        raise NotImplementedError(self.get_parent_map)
707
708    def get_parents_with_ghosts(self, version_id):
709        """Return version names for parents of version_id.
710
711        Will raise RevisionNotPresent if version_id is not present
712        in the history.
713
714        Ghosts that are known about will be included in the parent list,
715        but are not explicitly marked.
716        """
717        try:
718            return list(self.get_parent_map([version_id])[version_id])
719        except KeyError:
720            raise errors.RevisionNotPresent(version_id, self)
721
722    def annotate(self, version_id):
723        """Return a list of (version-id, line) tuples for version_id.
724
725        :raise RevisionNotPresent: If the given version is
726        not present in file history.
727        """
728        raise NotImplementedError(self.annotate)
729
730    def iter_lines_added_or_present_in_versions(self, version_ids=None,
731                                                pb=None):
732        """Iterate over the lines in the versioned file from version_ids.
733
734        This may return lines from other versions. Each item the returned
735        iterator yields is a tuple of a line and a text version that that line
736        is present in (not introduced in).
737
738        Ordering of results is in whatever order is most suitable for the
739        underlying storage format.
740
741        If a progress bar is supplied, it may be used to indicate progress.
742        The caller is responsible for cleaning up progress bars (because this
743        is an iterator).
744
745        NOTES: Lines are normalised: they will all have \n terminators.
746               Lines are returned in arbitrary order.
747
748        :return: An iterator over (line, version_id).
749        """
750        raise NotImplementedError(self.iter_lines_added_or_present_in_versions)
751
752    def plan_merge(self, ver_a, ver_b, base=None):
753        """Return pseudo-annotation indicating how the two versions merge.
754
755        This is computed between versions a and b and their common
756        base.
757
758        Weave lines present in none of them are skipped entirely.
759
760        Legend:
761        killed-base Dead in base revision
762        killed-both Killed in each revision
763        killed-a    Killed in a
764        killed-b    Killed in b
765        unchanged   Alive in both a and b (possibly created in both)
766        new-a       Created in a
767        new-b       Created in b
768        ghost-a     Killed in a, unborn in b
769        ghost-b     Killed in b, unborn in a
770        irrelevant  Not in either revision
771        """
772        raise NotImplementedError(VersionedFile.plan_merge)
773
774    def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
775                    b_marker=TextMerge.B_MARKER):
776        return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
777
778
779class RecordingVersionedFilesDecorator(object):
780    """A minimal versioned files that records calls made on it.
781
782    Only enough methods have been added to support tests using it to date.
783
784    :ivar calls: A list of the calls made; can be reset at any time by
785        assigning [] to it.
786    """
787
788    def __init__(self, backing_vf):
789        """Create a RecordingVersionedFilesDecorator decorating backing_vf.
790
791        :param backing_vf: The versioned file to answer all methods.
792        """
793        self._backing_vf = backing_vf
794        self.calls = []
795
796    def add_lines(self, key, parents, lines, parent_texts=None,
797                  left_matching_blocks=None, nostore_sha=None, random_id=False,
798                  check_content=True):
799        self.calls.append(("add_lines", key, parents, lines, parent_texts,
800                           left_matching_blocks, nostore_sha, random_id, check_content))
801        return self._backing_vf.add_lines(key, parents, lines, parent_texts,
802                                          left_matching_blocks, nostore_sha, random_id, check_content)
803
804    def add_content(self, factory, parent_texts=None,
805                    left_matching_blocks=None, nostore_sha=None, random_id=False,
806                    check_content=True):
807        self.calls.append(("add_content", factory, parent_texts,
808                           left_matching_blocks, nostore_sha, random_id, check_content))
809        return self._backing_vf.add_content(
810            factory, parent_texts, left_matching_blocks, nostore_sha,
811            random_id, check_content)
812
813    def check(self):
814        self._backing_vf.check()
815
816    def get_parent_map(self, keys):
817        self.calls.append(("get_parent_map", copy(keys)))
818        return self._backing_vf.get_parent_map(keys)
819
820    def get_record_stream(self, keys, sort_order, include_delta_closure):
821        self.calls.append(("get_record_stream", list(keys), sort_order,
822                           include_delta_closure))
823        return self._backing_vf.get_record_stream(keys, sort_order,
824                                                  include_delta_closure)
825
826    def get_sha1s(self, keys):
827        self.calls.append(("get_sha1s", copy(keys)))
828        return self._backing_vf.get_sha1s(keys)
829
830    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
831        self.calls.append(("iter_lines_added_or_present_in_keys", copy(keys)))
832        return self._backing_vf.iter_lines_added_or_present_in_keys(keys, pb=pb)
833
834    def keys(self):
835        self.calls.append(("keys",))
836        return self._backing_vf.keys()
837
838
839class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator):
840    """A VF that records calls, and returns keys in specific order.
841
842    :ivar calls: A list of the calls made; can be reset at any time by
843        assigning [] to it.
844    """
845
846    def __init__(self, backing_vf, key_priority):
847        """Create a RecordingVersionedFilesDecorator decorating backing_vf.
848
849        :param backing_vf: The versioned file to answer all methods.
850        :param key_priority: A dictionary defining what order keys should be
851            returned from an 'unordered' get_record_stream request.
852            Keys with lower priority are returned first, keys not present in
853            the map get an implicit priority of 0, and are returned in
854            lexicographical order.
855        """
856        RecordingVersionedFilesDecorator.__init__(self, backing_vf)
857        self._key_priority = key_priority
858
859    def get_record_stream(self, keys, sort_order, include_delta_closure):
860        self.calls.append(("get_record_stream", list(keys), sort_order,
861                           include_delta_closure))
862        if sort_order == 'unordered':
863            def sort_key(key):
864                return (self._key_priority.get(key, 0), key)
865            # Use a defined order by asking for the keys one-by-one from the
866            # backing_vf
867            for key in sorted(keys, key=sort_key):
868                for record in self._backing_vf.get_record_stream([key],
869                                                                 'unordered', include_delta_closure):
870                    yield record
871        else:
872            for record in self._backing_vf.get_record_stream(keys, sort_order,
873                                                             include_delta_closure):
874                yield record
875
876
877class KeyMapper(object):
878    """KeyMappers map between keys and underlying partitioned storage."""
879
880    def map(self, key):
881        """Map key to an underlying storage identifier.
882
883        :param key: A key tuple e.g. (b'file-id', b'revision-id').
884        :return: An underlying storage identifier, specific to the partitioning
885            mechanism.
886        """
887        raise NotImplementedError(self.map)
888
889    def unmap(self, partition_id):
890        """Map a partitioned storage id back to a key prefix.
891
892        :param partition_id: The underlying partition id.
893        :return: As much of a key (or prefix) as is derivable from the partition
894            id.
895        """
896        raise NotImplementedError(self.unmap)
897
898
899class ConstantMapper(KeyMapper):
900    """A key mapper that maps to a constant result."""
901
902    def __init__(self, result):
903        """Create a ConstantMapper which will return result for all maps."""
904        self._result = result
905
906    def map(self, key):
907        """See KeyMapper.map()."""
908        return self._result
909
910
911class URLEscapeMapper(KeyMapper):
912    """Base class for use with transport backed storage.
913
914    This provides a map and unmap wrapper that respectively url escape and
915    unescape their outputs and inputs.
916    """
917
918    def map(self, key):
919        """See KeyMapper.map()."""
920        return urlutils.quote(self._map(key))
921
922    def unmap(self, partition_id):
923        """See KeyMapper.unmap()."""
924        return self._unmap(urlutils.unquote(partition_id))
925
926
927class PrefixMapper(URLEscapeMapper):
928    """A key mapper that extracts the first component of a key.
929
930    This mapper is for use with a transport based backend.
931    """
932
933    def _map(self, key):
934        """See KeyMapper.map()."""
935        return key[0].decode('utf-8')
936
937    def _unmap(self, partition_id):
938        """See KeyMapper.unmap()."""
939        return (partition_id.encode('utf-8'),)
940
941
942class HashPrefixMapper(URLEscapeMapper):
943    """A key mapper that combines the first component of a key with a hash.
944
945    This mapper is for use with a transport based backend.
946    """
947
948    def _map(self, key):
949        """See KeyMapper.map()."""
950        prefix = self._escape(key[0])
951        return "%02x/%s" % (adler32(prefix) & 0xff, prefix.decode('utf-8'))
952
953    def _escape(self, prefix):
954        """No escaping needed here."""
955        return prefix
956
957    def _unmap(self, partition_id):
958        """See KeyMapper.unmap()."""
959        return (self._unescape(osutils.basename(partition_id)).encode('utf-8'),)
960
961    def _unescape(self, basename):
962        """No unescaping needed for HashPrefixMapper."""
963        return basename
964
965
966class HashEscapedPrefixMapper(HashPrefixMapper):
967    """Combines the escaped first component of a key with a hash.
968
969    This mapper is for use with a transport based backend.
970    """
971
972    _safe = bytearray(b"abcdefghijklmnopqrstuvwxyz0123456789-_@,.")
973
974    def _escape(self, prefix):
975        """Turn a key element into a filesystem safe string.
976
977        This is similar to a plain urlutils.quote, except
978        it uses specific safe characters, so that it doesn't
979        have to translate a lot of valid file ids.
980        """
981        # @ does not get escaped. This is because it is a valid
982        # filesystem character we use all the time, and it looks
983        # a lot better than seeing %40 all the time.
984        r = [(c in self._safe) and chr(c) or ('%%%02x' % c)
985             for c in bytearray(prefix)]
986        return ''.join(r).encode('ascii')
987
988    def _unescape(self, basename):
989        """Escaped names are easily unescaped by urlutils."""
990        return urlutils.unquote(basename)
991
992
993def make_versioned_files_factory(versioned_file_factory, mapper):
994    """Create a ThunkedVersionedFiles factory.
995
996    This will create a callable which when called creates a
997    ThunkedVersionedFiles on a transport, using mapper to access individual
998    versioned files, and versioned_file_factory to create each individual file.
999    """
1000    def factory(transport):
1001        return ThunkedVersionedFiles(transport, versioned_file_factory, mapper,
1002                                     lambda: True)
1003    return factory
1004
1005
1006class VersionedFiles(object):
1007    """Storage for many versioned files.
1008
1009    This object allows a single keyspace for accessing the history graph and
1010    contents of named bytestrings.
1011
1012    Currently no implementation allows the graph of different key prefixes to
1013    intersect, but the API does allow such implementations in the future.
1014
1015    The keyspace is expressed via simple tuples. Any instance of VersionedFiles
1016    may have a different length key-size, but that size will be constant for
1017    all texts added to or retrieved from it. For instance, breezy uses
1018    instances with a key-size of 2 for storing user files in a repository, with
1019    the first element the fileid, and the second the version of that file.
1020
1021    The use of tuples allows a single code base to support several different
1022    uses with only the mapping logic changing from instance to instance.
1023
1024    :ivar _immediate_fallback_vfs: For subclasses that support stacking,
1025        this is a list of other VersionedFiles immediately underneath this
1026        one.  They may in turn each have further fallbacks.
1027    """
1028
1029    def add_lines(self, key, parents, lines, parent_texts=None,
1030                  left_matching_blocks=None, nostore_sha=None, random_id=False,
1031                  check_content=True):
1032        """Add a text to the store.
1033
1034        :param key: The key tuple of the text to add. If the last element is
1035            None, a CHK string will be generated during the addition.
1036        :param parents: The parents key tuples of the text to add.
1037        :param lines: A list of lines. Each line must be a bytestring. And all
1038            of them except the last must be terminated with \n and contain no
1039            other \n's. The last line may either contain no \n's or a single
1040            terminating \n. If the lines list does meet this constraint the add
1041            routine may error or may succeed - but you will be unable to read
1042            the data back accurately. (Checking the lines have been split
1043            correctly is expensive and extremely unlikely to catch bugs so it
1044            is not done at runtime unless check_content is True.)
1045        :param parent_texts: An optional dictionary containing the opaque
1046            representations of some or all of the parents of version_id to
1047            allow delta optimisations.  VERY IMPORTANT: the texts must be those
1048            returned by add_lines or data corruption can be caused.
1049        :param left_matching_blocks: a hint about which areas are common
1050            between the text and its left-hand-parent.  The format is
1051            the SequenceMatcher.get_matching_blocks format.
1052        :param nostore_sha: Raise ExistingContent and do not add the lines to
1053            the versioned file if the digest of the lines matches this.
1054        :param random_id: If True a random id has been selected rather than
1055            an id determined by some deterministic process such as a converter
1056            from a foreign VCS. When True the backend may choose not to check
1057            for uniqueness of the resulting key within the versioned file, so
1058            this should only be done when the result is expected to be unique
1059            anyway.
1060        :param check_content: If True, the lines supplied are verified to be
1061            bytestrings that are correctly formed lines.
1062        :return: The text sha1, the number of bytes in the text, and an opaque
1063                 representation of the inserted version which can be provided
1064                 back to future add_lines calls in the parent_texts dictionary.
1065        """
1066        raise NotImplementedError(self.add_lines)
1067
1068    def add_content(self, factory, parent_texts=None,
1069                    left_matching_blocks=None, nostore_sha=None, random_id=False,
1070                    check_content=True):
1071        """Add a text to the store from a chunk iterable.
1072
1073        :param key: The key tuple of the text to add. If the last element is
1074            None, a CHK string will be generated during the addition.
1075        :param parents: The parents key tuples of the text to add.
1076        :param chunk_iter: An iterable over bytestrings.
1077        :param parent_texts: An optional dictionary containing the opaque
1078            representations of some or all of the parents of version_id to
1079            allow delta optimisations.  VERY IMPORTANT: the texts must be those
1080            returned by add_lines or data corruption can be caused.
1081        :param left_matching_blocks: a hint about which areas are common
1082            between the text and its left-hand-parent.  The format is
1083            the SequenceMatcher.get_matching_blocks format.
1084        :param nostore_sha: Raise ExistingContent and do not add the lines to
1085            the versioned file if the digest of the lines matches this.
1086        :param random_id: If True a random id has been selected rather than
1087            an id determined by some deterministic process such as a converter
1088            from a foreign VCS. When True the backend may choose not to check
1089            for uniqueness of the resulting key within the versioned file, so
1090            this should only be done when the result is expected to be unique
1091            anyway.
1092        :param check_content: If True, the lines supplied are verified to be
1093            bytestrings that are correctly formed lines.
1094        :return: The text sha1, the number of bytes in the text, and an opaque
1095                 representation of the inserted version which can be provided
1096                 back to future add_lines calls in the parent_texts dictionary.
1097        """
1098        raise NotImplementedError(self.add_content)
1099
1100    def add_mpdiffs(self, records):
1101        """Add mpdiffs to this VersionedFile.
1102
1103        Records should be iterables of version, parents, expected_sha1,
1104        mpdiff. mpdiff should be a MultiParent instance.
1105        """
1106        vf_parents = {}
1107        mpvf = multiparent.MultiMemoryVersionedFile()
1108        versions = []
1109        for version, parent_ids, expected_sha1, mpdiff in records:
1110            versions.append(version)
1111            mpvf.add_diff(mpdiff, version, parent_ids)
1112        needed_parents = set()
1113        for version, parent_ids, expected_sha1, mpdiff in records:
1114            needed_parents.update(p for p in parent_ids
1115                                  if not mpvf.has_version(p))
1116        # It seems likely that adding all the present parents as fulltexts can
1117        # easily exhaust memory.
1118        for record in self.get_record_stream(needed_parents, 'unordered',
1119                                             True):
1120            if record.storage_kind == 'absent':
1121                continue
1122            mpvf.add_version(record.get_bytes_as('lines'), record.key, [])
1123        for (key, parent_keys, expected_sha1, mpdiff), lines in zip(
1124                records, mpvf.get_line_list(versions)):
1125            if len(parent_keys) == 1:
1126                left_matching_blocks = list(mpdiff.get_matching_blocks(0,
1127                                                                       mpvf.get_diff(parent_keys[0]).num_lines()))
1128            else:
1129                left_matching_blocks = None
1130            version_sha1, _, version_text = self.add_lines(key,
1131                                                           parent_keys, lines, vf_parents,
1132                                                           left_matching_blocks=left_matching_blocks)
1133            if version_sha1 != expected_sha1:
1134                raise errors.VersionedFileInvalidChecksum(version)
1135            vf_parents[key] = version_text
1136
1137    def annotate(self, key):
1138        """Return a list of (version-key, line) tuples for the text of key.
1139
1140        :raise RevisionNotPresent: If the key is not present.
1141        """
1142        raise NotImplementedError(self.annotate)
1143
1144    def check(self, progress_bar=None):
1145        """Check this object for integrity.
1146
1147        :param progress_bar: A progress bar to output as the check progresses.
1148        :param keys: Specific keys within the VersionedFiles to check. When
1149            this parameter is not None, check() becomes a generator as per
1150            get_record_stream. The difference to get_record_stream is that
1151            more or deeper checks will be performed.
1152        :return: None, or if keys was supplied a generator as per
1153            get_record_stream.
1154        """
1155        raise NotImplementedError(self.check)
1156
1157    @staticmethod
1158    def check_not_reserved_id(version_id):
1159        revision.check_not_reserved_id(version_id)
1160
1161    def clear_cache(self):
1162        """Clear whatever caches this VersionedFile holds.
1163
1164        This is generally called after an operation has been performed, when we
1165        don't expect to be using this versioned file again soon.
1166        """
1167
1168    def _check_lines_not_unicode(self, lines):
1169        """Check that lines being added to a versioned file are not unicode."""
1170        for line in lines:
1171            if line.__class__ is not bytes:
1172                raise errors.BzrBadParameterUnicode("lines")
1173
1174    def _check_lines_are_lines(self, lines):
1175        """Check that the lines really are full lines without inline EOL."""
1176        for line in lines:
1177            if b'\n' in line[:-1]:
1178                raise errors.BzrBadParameterContainsNewline("lines")
1179
1180    def get_known_graph_ancestry(self, keys):
1181        """Get a KnownGraph instance with the ancestry of keys."""
1182        # most basic implementation is a loop around get_parent_map
1183        pending = set(keys)
1184        parent_map = {}
1185        while pending:
1186            this_parent_map = self.get_parent_map(pending)
1187            parent_map.update(this_parent_map)
1188            pending = set(itertools.chain.from_iterable(
1189                this_parent_map.values()))
1190            pending.difference_update(parent_map)
1191        kg = _mod_graph.KnownGraph(parent_map)
1192        return kg
1193
1194    def get_parent_map(self, keys):
1195        """Get a map of the parents of keys.
1196
1197        :param keys: The keys to look up parents for.
1198        :return: A mapping from keys to parents. Absent keys are absent from
1199            the mapping.
1200        """
1201        raise NotImplementedError(self.get_parent_map)
1202
1203    def get_record_stream(self, keys, ordering, include_delta_closure):
1204        """Get a stream of records for keys.
1205
1206        :param keys: The keys to include.
1207        :param ordering: Either 'unordered' or 'topological'. A topologically
1208            sorted stream has compression parents strictly before their
1209            children.
1210        :param include_delta_closure: If True then the closure across any
1211            compression parents will be included (in the opaque data).
1212        :return: An iterator of ContentFactory objects, each of which is only
1213            valid until the iterator is advanced.
1214        """
1215        raise NotImplementedError(self.get_record_stream)
1216
1217    def get_sha1s(self, keys):
1218        """Get the sha1's of the texts for the given keys.
1219
1220        :param keys: The names of the keys to lookup
1221        :return: a dict from key to sha1 digest. Keys of texts which are not
1222            present in the store are not present in the returned
1223            dictionary.
1224        """
1225        raise NotImplementedError(self.get_sha1s)
1226
1227    __contains__ = index._has_key_from_parent_map
1228
1229    def get_missing_compression_parent_keys(self):
1230        """Return an iterable of keys of missing compression parents.
1231
1232        Check this after calling insert_record_stream to find out if there are
1233        any missing compression parents.  If there are, the records that
1234        depend on them are not able to be inserted safely. The precise
1235        behaviour depends on the concrete VersionedFiles class in use.
1236
1237        Classes that do not support this will raise NotImplementedError.
1238        """
1239        raise NotImplementedError(self.get_missing_compression_parent_keys)
1240
1241    def insert_record_stream(self, stream):
1242        """Insert a record stream into this container.
1243
1244        :param stream: A stream of records to insert.
1245        :return: None
1246        :seealso VersionedFile.get_record_stream:
1247        """
1248        raise NotImplementedError
1249
1250    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
1251        """Iterate over the lines in the versioned files from keys.
1252
1253        This may return lines from other keys. Each item the returned
1254        iterator yields is a tuple of a line and a text version that that line
1255        is present in (not introduced in).
1256
1257        Ordering of results is in whatever order is most suitable for the
1258        underlying storage format.
1259
1260        If a progress bar is supplied, it may be used to indicate progress.
1261        The caller is responsible for cleaning up progress bars (because this
1262        is an iterator).
1263
1264        NOTES:
1265         * Lines are normalised by the underlying store: they will all have \n
1266           terminators.
1267         * Lines are returned in arbitrary order.
1268
1269        :return: An iterator over (line, key).
1270        """
1271        raise NotImplementedError(self.iter_lines_added_or_present_in_keys)
1272
1273    def keys(self):
1274        """Return a iterable of the keys for all the contained texts."""
1275        raise NotImplementedError(self.keys)
1276
1277    def make_mpdiffs(self, keys):
1278        """Create multiparent diffs for specified keys."""
1279        generator = _MPDiffGenerator(self, keys)
1280        return generator.compute_diffs()
1281
1282    def get_annotator(self):
1283        return annotate.Annotator(self)
1284
1285    missing_keys = index._missing_keys_from_parent_map
1286
1287    def _extract_blocks(self, version_id, source, target):
1288        return None
1289
1290    def _transitive_fallbacks(self):
1291        """Return the whole stack of fallback versionedfiles.
1292
1293        This VersionedFiles may have a list of fallbacks, but it doesn't
1294        necessarily know about the whole stack going down, and it can't know
1295        at open time because they may change after the objects are opened.
1296        """
1297        all_fallbacks = []
1298        for a_vfs in self._immediate_fallback_vfs:
1299            all_fallbacks.append(a_vfs)
1300            all_fallbacks.extend(a_vfs._transitive_fallbacks())
1301        return all_fallbacks
1302
1303
1304class ThunkedVersionedFiles(VersionedFiles):
1305    """Storage for many versioned files thunked onto a 'VersionedFile' class.
1306
1307    This object allows a single keyspace for accessing the history graph and
1308    contents of named bytestrings.
1309
1310    Currently no implementation allows the graph of different key prefixes to
1311    intersect, but the API does allow such implementations in the future.
1312    """
1313
1314    def __init__(self, transport, file_factory, mapper, is_locked):
1315        """Create a ThunkedVersionedFiles."""
1316        self._transport = transport
1317        self._file_factory = file_factory
1318        self._mapper = mapper
1319        self._is_locked = is_locked
1320
1321    def add_content(self, factory, parent_texts=None,
1322                    left_matching_blocks=None, nostore_sha=None, random_id=False):
1323        """See VersionedFiles.add_content()."""
1324        lines = factory.get_bytes_as('lines')
1325        return self.add_lines(
1326            factory.key, factory.parents, lines,
1327            parent_texts=parent_texts,
1328            left_matching_blocks=left_matching_blocks,
1329            nostore_sha=nostore_sha,
1330            random_id=random_id,
1331            check_content=True)
1332
1333    def add_lines(self, key, parents, lines, parent_texts=None,
1334                  left_matching_blocks=None, nostore_sha=None, random_id=False,
1335                  check_content=True):
1336        """See VersionedFiles.add_lines()."""
1337        path = self._mapper.map(key)
1338        version_id = key[-1]
1339        parents = [parent[-1] for parent in parents]
1340        vf = self._get_vf(path)
1341        try:
1342            try:
1343                return vf.add_lines_with_ghosts(version_id, parents, lines,
1344                                                parent_texts=parent_texts,
1345                                                left_matching_blocks=left_matching_blocks,
1346                                                nostore_sha=nostore_sha, random_id=random_id,
1347                                                check_content=check_content)
1348            except NotImplementedError:
1349                return vf.add_lines(version_id, parents, lines,
1350                                    parent_texts=parent_texts,
1351                                    left_matching_blocks=left_matching_blocks,
1352                                    nostore_sha=nostore_sha, random_id=random_id,
1353                                    check_content=check_content)
1354        except errors.NoSuchFile:
1355            # parent directory may be missing, try again.
1356            self._transport.mkdir(osutils.dirname(path))
1357            try:
1358                return vf.add_lines_with_ghosts(version_id, parents, lines,
1359                                                parent_texts=parent_texts,
1360                                                left_matching_blocks=left_matching_blocks,
1361                                                nostore_sha=nostore_sha, random_id=random_id,
1362                                                check_content=check_content)
1363            except NotImplementedError:
1364                return vf.add_lines(version_id, parents, lines,
1365                                    parent_texts=parent_texts,
1366                                    left_matching_blocks=left_matching_blocks,
1367                                    nostore_sha=nostore_sha, random_id=random_id,
1368                                    check_content=check_content)
1369
1370    def annotate(self, key):
1371        """Return a list of (version-key, line) tuples for the text of key.
1372
1373        :raise RevisionNotPresent: If the key is not present.
1374        """
1375        prefix = key[:-1]
1376        path = self._mapper.map(prefix)
1377        vf = self._get_vf(path)
1378        origins = vf.annotate(key[-1])
1379        result = []
1380        for origin, line in origins:
1381            result.append((prefix + (origin,), line))
1382        return result
1383
1384    def check(self, progress_bar=None, keys=None):
1385        """See VersionedFiles.check()."""
1386        # XXX: This is over-enthusiastic but as we only thunk for Weaves today
1387        # this is tolerable. Ideally we'd pass keys down to check() and
1388        # have the older VersiondFile interface updated too.
1389        for prefix, vf in self._iter_all_components():
1390            vf.check()
1391        if keys is not None:
1392            return self.get_record_stream(keys, 'unordered', True)
1393
1394    def get_parent_map(self, keys):
1395        """Get a map of the parents of keys.
1396
1397        :param keys: The keys to look up parents for.
1398        :return: A mapping from keys to parents. Absent keys are absent from
1399            the mapping.
1400        """
1401        prefixes = self._partition_keys(keys)
1402        result = {}
1403        for prefix, suffixes in prefixes.items():
1404            path = self._mapper.map(prefix)
1405            vf = self._get_vf(path)
1406            parent_map = vf.get_parent_map(suffixes)
1407            for key, parents in parent_map.items():
1408                result[prefix + (key,)] = tuple(
1409                    prefix + (parent,) for parent in parents)
1410        return result
1411
1412    def _get_vf(self, path):
1413        if not self._is_locked():
1414            raise errors.ObjectNotLocked(self)
1415        return self._file_factory(path, self._transport, create=True,
1416                                  get_scope=lambda: None)
1417
1418    def _partition_keys(self, keys):
1419        """Turn keys into a dict of prefix:suffix_list."""
1420        result = {}
1421        for key in keys:
1422            prefix_keys = result.setdefault(key[:-1], [])
1423            prefix_keys.append(key[-1])
1424        return result
1425
1426    def _iter_all_prefixes(self):
1427        # Identify all key prefixes.
1428        # XXX: A bit hacky, needs polish.
1429        if isinstance(self._mapper, ConstantMapper):
1430            paths = [self._mapper.map(())]
1431            prefixes = [()]
1432        else:
1433            relpaths = set()
1434            for quoted_relpath in self._transport.iter_files_recursive():
1435                path, ext = os.path.splitext(quoted_relpath)
1436                relpaths.add(path)
1437            paths = list(relpaths)
1438            prefixes = [self._mapper.unmap(path) for path in paths]
1439        return zip(paths, prefixes)
1440
1441    def get_record_stream(self, keys, ordering, include_delta_closure):
1442        """See VersionedFiles.get_record_stream()."""
1443        # Ordering will be taken care of by each partitioned store; group keys
1444        # by partition.
1445        keys = sorted(keys)
1446        for prefix, suffixes, vf in self._iter_keys_vf(keys):
1447            suffixes = [(suffix,) for suffix in suffixes]
1448            for record in vf.get_record_stream(suffixes, ordering,
1449                                               include_delta_closure):
1450                if record.parents is not None:
1451                    record.parents = tuple(
1452                        prefix + parent for parent in record.parents)
1453                record.key = prefix + record.key
1454                yield record
1455
1456    def _iter_keys_vf(self, keys):
1457        prefixes = self._partition_keys(keys)
1458        sha1s = {}
1459        for prefix, suffixes in prefixes.items():
1460            path = self._mapper.map(prefix)
1461            vf = self._get_vf(path)
1462            yield prefix, suffixes, vf
1463
1464    def get_sha1s(self, keys):
1465        """See VersionedFiles.get_sha1s()."""
1466        sha1s = {}
1467        for prefix, suffixes, vf in self._iter_keys_vf(keys):
1468            vf_sha1s = vf.get_sha1s(suffixes)
1469            for suffix, sha1 in vf_sha1s.items():
1470                sha1s[prefix + (suffix,)] = sha1
1471        return sha1s
1472
1473    def insert_record_stream(self, stream):
1474        """Insert a record stream into this container.
1475
1476        :param stream: A stream of records to insert.
1477        :return: None
1478        :seealso VersionedFile.get_record_stream:
1479        """
1480        for record in stream:
1481            prefix = record.key[:-1]
1482            key = record.key[-1:]
1483            if record.parents is not None:
1484                parents = [parent[-1:] for parent in record.parents]
1485            else:
1486                parents = None
1487            thunk_record = AdapterFactory(key, parents, record)
1488            path = self._mapper.map(prefix)
1489            # Note that this parses the file many times; we can do better but
1490            # as this only impacts weaves in terms of performance, it is
1491            # tolerable.
1492            vf = self._get_vf(path)
1493            vf.insert_record_stream([thunk_record])
1494
1495    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
1496        """Iterate over the lines in the versioned files from keys.
1497
1498        This may return lines from other keys. Each item the returned
1499        iterator yields is a tuple of a line and a text version that that line
1500        is present in (not introduced in).
1501
1502        Ordering of results is in whatever order is most suitable for the
1503        underlying storage format.
1504
1505        If a progress bar is supplied, it may be used to indicate progress.
1506        The caller is responsible for cleaning up progress bars (because this
1507        is an iterator).
1508
1509        NOTES:
1510         * Lines are normalised by the underlying store: they will all have \n
1511           terminators.
1512         * Lines are returned in arbitrary order.
1513
1514        :return: An iterator over (line, key).
1515        """
1516        for prefix, suffixes, vf in self._iter_keys_vf(keys):
1517            for line, version in vf.iter_lines_added_or_present_in_versions(suffixes):
1518                yield line, prefix + (version,)
1519
1520    def _iter_all_components(self):
1521        for path, prefix in self._iter_all_prefixes():
1522            yield prefix, self._get_vf(path)
1523
1524    def keys(self):
1525        """See VersionedFiles.keys()."""
1526        result = set()
1527        for prefix, vf in self._iter_all_components():
1528            for suffix in vf.versions():
1529                result.add(prefix + (suffix,))
1530        return result
1531
1532
1533class VersionedFilesWithFallbacks(VersionedFiles):
1534
1535    def without_fallbacks(self):
1536        """Return a clone of this object without any fallbacks configured."""
1537        raise NotImplementedError(self.without_fallbacks)
1538
1539    def add_fallback_versioned_files(self, a_versioned_files):
1540        """Add a source of texts for texts not present in this knit.
1541
1542        :param a_versioned_files: A VersionedFiles object.
1543        """
1544        raise NotImplementedError(self.add_fallback_versioned_files)
1545
1546    def get_known_graph_ancestry(self, keys):
1547        """Get a KnownGraph instance with the ancestry of keys."""
1548        parent_map, missing_keys = self._index.find_ancestry(keys)
1549        for fallback in self._transitive_fallbacks():
1550            if not missing_keys:
1551                break
1552            (f_parent_map, f_missing_keys) = fallback._index.find_ancestry(
1553                missing_keys)
1554            parent_map.update(f_parent_map)
1555            missing_keys = f_missing_keys
1556        kg = _mod_graph.KnownGraph(parent_map)
1557        return kg
1558
1559
1560class _PlanMergeVersionedFile(VersionedFiles):
1561    """A VersionedFile for uncommitted and committed texts.
1562
1563    It is intended to allow merges to be planned with working tree texts.
1564    It implements only the small part of the VersionedFiles interface used by
1565    PlanMerge.  It falls back to multiple versionedfiles for data not stored in
1566    _PlanMergeVersionedFile itself.
1567
1568    :ivar: fallback_versionedfiles a list of VersionedFiles objects that can be
1569        queried for missing texts.
1570    """
1571
1572    def __init__(self, file_id):
1573        """Create a _PlanMergeVersionedFile.
1574
1575        :param file_id: Used with _PlanMerge code which is not yet fully
1576            tuple-keyspace aware.
1577        """
1578        self._file_id = file_id
1579        # fallback locations
1580        self.fallback_versionedfiles = []
1581        # Parents for locally held keys.
1582        self._parents = {}
1583        # line data for locally held keys.
1584        self._lines = {}
1585        # key lookup providers
1586        self._providers = [_mod_graph.DictParentsProvider(self._parents)]
1587
1588    def plan_merge(self, ver_a, ver_b, base=None):
1589        """See VersionedFile.plan_merge"""
1590        from ..merge import _PlanMerge
1591        if base is None:
1592            return _PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge()
1593        old_plan = list(_PlanMerge(ver_a, base, self,
1594                                   (self._file_id,)).plan_merge())
1595        new_plan = list(_PlanMerge(ver_a, ver_b, self,
1596                                   (self._file_id,)).plan_merge())
1597        return _PlanMerge._subtract_plans(old_plan, new_plan)
1598
1599    def plan_lca_merge(self, ver_a, ver_b, base=None):
1600        from ..merge import _PlanLCAMerge
1601        graph = _mod_graph.Graph(self)
1602        new_plan = _PlanLCAMerge(
1603            ver_a, ver_b, self, (self._file_id,), graph).plan_merge()
1604        if base is None:
1605            return new_plan
1606        old_plan = _PlanLCAMerge(
1607            ver_a, base, self, (self._file_id,), graph).plan_merge()
1608        return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))
1609
1610    def add_content(self, factory):
1611        return self.add_lines(
1612            factory.key, factory.parents, factory.get_bytes_as('lines'))
1613
1614    def add_lines(self, key, parents, lines):
1615        """See VersionedFiles.add_lines
1616
1617        Lines are added locally, not to fallback versionedfiles.  Also, ghosts
1618        are permitted.  Only reserved ids are permitted.
1619        """
1620        if not isinstance(key, tuple):
1621            raise TypeError(key)
1622        if not revision.is_reserved_id(key[-1]):
1623            raise ValueError('Only reserved ids may be used')
1624        if parents is None:
1625            raise ValueError('Parents may not be None')
1626        if lines is None:
1627            raise ValueError('Lines may not be None')
1628        self._parents[key] = tuple(parents)
1629        self._lines[key] = lines
1630
1631    def get_record_stream(self, keys, ordering, include_delta_closure):
1632        pending = set(keys)
1633        for key in keys:
1634            if key in self._lines:
1635                lines = self._lines[key]
1636                parents = self._parents[key]
1637                pending.remove(key)
1638                yield ChunkedContentFactory(
1639                    key, parents, None, lines,
1640                    chunks_are_lines=True)
1641        for versionedfile in self.fallback_versionedfiles:
1642            for record in versionedfile.get_record_stream(
1643                    pending, 'unordered', True):
1644                if record.storage_kind == 'absent':
1645                    continue
1646                else:
1647                    pending.remove(record.key)
1648                    yield record
1649            if not pending:
1650                return
1651        # report absent entries
1652        for key in pending:
1653            yield AbsentContentFactory(key)
1654
1655    def get_parent_map(self, keys):
1656        """See VersionedFiles.get_parent_map"""
1657        # We create a new provider because a fallback may have been added.
1658        # If we make fallbacks private we can update a stack list and avoid
1659        # object creation thrashing.
1660        keys = set(keys)
1661        result = {}
1662        if revision.NULL_REVISION in keys:
1663            keys.remove(revision.NULL_REVISION)
1664            result[revision.NULL_REVISION] = ()
1665        self._providers = self._providers[:1] + self.fallback_versionedfiles
1666        result.update(
1667            _mod_graph.StackedParentsProvider(
1668                self._providers).get_parent_map(keys))
1669        for key, parents in result.items():
1670            if parents == ():
1671                result[key] = (revision.NULL_REVISION,)
1672        return result
1673
1674
1675class PlanWeaveMerge(TextMerge):
1676    """Weave merge that takes a plan as its input.
1677
1678    This exists so that VersionedFile.plan_merge is implementable.
1679    Most callers will want to use WeaveMerge instead.
1680    """
1681
1682    def __init__(self, plan, a_marker=TextMerge.A_MARKER,
1683                 b_marker=TextMerge.B_MARKER):
1684        TextMerge.__init__(self, a_marker, b_marker)
1685        self.plan = list(plan)
1686
1687    def _merge_struct(self):
1688        lines_a = []
1689        lines_b = []
1690        ch_a = ch_b = False
1691
1692        def outstanding_struct():
1693            if not lines_a and not lines_b:
1694                return
1695            elif ch_a and not ch_b:
1696                # one-sided change:
1697                yield(lines_a,)
1698            elif ch_b and not ch_a:
1699                yield (lines_b,)
1700            elif lines_a == lines_b:
1701                yield(lines_a,)
1702            else:
1703                yield (lines_a, lines_b)
1704
1705        # We previously considered either 'unchanged' or 'killed-both' lines
1706        # to be possible places to resynchronize.  However, assuming agreement
1707        # on killed-both lines may be too aggressive. -- mbp 20060324
1708        for state, line in self.plan:
1709            if state == 'unchanged':
1710                # resync and flush queued conflicts changes if any
1711                for struct in outstanding_struct():
1712                    yield struct
1713                lines_a = []
1714                lines_b = []
1715                ch_a = ch_b = False
1716
1717            if state == 'unchanged':
1718                if line:
1719                    yield ([line],)
1720            elif state == 'killed-a':
1721                ch_a = True
1722                lines_b.append(line)
1723            elif state == 'killed-b':
1724                ch_b = True
1725                lines_a.append(line)
1726            elif state == 'new-a':
1727                ch_a = True
1728                lines_a.append(line)
1729            elif state == 'new-b':
1730                ch_b = True
1731                lines_b.append(line)
1732            elif state == 'conflicted-a':
1733                ch_b = ch_a = True
1734                lines_a.append(line)
1735            elif state == 'conflicted-b':
1736                ch_b = ch_a = True
1737                lines_b.append(line)
1738            elif state == 'killed-both':
1739                # This counts as a change, even though there is no associated
1740                # line
1741                ch_b = ch_a = True
1742            else:
1743                if state not in ('irrelevant', 'ghost-a', 'ghost-b',
1744                                 'killed-base'):
1745                    raise AssertionError(state)
1746        for struct in outstanding_struct():
1747            yield struct
1748
1749    def base_from_plan(self):
1750        """Construct a BASE file from the plan text."""
1751        base_lines = []
1752        for state, line in self.plan:
1753            if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
1754                # If unchanged, then this line is straight from base. If a or b
1755                # or both killed the line, then it *used* to be in base.
1756                base_lines.append(line)
1757            else:
1758                if state not in ('killed-base', 'irrelevant',
1759                                 'ghost-a', 'ghost-b',
1760                                 'new-a', 'new-b',
1761                                 'conflicted-a', 'conflicted-b'):
1762                    # killed-base, irrelevant means it doesn't apply
1763                    # ghost-a/ghost-b are harder to say for sure, but they
1764                    # aren't in the 'inc_c' which means they aren't in the
1765                    # shared base of a & b. So we don't include them.  And
1766                    # obviously if the line is newly inserted, it isn't in base
1767
1768                    # If 'conflicted-a' or b, then it is new vs one base, but
1769                    # old versus another base. However, if we make it present
1770                    # in the base, it will be deleted from the target, and it
1771                    # seems better to get a line doubled in the merge result,
1772                    # rather than have it deleted entirely.
1773                    # Example, each node is the 'text' at that point:
1774                    #           MN
1775                    #          /   \
1776                    #        MaN   MbN
1777                    #         |  X  |
1778                    #        MabN MbaN
1779                    #          \   /
1780                    #           ???
1781                    # There was a criss-cross conflict merge. Both sides
1782                    # include the other, but put themselves first.
1783                    # Weave marks this as a 'clean' merge, picking OTHER over
1784                    # THIS. (Though the details depend on order inserted into
1785                    # weave, etc.)
1786                    # LCA generates a plan:
1787                    # [('unchanged', M),
1788                    #  ('conflicted-b', b),
1789                    #  ('unchanged', a),
1790                    #  ('conflicted-a', b),
1791                    #  ('unchanged', N)]
1792                    # If you mark 'conflicted-*' as part of BASE, then a 3-way
1793                    # merge tool will cleanly generate "MaN" (as BASE vs THIS
1794                    # removes one 'b', and BASE vs OTHER removes the other)
1795                    # If you include neither, 3-way creates a clean "MbabN" as
1796                    # THIS adds one 'b', and OTHER does too.
1797                    # It seems that having the line 2 times is better than
1798                    # having it omitted. (Easier to manually delete than notice
1799                    # it needs to be added.)
1800                    raise AssertionError('Unknown state: %s' % (state,))
1801        return base_lines
1802
1803
1804class WeaveMerge(PlanWeaveMerge):
1805    """Weave merge that takes a VersionedFile and two versions as its input."""
1806
1807    def __init__(self, versionedfile, ver_a, ver_b,
1808                 a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
1809        plan = versionedfile.plan_merge(ver_a, ver_b)
1810        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)
1811
1812
1813class VirtualVersionedFiles(VersionedFiles):
1814    """Dummy implementation for VersionedFiles that uses other functions for
1815    obtaining fulltexts and parent maps.
1816
1817    This is always on the bottom of the stack and uses string keys
1818    (rather than tuples) internally.
1819    """
1820
1821    def __init__(self, get_parent_map, get_lines):
1822        """Create a VirtualVersionedFiles.
1823
1824        :param get_parent_map: Same signature as Repository.get_parent_map.
1825        :param get_lines: Should return lines for specified key or None if
1826                          not available.
1827        """
1828        super(VirtualVersionedFiles, self).__init__()
1829        self._get_parent_map = get_parent_map
1830        self._get_lines = get_lines
1831
1832    def check(self, progressbar=None):
1833        """See VersionedFiles.check.
1834
1835        :note: Always returns True for VirtualVersionedFiles.
1836        """
1837        return True
1838
1839    def add_mpdiffs(self, records):
1840        """See VersionedFiles.mpdiffs.
1841
1842        :note: Not implemented for VirtualVersionedFiles.
1843        """
1844        raise NotImplementedError(self.add_mpdiffs)
1845
1846    def get_parent_map(self, keys):
1847        """See VersionedFiles.get_parent_map."""
1848        parent_view = self._get_parent_map(k for (k,) in keys).items()
1849        return dict(((k,), tuple((p,) for p in v)) for k, v in parent_view)
1850
1851    def get_sha1s(self, keys):
1852        """See VersionedFiles.get_sha1s."""
1853        ret = {}
1854        for (k,) in keys:
1855            lines = self._get_lines(k)
1856            if lines is not None:
1857                if not isinstance(lines, list):
1858                    raise AssertionError
1859                ret[(k,)] = osutils.sha_strings(lines)
1860        return ret
1861
1862    def get_record_stream(self, keys, ordering, include_delta_closure):
1863        """See VersionedFiles.get_record_stream."""
1864        for (k,) in list(keys):
1865            lines = self._get_lines(k)
1866            if lines is not None:
1867                if not isinstance(lines, list):
1868                    raise AssertionError
1869                yield ChunkedContentFactory(
1870                    (k,), None, sha1=osutils.sha_strings(lines),
1871                    chunks=lines, chunks_are_lines=True)
1872            else:
1873                yield AbsentContentFactory((k,))
1874
1875    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
1876        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
1877        for i, (key,) in enumerate(keys):
1878            if pb is not None:
1879                pb.update("Finding changed lines", i, len(keys))
1880            for l in self._get_lines(key):
1881                yield (l, key)
1882
1883
1884class NoDupeAddLinesDecorator(object):
1885    """Decorator for a VersionedFiles that skips doing an add_lines if the key
1886    is already present.
1887    """
1888
1889    def __init__(self, store):
1890        self._store = store
1891
1892    def add_lines(self, key, parents, lines, parent_texts=None,
1893                  left_matching_blocks=None, nostore_sha=None, random_id=False,
1894                  check_content=True):
1895        """See VersionedFiles.add_lines.
1896
1897        This implementation may return None as the third element of the return
1898        value when the original store wouldn't.
1899        """
1900        if nostore_sha:
1901            raise NotImplementedError(
1902                "NoDupeAddLinesDecorator.add_lines does not implement the "
1903                "nostore_sha behaviour.")
1904        if key[-1] is None:
1905            sha1 = osutils.sha_strings(lines)
1906            key = (b"sha1:" + sha1,)
1907        else:
1908            sha1 = None
1909        if key in self._store.get_parent_map([key]):
1910            # This key has already been inserted, so don't do it again.
1911            if sha1 is None:
1912                sha1 = osutils.sha_strings(lines)
1913            return sha1, sum(map(len, lines)), None
1914        return self._store.add_lines(key, parents, lines,
1915                                     parent_texts=parent_texts,
1916                                     left_matching_blocks=left_matching_blocks,
1917                                     nostore_sha=nostore_sha, random_id=random_id,
1918                                     check_content=check_content)
1919
1920    def __getattr__(self, name):
1921        return getattr(self._store, name)
1922
1923
1924def network_bytes_to_kind_and_offset(network_bytes):
1925    """Strip of a record kind from the front of network_bytes.
1926
1927    :param network_bytes: The bytes of a record.
1928    :return: A tuple (storage_kind, offset_of_remaining_bytes)
1929    """
1930    line_end = network_bytes.find(b'\n')
1931    storage_kind = network_bytes[:line_end].decode('ascii')
1932    return storage_kind, line_end + 1
1933
1934
1935class NetworkRecordStream(object):
1936    """A record_stream which reconstitures a serialised stream."""
1937
1938    def __init__(self, bytes_iterator):
1939        """Create a NetworkRecordStream.
1940
1941        :param bytes_iterator: An iterator of bytes. Each item in this
1942            iterator should have been obtained from a record_streams'
1943            record.get_bytes_as(record.storage_kind) call.
1944        """
1945        self._bytes_iterator = bytes_iterator
1946        self._kind_factory = {
1947            'fulltext': fulltext_network_to_record,
1948            'groupcompress-block': groupcompress.network_block_to_records,
1949            'knit-ft-gz': knit.knit_network_to_record,
1950            'knit-delta-gz': knit.knit_network_to_record,
1951            'knit-annotated-ft-gz': knit.knit_network_to_record,
1952            'knit-annotated-delta-gz': knit.knit_network_to_record,
1953            'knit-delta-closure': knit.knit_delta_closure_to_records,
1954            }
1955
1956    def read(self):
1957        """Read the stream.
1958
1959        :return: An iterator as per VersionedFiles.get_record_stream().
1960        """
1961        for bytes in self._bytes_iterator:
1962            storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
1963            for record in self._kind_factory[storage_kind](
1964                    storage_kind, bytes, line_end):
1965                yield record
1966
1967
1968def fulltext_network_to_record(kind, bytes, line_end):
1969    """Convert a network fulltext record to record."""
1970    meta_len, = struct.unpack('!L', bytes[line_end:line_end + 4])
1971    record_meta = bytes[line_end + 4:line_end + 4 + meta_len]
1972    key, parents = bencode.bdecode_as_tuple(record_meta)
1973    if parents == b'nil':
1974        parents = None
1975    fulltext = bytes[line_end + 4 + meta_len:]
1976    return [FulltextContentFactory(key, parents, None, fulltext)]
1977
1978
1979def _length_prefix(bytes):
1980    return struct.pack('!L', len(bytes))
1981
1982
1983def record_to_fulltext_bytes(record):
1984    if record.parents is None:
1985        parents = b'nil'
1986    else:
1987        parents = record.parents
1988    record_meta = bencode.bencode((record.key, parents))
1989    record_content = record.get_bytes_as('fulltext')
1990    return b"fulltext\n%s%s%s" % (
1991        _length_prefix(record_meta), record_meta, record_content)
1992
1993
1994def sort_groupcompress(parent_map):
1995    """Sort and group the keys in parent_map into groupcompress order.
1996
1997    groupcompress is defined (currently) as reverse-topological order, grouped
1998    by the key prefix.
1999
2000    :return: A sorted-list of keys
2001    """
2002    # gc-optimal ordering is approximately reverse topological,
2003    # properly grouped by file-id.
2004    per_prefix_map = {}
2005    for item in parent_map.items():
2006        key = item[0]
2007        if isinstance(key, bytes) or len(key) == 1:
2008            prefix = b''
2009        else:
2010            prefix = key[0]
2011        try:
2012            per_prefix_map[prefix].append(item)
2013        except KeyError:
2014            per_prefix_map[prefix] = [item]
2015
2016    present_keys = []
2017    for prefix in sorted(per_prefix_map):
2018        present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
2019    return present_keys
2020
2021
2022class _KeyRefs(object):
2023
2024    def __init__(self, track_new_keys=False):
2025        # dict mapping 'key' to 'set of keys referring to that key'
2026        self.refs = {}
2027        if track_new_keys:
2028            # set remembering all new keys
2029            self.new_keys = set()
2030        else:
2031            self.new_keys = None
2032
2033    def clear(self):
2034        if self.refs:
2035            self.refs.clear()
2036        if self.new_keys:
2037            self.new_keys.clear()
2038
2039    def add_references(self, key, refs):
2040        # Record the new references
2041        for referenced in refs:
2042            try:
2043                needed_by = self.refs[referenced]
2044            except KeyError:
2045                needed_by = self.refs[referenced] = set()
2046            needed_by.add(key)
2047        # Discard references satisfied by the new key
2048        self.add_key(key)
2049
2050    def get_new_keys(self):
2051        return self.new_keys
2052
2053    def get_unsatisfied_refs(self):
2054        return self.refs.keys()
2055
2056    def _satisfy_refs_for_key(self, key):
2057        try:
2058            del self.refs[key]
2059        except KeyError:
2060            # No keys depended on this key.  That's ok.
2061            pass
2062
2063    def add_key(self, key):
2064        # satisfy refs for key, and remember that we've seen this key.
2065        self._satisfy_refs_for_key(key)
2066        if self.new_keys is not None:
2067            self.new_keys.add(key)
2068
2069    def satisfy_refs_for_keys(self, keys):
2070        for key in keys:
2071            self._satisfy_refs_for_key(key)
2072
2073    def get_referrers(self):
2074        return set(itertools.chain.from_iterable(self.refs.values()))
2075