1# Copyright (C) 2006-2011 Canonical Ltd 2# 3# This program is free software; you can redistribute it and/or modify 4# it under the terms of the GNU General Public License as published by 5# the Free Software Foundation; either version 2 of the License, or 6# (at your option) any later version. 7# 8# This program is distributed in the hope that it will be useful, 9# but WITHOUT ANY WARRANTY; without even the implied warranty of 10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11# GNU General Public License for more details. 12# 13# You should have received a copy of the GNU General Public License 14# along with this program; if not, write to the Free Software 15# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 16 17"""Versioned text file storage api.""" 18 19from copy import copy 20from io import BytesIO 21import itertools 22import os 23import struct 24from zlib import adler32 25 26from ..lazy_import import lazy_import 27lazy_import(globals(), """ 28from breezy import ( 29 annotate, 30 bencode, 31 graph as _mod_graph, 32 osutils, 33 multiparent, 34 tsort, 35 revision, 36 urlutils, 37 ) 38from breezy.bzr import ( 39 groupcompress, 40 index, 41 knit, 42 ) 43""") 44from .. import ( 45 errors, 46 ) 47from ..registry import Registry 48from ..textmerge import TextMerge 49 50 51adapter_registry = Registry() 52adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'), 53 'breezy.bzr.knit', 'DeltaAnnotatedToUnannotated') 54adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'), 55 'breezy.bzr.knit', 'FTAnnotatedToUnannotated') 56for target_storage_kind in ('fulltext', 'chunked', 'lines'): 57 adapter_registry.register_lazy(('knit-delta-gz', target_storage_kind), 'breezy.bzr.knit', 58 'DeltaPlainToFullText') 59 adapter_registry.register_lazy(('knit-ft-gz', target_storage_kind), 'breezy.bzr.knit', 60 'FTPlainToFullText') 61 adapter_registry.register_lazy(('knit-annotated-ft-gz', target_storage_kind), 62 'breezy.bzr.knit', 'FTAnnotatedToFullText') 63 adapter_registry.register_lazy(('knit-annotated-delta-gz', target_storage_kind), 64 'breezy.bzr.knit', 'DeltaAnnotatedToFullText') 65 66 67class UnavailableRepresentation(errors.InternalBzrError): 68 69 _fmt = ("The encoding '%(wanted)s' is not available for key %(key)s which " 70 "is encoded as '%(native)s'.") 71 72 def __init__(self, key, wanted, native): 73 errors.InternalBzrError.__init__(self) 74 self.wanted = wanted 75 self.native = native 76 self.key = key 77 78 79class ExistingContent(errors.BzrError): 80 81 _fmt = "The content being inserted is already present." 82 83 84class ContentFactory(object): 85 """Abstract interface for insertion and retrieval from a VersionedFile. 86 87 :ivar sha1: None, or the sha1 of the content fulltext. 88 :ivar size: None, or the size of the content fulltext. 89 :ivar storage_kind: The native storage kind of this factory. One of 90 'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft', 91 'knit-delta', 'fulltext', 'knit-annotated-ft-gz', 92 'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'. 93 :ivar key: The key of this content. Each key is a tuple with a single 94 string in it. 95 :ivar parents: A tuple of parent keys for self.key. If the object has 96 no parent information, None (as opposed to () for an empty list of 97 parents). 98 """ 99 100 def __init__(self): 101 """Create a ContentFactory.""" 102 self.sha1 = None 103 self.size = None 104 self.storage_kind = None 105 self.key = None 106 self.parents = None 107 108 109class ChunkedContentFactory(ContentFactory): 110 """Static data content factory. 111 112 This takes a 'chunked' list of strings. The only requirement on 'chunked' is 113 that ''.join(lines) becomes a valid fulltext. A tuple of a single string 114 satisfies this, as does a list of lines. 115 116 :ivar sha1: None, or the sha1 of the content fulltext. 117 :ivar size: None, or the size of the content fulltext. 118 :ivar storage_kind: The native storage kind of this factory. Always 119 'chunked' 120 :ivar key: The key of this content. Each key is a tuple with a single 121 string in it. 122 :ivar parents: A tuple of parent keys for self.key. If the object has 123 no parent information, None (as opposed to () for an empty list of 124 parents). 125 :ivar chunks_are_lines: Whether chunks are lines. 126 """ 127 128 def __init__(self, key, parents, sha1, chunks, chunks_are_lines=None): 129 """Create a ContentFactory.""" 130 self.sha1 = sha1 131 self.size = sum(map(len, chunks)) 132 self.storage_kind = 'chunked' 133 self.key = key 134 self.parents = parents 135 self._chunks = chunks 136 self._chunks_are_lines = chunks_are_lines 137 138 def get_bytes_as(self, storage_kind): 139 if storage_kind == 'chunked': 140 return self._chunks 141 elif storage_kind == 'fulltext': 142 return b''.join(self._chunks) 143 elif storage_kind == 'lines': 144 if self._chunks_are_lines: 145 return self._chunks 146 return list(osutils.chunks_to_lines(self._chunks)) 147 raise UnavailableRepresentation(self.key, storage_kind, 148 self.storage_kind) 149 150 def iter_bytes_as(self, storage_kind): 151 if storage_kind == 'chunked': 152 return iter(self._chunks) 153 elif storage_kind == 'lines': 154 if self._chunks_are_lines: 155 return iter(self._chunks) 156 return iter(osutils.chunks_to_lines(self._chunks)) 157 raise UnavailableRepresentation(self.key, storage_kind, 158 self.storage_kind) 159 160class FulltextContentFactory(ContentFactory): 161 """Static data content factory. 162 163 This takes a fulltext when created and just returns that during 164 get_bytes_as('fulltext'). 165 166 :ivar sha1: None, or the sha1 of the content fulltext. 167 :ivar storage_kind: The native storage kind of this factory. Always 168 'fulltext'. 169 :ivar key: The key of this content. Each key is a tuple with a single 170 string in it. 171 :ivar parents: A tuple of parent keys for self.key. If the object has 172 no parent information, None (as opposed to () for an empty list of 173 parents). 174 """ 175 176 def __init__(self, key, parents, sha1, text): 177 """Create a ContentFactory.""" 178 self.sha1 = sha1 179 self.size = len(text) 180 self.storage_kind = 'fulltext' 181 self.key = key 182 self.parents = parents 183 if not isinstance(text, bytes): 184 raise TypeError(text) 185 self._text = text 186 187 def get_bytes_as(self, storage_kind): 188 if storage_kind == self.storage_kind: 189 return self._text 190 elif storage_kind == 'chunked': 191 return [self._text] 192 elif storage_kind == 'lines': 193 return osutils.split_lines(self._text) 194 raise UnavailableRepresentation(self.key, storage_kind, 195 self.storage_kind) 196 197 def iter_bytes_as(self, storage_kind): 198 if storage_kind == 'chunked': 199 return iter([self._text]) 200 elif storage_kind == 'lines': 201 return iter(osutils.split_lines(self._text)) 202 raise UnavailableRepresentation(self.key, storage_kind, 203 self.storage_kind) 204 205 206class FileContentFactory(ContentFactory): 207 """File-based content factory. 208 """ 209 210 def __init__(self, key, parents, fileobj, sha1=None, size=None): 211 self.key = key 212 self.parents = parents 213 self.file = fileobj 214 self.storage_kind = 'file' 215 self.sha1 = sha1 216 self.size = size 217 218 def get_bytes_as(self, storage_kind): 219 self.file.seek(0) 220 if storage_kind == 'fulltext': 221 return self.file.read() 222 elif storage_kind == 'chunked': 223 return list(osutils.file_iterator(self.file)) 224 elif storage_kind == 'lines': 225 return list(self.file.readlines()) 226 raise UnavailableRepresentation(self.key, storage_kind, 227 self.storage_kind) 228 229 def iter_bytes_as(self, storage_kind): 230 self.file.seek(0) 231 if storage_kind == 'chunked': 232 return osutils.file_iterator(self.file) 233 elif storage_kind == 'lines': 234 return self.file 235 raise UnavailableRepresentation(self.key, storage_kind, 236 self.storage_kind) 237 238 239class AbsentContentFactory(ContentFactory): 240 """A placeholder content factory for unavailable texts. 241 242 :ivar sha1: None. 243 :ivar storage_kind: 'absent'. 244 :ivar key: The key of this content. Each key is a tuple with a single 245 string in it. 246 :ivar parents: None. 247 """ 248 249 def __init__(self, key): 250 """Create a ContentFactory.""" 251 self.sha1 = None 252 self.size = None 253 self.storage_kind = 'absent' 254 self.key = key 255 self.parents = None 256 257 def get_bytes_as(self, storage_kind): 258 raise ValueError('A request was made for key: %s, but that' 259 ' content is not available, and the calling' 260 ' code does not handle if it is missing.' 261 % (self.key,)) 262 263 def iter_bytes_as(self, storage_kind): 264 raise ValueError('A request was made for key: %s, but that' 265 ' content is not available, and the calling' 266 ' code does not handle if it is missing.' 267 % (self.key,)) 268 269 270class AdapterFactory(ContentFactory): 271 """A content factory to adapt between key prefix's.""" 272 273 def __init__(self, key, parents, adapted): 274 """Create an adapter factory instance.""" 275 self.key = key 276 self.parents = parents 277 self._adapted = adapted 278 279 def __getattr__(self, attr): 280 """Return a member from the adapted object.""" 281 if attr in ('key', 'parents'): 282 return self.__dict__[attr] 283 else: 284 return getattr(self._adapted, attr) 285 286 287def filter_absent(record_stream): 288 """Adapt a record stream to remove absent records.""" 289 for record in record_stream: 290 if record.storage_kind != 'absent': 291 yield record 292 293 294class _MPDiffGenerator(object): 295 """Pull out the functionality for generating mp_diffs.""" 296 297 def __init__(self, vf, keys): 298 self.vf = vf 299 # This is the order the keys were requested in 300 self.ordered_keys = tuple(keys) 301 # keys + their parents, what we need to compute the diffs 302 self.needed_keys = () 303 # Map from key: mp_diff 304 self.diffs = {} 305 # Map from key: parents_needed (may have ghosts) 306 self.parent_map = {} 307 # Parents that aren't present 308 self.ghost_parents = () 309 # Map from parent_key => number of children for this text 310 self.refcounts = {} 311 # Content chunks that are cached while we still need them 312 self.chunks = {} 313 314 def _find_needed_keys(self): 315 """Find the set of keys we need to request. 316 317 This includes all the original keys passed in, and the non-ghost 318 parents of those keys. 319 320 :return: (needed_keys, refcounts) 321 needed_keys is the set of all texts we need to extract 322 refcounts is a dict of {key: num_children} letting us know when we 323 no longer need to cache a given parent text 324 """ 325 # All the keys and their parents 326 needed_keys = set(self.ordered_keys) 327 parent_map = self.vf.get_parent_map(needed_keys) 328 self.parent_map = parent_map 329 # TODO: Should we be using a different construct here? I think this 330 # uses difference_update internally, and we expect the result to 331 # be tiny 332 missing_keys = needed_keys.difference(parent_map) 333 if missing_keys: 334 raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf) 335 # Parents that might be missing. They are allowed to be ghosts, but we 336 # should check for them 337 refcounts = {} 338 setdefault = refcounts.setdefault 339 just_parents = set() 340 for child_key, parent_keys in parent_map.items(): 341 if not parent_keys: 342 # parent_keys may be None if a given VersionedFile claims to 343 # not support graph operations. 344 continue 345 just_parents.update(parent_keys) 346 needed_keys.update(parent_keys) 347 for p in parent_keys: 348 refcounts[p] = setdefault(p, 0) + 1 349 just_parents.difference_update(parent_map) 350 # Remove any parents that are actually ghosts from the needed set 351 self.present_parents = set(self.vf.get_parent_map(just_parents)) 352 self.ghost_parents = just_parents.difference(self.present_parents) 353 needed_keys.difference_update(self.ghost_parents) 354 self.needed_keys = needed_keys 355 self.refcounts = refcounts 356 return needed_keys, refcounts 357 358 def _compute_diff(self, key, parent_lines, lines): 359 """Compute a single mp_diff, and store it in self._diffs""" 360 if len(parent_lines) > 0: 361 # XXX: _extract_blocks is not usefully defined anywhere... 362 # It was meant to extract the left-parent diff without 363 # having to recompute it for Knit content (pack-0.92, 364 # etc). That seems to have regressed somewhere 365 left_parent_blocks = self.vf._extract_blocks(key, 366 parent_lines[0], lines) 367 else: 368 left_parent_blocks = None 369 diff = multiparent.MultiParent.from_lines(lines, 370 parent_lines, left_parent_blocks) 371 self.diffs[key] = diff 372 373 def _process_one_record(self, key, this_chunks): 374 parent_keys = None 375 if key in self.parent_map: 376 # This record should be ready to diff, since we requested 377 # content in 'topological' order 378 parent_keys = self.parent_map.pop(key) 379 # If a VersionedFile claims 'no-graph' support, then it may return 380 # None for any parent request, so we replace it with an empty tuple 381 if parent_keys is None: 382 parent_keys = () 383 parent_lines = [] 384 for p in parent_keys: 385 # Alternatively we could check p not in self.needed_keys, but 386 # ghost_parents should be tiny versus huge 387 if p in self.ghost_parents: 388 continue 389 refcount = self.refcounts[p] 390 if refcount == 1: # Last child reference 391 self.refcounts.pop(p) 392 parent_chunks = self.chunks.pop(p) 393 else: 394 self.refcounts[p] = refcount - 1 395 parent_chunks = self.chunks[p] 396 p_lines = osutils.chunks_to_lines(parent_chunks) 397 # TODO: Should we cache the line form? We did the 398 # computation to get it, but storing it this way will 399 # be less memory efficient... 400 parent_lines.append(p_lines) 401 del p_lines 402 lines = osutils.chunks_to_lines(this_chunks) 403 # Since we needed the lines, we'll go ahead and cache them this way 404 this_chunks = lines 405 self._compute_diff(key, parent_lines, lines) 406 del lines 407 # Is this content required for any more children? 408 if key in self.refcounts: 409 self.chunks[key] = this_chunks 410 411 def _extract_diffs(self): 412 needed_keys, refcounts = self._find_needed_keys() 413 for record in self.vf.get_record_stream(needed_keys, 414 'topological', True): 415 if record.storage_kind == 'absent': 416 raise errors.RevisionNotPresent(record.key, self.vf) 417 self._process_one_record(record.key, 418 record.get_bytes_as('chunked')) 419 420 def compute_diffs(self): 421 self._extract_diffs() 422 dpop = self.diffs.pop 423 return [dpop(k) for k in self.ordered_keys] 424 425 426class VersionedFile(object): 427 """Versioned text file storage. 428 429 A versioned file manages versions of line-based text files, 430 keeping track of the originating version for each line. 431 432 To clients the "lines" of the file are represented as a list of 433 strings. These strings will typically have terminal newline 434 characters, but this is not required. In particular files commonly 435 do not have a newline at the end of the file. 436 437 Texts are identified by a version-id string. 438 """ 439 440 @staticmethod 441 def check_not_reserved_id(version_id): 442 revision.check_not_reserved_id(version_id) 443 444 def copy_to(self, name, transport): 445 """Copy this versioned file to name on transport.""" 446 raise NotImplementedError(self.copy_to) 447 448 def get_record_stream(self, versions, ordering, include_delta_closure): 449 """Get a stream of records for versions. 450 451 :param versions: The versions to include. Each version is a tuple 452 (version,). 453 :param ordering: Either 'unordered' or 'topological'. A topologically 454 sorted stream has compression parents strictly before their 455 children. 456 :param include_delta_closure: If True then the closure across any 457 compression parents will be included (in the data content of the 458 stream, not in the emitted records). This guarantees that 459 'fulltext' can be used successfully on every record. 460 :return: An iterator of ContentFactory objects, each of which is only 461 valid until the iterator is advanced. 462 """ 463 raise NotImplementedError(self.get_record_stream) 464 465 def has_version(self, version_id): 466 """Returns whether version is present.""" 467 raise NotImplementedError(self.has_version) 468 469 def insert_record_stream(self, stream): 470 """Insert a record stream into this versioned file. 471 472 :param stream: A stream of records to insert. 473 :return: None 474 :seealso VersionedFile.get_record_stream: 475 """ 476 raise NotImplementedError 477 478 def add_lines(self, version_id, parents, lines, parent_texts=None, 479 left_matching_blocks=None, nostore_sha=None, random_id=False, 480 check_content=True): 481 """Add a single text on top of the versioned file. 482 483 Must raise RevisionAlreadyPresent if the new version is 484 already present in file history. 485 486 Must raise RevisionNotPresent if any of the given parents are 487 not present in file history. 488 489 :param lines: A list of lines. Each line must be a bytestring. And all 490 of them except the last must be terminated with \n and contain no 491 other \n's. The last line may either contain no \n's or a single 492 terminated \n. If the lines list does meet this constraint the add 493 routine may error or may succeed - but you will be unable to read 494 the data back accurately. (Checking the lines have been split 495 correctly is expensive and extremely unlikely to catch bugs so it 496 is not done at runtime unless check_content is True.) 497 :param parent_texts: An optional dictionary containing the opaque 498 representations of some or all of the parents of version_id to 499 allow delta optimisations. VERY IMPORTANT: the texts must be those 500 returned by add_lines or data corruption can be caused. 501 :param left_matching_blocks: a hint about which areas are common 502 between the text and its left-hand-parent. The format is 503 the SequenceMatcher.get_matching_blocks format. 504 :param nostore_sha: Raise ExistingContent and do not add the lines to 505 the versioned file if the digest of the lines matches this. 506 :param random_id: If True a random id has been selected rather than 507 an id determined by some deterministic process such as a converter 508 from a foreign VCS. When True the backend may choose not to check 509 for uniqueness of the resulting key within the versioned file, so 510 this should only be done when the result is expected to be unique 511 anyway. 512 :param check_content: If True, the lines supplied are verified to be 513 bytestrings that are correctly formed lines. 514 :return: The text sha1, the number of bytes in the text, and an opaque 515 representation of the inserted version which can be provided 516 back to future add_lines calls in the parent_texts dictionary. 517 """ 518 self._check_write_ok() 519 return self._add_lines(version_id, parents, lines, parent_texts, 520 left_matching_blocks, nostore_sha, random_id, check_content) 521 522 def _add_lines(self, version_id, parents, lines, parent_texts, 523 left_matching_blocks, nostore_sha, random_id, check_content): 524 """Helper to do the class specific add_lines.""" 525 raise NotImplementedError(self.add_lines) 526 527 def add_lines_with_ghosts(self, version_id, parents, lines, 528 parent_texts=None, nostore_sha=None, random_id=False, 529 check_content=True, left_matching_blocks=None): 530 """Add lines to the versioned file, allowing ghosts to be present. 531 532 This takes the same parameters as add_lines and returns the same. 533 """ 534 self._check_write_ok() 535 return self._add_lines_with_ghosts(version_id, parents, lines, 536 parent_texts, nostore_sha, random_id, check_content, left_matching_blocks) 537 538 def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts, 539 nostore_sha, random_id, check_content, left_matching_blocks): 540 """Helper to do class specific add_lines_with_ghosts.""" 541 raise NotImplementedError(self.add_lines_with_ghosts) 542 543 def check(self, progress_bar=None): 544 """Check the versioned file for integrity.""" 545 raise NotImplementedError(self.check) 546 547 def _check_lines_not_unicode(self, lines): 548 """Check that lines being added to a versioned file are not unicode.""" 549 for line in lines: 550 if not isinstance(line, bytes): 551 raise errors.BzrBadParameterUnicode("lines") 552 553 def _check_lines_are_lines(self, lines): 554 """Check that the lines really are full lines without inline EOL.""" 555 for line in lines: 556 if b'\n' in line[:-1]: 557 raise errors.BzrBadParameterContainsNewline("lines") 558 559 def get_format_signature(self): 560 """Get a text description of the data encoding in this file. 561 562 :since: 0.90 563 """ 564 raise NotImplementedError(self.get_format_signature) 565 566 def make_mpdiffs(self, version_ids): 567 """Create multiparent diffs for specified versions.""" 568 # XXX: Can't use _MPDiffGenerator just yet. This is because version_ids 569 # is a list of strings, not keys. And while self.get_record_stream 570 # is supported, it takes *keys*, while self.get_parent_map() takes 571 # strings... *sigh* 572 knit_versions = set() 573 knit_versions.update(version_ids) 574 parent_map = self.get_parent_map(version_ids) 575 for version_id in version_ids: 576 try: 577 knit_versions.update(parent_map[version_id]) 578 except KeyError: 579 raise errors.RevisionNotPresent(version_id, self) 580 # We need to filter out ghosts, because we can't diff against them. 581 knit_versions = set(self.get_parent_map(knit_versions)) 582 lines = dict(zip(knit_versions, 583 self._get_lf_split_line_list(knit_versions))) 584 diffs = [] 585 for version_id in version_ids: 586 target = lines[version_id] 587 try: 588 parents = [lines[p] for p in parent_map[version_id] if p in 589 knit_versions] 590 except KeyError: 591 # I don't know how this could ever trigger. 592 # parent_map[version_id] was already triggered in the previous 593 # for loop, and lines[p] has the 'if p in knit_versions' check, 594 # so we again won't have a KeyError. 595 raise errors.RevisionNotPresent(version_id, self) 596 if len(parents) > 0: 597 left_parent_blocks = self._extract_blocks(version_id, 598 parents[0], target) 599 else: 600 left_parent_blocks = None 601 diffs.append(multiparent.MultiParent.from_lines(target, parents, 602 left_parent_blocks)) 603 return diffs 604 605 def _extract_blocks(self, version_id, source, target): 606 return None 607 608 def add_mpdiffs(self, records): 609 """Add mpdiffs to this VersionedFile. 610 611 Records should be iterables of version, parents, expected_sha1, 612 mpdiff. mpdiff should be a MultiParent instance. 613 """ 614 # Does this need to call self._check_write_ok()? (IanC 20070919) 615 vf_parents = {} 616 mpvf = multiparent.MultiMemoryVersionedFile() 617 versions = [] 618 for version, parent_ids, expected_sha1, mpdiff in records: 619 versions.append(version) 620 mpvf.add_diff(mpdiff, version, parent_ids) 621 needed_parents = set() 622 for version, parent_ids, expected_sha1, mpdiff in records: 623 needed_parents.update(p for p in parent_ids 624 if not mpvf.has_version(p)) 625 present_parents = set(self.get_parent_map(needed_parents)) 626 for parent_id, lines in zip(present_parents, 627 self._get_lf_split_line_list(present_parents)): 628 mpvf.add_version(lines, parent_id, []) 629 for (version, parent_ids, expected_sha1, mpdiff), lines in zip( 630 records, mpvf.get_line_list(versions)): 631 if len(parent_ids) == 1: 632 left_matching_blocks = list(mpdiff.get_matching_blocks(0, 633 mpvf.get_diff(parent_ids[0]).num_lines())) 634 else: 635 left_matching_blocks = None 636 try: 637 _, _, version_text = self.add_lines_with_ghosts(version, 638 parent_ids, lines, vf_parents, 639 left_matching_blocks=left_matching_blocks) 640 except NotImplementedError: 641 # The vf can't handle ghosts, so add lines normally, which will 642 # (reasonably) fail if there are ghosts in the data. 643 _, _, version_text = self.add_lines(version, 644 parent_ids, lines, vf_parents, 645 left_matching_blocks=left_matching_blocks) 646 vf_parents[version] = version_text 647 sha1s = self.get_sha1s(versions) 648 for version, parent_ids, expected_sha1, mpdiff in records: 649 if expected_sha1 != sha1s[version]: 650 raise errors.VersionedFileInvalidChecksum(version) 651 652 def get_text(self, version_id): 653 """Return version contents as a text string. 654 655 Raises RevisionNotPresent if version is not present in 656 file history. 657 """ 658 return b''.join(self.get_lines(version_id)) 659 get_string = get_text 660 661 def get_texts(self, version_ids): 662 """Return the texts of listed versions as a list of strings. 663 664 Raises RevisionNotPresent if version is not present in 665 file history. 666 """ 667 return [b''.join(self.get_lines(v)) for v in version_ids] 668 669 def get_lines(self, version_id): 670 """Return version contents as a sequence of lines. 671 672 Raises RevisionNotPresent if version is not present in 673 file history. 674 """ 675 raise NotImplementedError(self.get_lines) 676 677 def _get_lf_split_line_list(self, version_ids): 678 return [BytesIO(t).readlines() for t in self.get_texts(version_ids)] 679 680 def get_ancestry(self, version_ids): 681 """Return a list of all ancestors of given version(s). This 682 will not include the null revision. 683 684 Must raise RevisionNotPresent if any of the given versions are 685 not present in file history.""" 686 raise NotImplementedError(self.get_ancestry) 687 688 def get_ancestry_with_ghosts(self, version_ids): 689 """Return a list of all ancestors of given version(s). This 690 will not include the null revision. 691 692 Must raise RevisionNotPresent if any of the given versions are 693 not present in file history. 694 695 Ghosts that are known about will be included in ancestry list, 696 but are not explicitly marked. 697 """ 698 raise NotImplementedError(self.get_ancestry_with_ghosts) 699 700 def get_parent_map(self, version_ids): 701 """Get a map of the parents of version_ids. 702 703 :param version_ids: The version ids to look up parents for. 704 :return: A mapping from version id to parents. 705 """ 706 raise NotImplementedError(self.get_parent_map) 707 708 def get_parents_with_ghosts(self, version_id): 709 """Return version names for parents of version_id. 710 711 Will raise RevisionNotPresent if version_id is not present 712 in the history. 713 714 Ghosts that are known about will be included in the parent list, 715 but are not explicitly marked. 716 """ 717 try: 718 return list(self.get_parent_map([version_id])[version_id]) 719 except KeyError: 720 raise errors.RevisionNotPresent(version_id, self) 721 722 def annotate(self, version_id): 723 """Return a list of (version-id, line) tuples for version_id. 724 725 :raise RevisionNotPresent: If the given version is 726 not present in file history. 727 """ 728 raise NotImplementedError(self.annotate) 729 730 def iter_lines_added_or_present_in_versions(self, version_ids=None, 731 pb=None): 732 """Iterate over the lines in the versioned file from version_ids. 733 734 This may return lines from other versions. Each item the returned 735 iterator yields is a tuple of a line and a text version that that line 736 is present in (not introduced in). 737 738 Ordering of results is in whatever order is most suitable for the 739 underlying storage format. 740 741 If a progress bar is supplied, it may be used to indicate progress. 742 The caller is responsible for cleaning up progress bars (because this 743 is an iterator). 744 745 NOTES: Lines are normalised: they will all have \n terminators. 746 Lines are returned in arbitrary order. 747 748 :return: An iterator over (line, version_id). 749 """ 750 raise NotImplementedError(self.iter_lines_added_or_present_in_versions) 751 752 def plan_merge(self, ver_a, ver_b, base=None): 753 """Return pseudo-annotation indicating how the two versions merge. 754 755 This is computed between versions a and b and their common 756 base. 757 758 Weave lines present in none of them are skipped entirely. 759 760 Legend: 761 killed-base Dead in base revision 762 killed-both Killed in each revision 763 killed-a Killed in a 764 killed-b Killed in b 765 unchanged Alive in both a and b (possibly created in both) 766 new-a Created in a 767 new-b Created in b 768 ghost-a Killed in a, unborn in b 769 ghost-b Killed in b, unborn in a 770 irrelevant Not in either revision 771 """ 772 raise NotImplementedError(VersionedFile.plan_merge) 773 774 def weave_merge(self, plan, a_marker=TextMerge.A_MARKER, 775 b_marker=TextMerge.B_MARKER): 776 return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0] 777 778 779class RecordingVersionedFilesDecorator(object): 780 """A minimal versioned files that records calls made on it. 781 782 Only enough methods have been added to support tests using it to date. 783 784 :ivar calls: A list of the calls made; can be reset at any time by 785 assigning [] to it. 786 """ 787 788 def __init__(self, backing_vf): 789 """Create a RecordingVersionedFilesDecorator decorating backing_vf. 790 791 :param backing_vf: The versioned file to answer all methods. 792 """ 793 self._backing_vf = backing_vf 794 self.calls = [] 795 796 def add_lines(self, key, parents, lines, parent_texts=None, 797 left_matching_blocks=None, nostore_sha=None, random_id=False, 798 check_content=True): 799 self.calls.append(("add_lines", key, parents, lines, parent_texts, 800 left_matching_blocks, nostore_sha, random_id, check_content)) 801 return self._backing_vf.add_lines(key, parents, lines, parent_texts, 802 left_matching_blocks, nostore_sha, random_id, check_content) 803 804 def add_content(self, factory, parent_texts=None, 805 left_matching_blocks=None, nostore_sha=None, random_id=False, 806 check_content=True): 807 self.calls.append(("add_content", factory, parent_texts, 808 left_matching_blocks, nostore_sha, random_id, check_content)) 809 return self._backing_vf.add_content( 810 factory, parent_texts, left_matching_blocks, nostore_sha, 811 random_id, check_content) 812 813 def check(self): 814 self._backing_vf.check() 815 816 def get_parent_map(self, keys): 817 self.calls.append(("get_parent_map", copy(keys))) 818 return self._backing_vf.get_parent_map(keys) 819 820 def get_record_stream(self, keys, sort_order, include_delta_closure): 821 self.calls.append(("get_record_stream", list(keys), sort_order, 822 include_delta_closure)) 823 return self._backing_vf.get_record_stream(keys, sort_order, 824 include_delta_closure) 825 826 def get_sha1s(self, keys): 827 self.calls.append(("get_sha1s", copy(keys))) 828 return self._backing_vf.get_sha1s(keys) 829 830 def iter_lines_added_or_present_in_keys(self, keys, pb=None): 831 self.calls.append(("iter_lines_added_or_present_in_keys", copy(keys))) 832 return self._backing_vf.iter_lines_added_or_present_in_keys(keys, pb=pb) 833 834 def keys(self): 835 self.calls.append(("keys",)) 836 return self._backing_vf.keys() 837 838 839class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator): 840 """A VF that records calls, and returns keys in specific order. 841 842 :ivar calls: A list of the calls made; can be reset at any time by 843 assigning [] to it. 844 """ 845 846 def __init__(self, backing_vf, key_priority): 847 """Create a RecordingVersionedFilesDecorator decorating backing_vf. 848 849 :param backing_vf: The versioned file to answer all methods. 850 :param key_priority: A dictionary defining what order keys should be 851 returned from an 'unordered' get_record_stream request. 852 Keys with lower priority are returned first, keys not present in 853 the map get an implicit priority of 0, and are returned in 854 lexicographical order. 855 """ 856 RecordingVersionedFilesDecorator.__init__(self, backing_vf) 857 self._key_priority = key_priority 858 859 def get_record_stream(self, keys, sort_order, include_delta_closure): 860 self.calls.append(("get_record_stream", list(keys), sort_order, 861 include_delta_closure)) 862 if sort_order == 'unordered': 863 def sort_key(key): 864 return (self._key_priority.get(key, 0), key) 865 # Use a defined order by asking for the keys one-by-one from the 866 # backing_vf 867 for key in sorted(keys, key=sort_key): 868 for record in self._backing_vf.get_record_stream([key], 869 'unordered', include_delta_closure): 870 yield record 871 else: 872 for record in self._backing_vf.get_record_stream(keys, sort_order, 873 include_delta_closure): 874 yield record 875 876 877class KeyMapper(object): 878 """KeyMappers map between keys and underlying partitioned storage.""" 879 880 def map(self, key): 881 """Map key to an underlying storage identifier. 882 883 :param key: A key tuple e.g. (b'file-id', b'revision-id'). 884 :return: An underlying storage identifier, specific to the partitioning 885 mechanism. 886 """ 887 raise NotImplementedError(self.map) 888 889 def unmap(self, partition_id): 890 """Map a partitioned storage id back to a key prefix. 891 892 :param partition_id: The underlying partition id. 893 :return: As much of a key (or prefix) as is derivable from the partition 894 id. 895 """ 896 raise NotImplementedError(self.unmap) 897 898 899class ConstantMapper(KeyMapper): 900 """A key mapper that maps to a constant result.""" 901 902 def __init__(self, result): 903 """Create a ConstantMapper which will return result for all maps.""" 904 self._result = result 905 906 def map(self, key): 907 """See KeyMapper.map().""" 908 return self._result 909 910 911class URLEscapeMapper(KeyMapper): 912 """Base class for use with transport backed storage. 913 914 This provides a map and unmap wrapper that respectively url escape and 915 unescape their outputs and inputs. 916 """ 917 918 def map(self, key): 919 """See KeyMapper.map().""" 920 return urlutils.quote(self._map(key)) 921 922 def unmap(self, partition_id): 923 """See KeyMapper.unmap().""" 924 return self._unmap(urlutils.unquote(partition_id)) 925 926 927class PrefixMapper(URLEscapeMapper): 928 """A key mapper that extracts the first component of a key. 929 930 This mapper is for use with a transport based backend. 931 """ 932 933 def _map(self, key): 934 """See KeyMapper.map().""" 935 return key[0].decode('utf-8') 936 937 def _unmap(self, partition_id): 938 """See KeyMapper.unmap().""" 939 return (partition_id.encode('utf-8'),) 940 941 942class HashPrefixMapper(URLEscapeMapper): 943 """A key mapper that combines the first component of a key with a hash. 944 945 This mapper is for use with a transport based backend. 946 """ 947 948 def _map(self, key): 949 """See KeyMapper.map().""" 950 prefix = self._escape(key[0]) 951 return "%02x/%s" % (adler32(prefix) & 0xff, prefix.decode('utf-8')) 952 953 def _escape(self, prefix): 954 """No escaping needed here.""" 955 return prefix 956 957 def _unmap(self, partition_id): 958 """See KeyMapper.unmap().""" 959 return (self._unescape(osutils.basename(partition_id)).encode('utf-8'),) 960 961 def _unescape(self, basename): 962 """No unescaping needed for HashPrefixMapper.""" 963 return basename 964 965 966class HashEscapedPrefixMapper(HashPrefixMapper): 967 """Combines the escaped first component of a key with a hash. 968 969 This mapper is for use with a transport based backend. 970 """ 971 972 _safe = bytearray(b"abcdefghijklmnopqrstuvwxyz0123456789-_@,.") 973 974 def _escape(self, prefix): 975 """Turn a key element into a filesystem safe string. 976 977 This is similar to a plain urlutils.quote, except 978 it uses specific safe characters, so that it doesn't 979 have to translate a lot of valid file ids. 980 """ 981 # @ does not get escaped. This is because it is a valid 982 # filesystem character we use all the time, and it looks 983 # a lot better than seeing %40 all the time. 984 r = [(c in self._safe) and chr(c) or ('%%%02x' % c) 985 for c in bytearray(prefix)] 986 return ''.join(r).encode('ascii') 987 988 def _unescape(self, basename): 989 """Escaped names are easily unescaped by urlutils.""" 990 return urlutils.unquote(basename) 991 992 993def make_versioned_files_factory(versioned_file_factory, mapper): 994 """Create a ThunkedVersionedFiles factory. 995 996 This will create a callable which when called creates a 997 ThunkedVersionedFiles on a transport, using mapper to access individual 998 versioned files, and versioned_file_factory to create each individual file. 999 """ 1000 def factory(transport): 1001 return ThunkedVersionedFiles(transport, versioned_file_factory, mapper, 1002 lambda: True) 1003 return factory 1004 1005 1006class VersionedFiles(object): 1007 """Storage for many versioned files. 1008 1009 This object allows a single keyspace for accessing the history graph and 1010 contents of named bytestrings. 1011 1012 Currently no implementation allows the graph of different key prefixes to 1013 intersect, but the API does allow such implementations in the future. 1014 1015 The keyspace is expressed via simple tuples. Any instance of VersionedFiles 1016 may have a different length key-size, but that size will be constant for 1017 all texts added to or retrieved from it. For instance, breezy uses 1018 instances with a key-size of 2 for storing user files in a repository, with 1019 the first element the fileid, and the second the version of that file. 1020 1021 The use of tuples allows a single code base to support several different 1022 uses with only the mapping logic changing from instance to instance. 1023 1024 :ivar _immediate_fallback_vfs: For subclasses that support stacking, 1025 this is a list of other VersionedFiles immediately underneath this 1026 one. They may in turn each have further fallbacks. 1027 """ 1028 1029 def add_lines(self, key, parents, lines, parent_texts=None, 1030 left_matching_blocks=None, nostore_sha=None, random_id=False, 1031 check_content=True): 1032 """Add a text to the store. 1033 1034 :param key: The key tuple of the text to add. If the last element is 1035 None, a CHK string will be generated during the addition. 1036 :param parents: The parents key tuples of the text to add. 1037 :param lines: A list of lines. Each line must be a bytestring. And all 1038 of them except the last must be terminated with \n and contain no 1039 other \n's. The last line may either contain no \n's or a single 1040 terminating \n. If the lines list does meet this constraint the add 1041 routine may error or may succeed - but you will be unable to read 1042 the data back accurately. (Checking the lines have been split 1043 correctly is expensive and extremely unlikely to catch bugs so it 1044 is not done at runtime unless check_content is True.) 1045 :param parent_texts: An optional dictionary containing the opaque 1046 representations of some or all of the parents of version_id to 1047 allow delta optimisations. VERY IMPORTANT: the texts must be those 1048 returned by add_lines or data corruption can be caused. 1049 :param left_matching_blocks: a hint about which areas are common 1050 between the text and its left-hand-parent. The format is 1051 the SequenceMatcher.get_matching_blocks format. 1052 :param nostore_sha: Raise ExistingContent and do not add the lines to 1053 the versioned file if the digest of the lines matches this. 1054 :param random_id: If True a random id has been selected rather than 1055 an id determined by some deterministic process such as a converter 1056 from a foreign VCS. When True the backend may choose not to check 1057 for uniqueness of the resulting key within the versioned file, so 1058 this should only be done when the result is expected to be unique 1059 anyway. 1060 :param check_content: If True, the lines supplied are verified to be 1061 bytestrings that are correctly formed lines. 1062 :return: The text sha1, the number of bytes in the text, and an opaque 1063 representation of the inserted version which can be provided 1064 back to future add_lines calls in the parent_texts dictionary. 1065 """ 1066 raise NotImplementedError(self.add_lines) 1067 1068 def add_content(self, factory, parent_texts=None, 1069 left_matching_blocks=None, nostore_sha=None, random_id=False, 1070 check_content=True): 1071 """Add a text to the store from a chunk iterable. 1072 1073 :param key: The key tuple of the text to add. If the last element is 1074 None, a CHK string will be generated during the addition. 1075 :param parents: The parents key tuples of the text to add. 1076 :param chunk_iter: An iterable over bytestrings. 1077 :param parent_texts: An optional dictionary containing the opaque 1078 representations of some or all of the parents of version_id to 1079 allow delta optimisations. VERY IMPORTANT: the texts must be those 1080 returned by add_lines or data corruption can be caused. 1081 :param left_matching_blocks: a hint about which areas are common 1082 between the text and its left-hand-parent. The format is 1083 the SequenceMatcher.get_matching_blocks format. 1084 :param nostore_sha: Raise ExistingContent and do not add the lines to 1085 the versioned file if the digest of the lines matches this. 1086 :param random_id: If True a random id has been selected rather than 1087 an id determined by some deterministic process such as a converter 1088 from a foreign VCS. When True the backend may choose not to check 1089 for uniqueness of the resulting key within the versioned file, so 1090 this should only be done when the result is expected to be unique 1091 anyway. 1092 :param check_content: If True, the lines supplied are verified to be 1093 bytestrings that are correctly formed lines. 1094 :return: The text sha1, the number of bytes in the text, and an opaque 1095 representation of the inserted version which can be provided 1096 back to future add_lines calls in the parent_texts dictionary. 1097 """ 1098 raise NotImplementedError(self.add_content) 1099 1100 def add_mpdiffs(self, records): 1101 """Add mpdiffs to this VersionedFile. 1102 1103 Records should be iterables of version, parents, expected_sha1, 1104 mpdiff. mpdiff should be a MultiParent instance. 1105 """ 1106 vf_parents = {} 1107 mpvf = multiparent.MultiMemoryVersionedFile() 1108 versions = [] 1109 for version, parent_ids, expected_sha1, mpdiff in records: 1110 versions.append(version) 1111 mpvf.add_diff(mpdiff, version, parent_ids) 1112 needed_parents = set() 1113 for version, parent_ids, expected_sha1, mpdiff in records: 1114 needed_parents.update(p for p in parent_ids 1115 if not mpvf.has_version(p)) 1116 # It seems likely that adding all the present parents as fulltexts can 1117 # easily exhaust memory. 1118 for record in self.get_record_stream(needed_parents, 'unordered', 1119 True): 1120 if record.storage_kind == 'absent': 1121 continue 1122 mpvf.add_version(record.get_bytes_as('lines'), record.key, []) 1123 for (key, parent_keys, expected_sha1, mpdiff), lines in zip( 1124 records, mpvf.get_line_list(versions)): 1125 if len(parent_keys) == 1: 1126 left_matching_blocks = list(mpdiff.get_matching_blocks(0, 1127 mpvf.get_diff(parent_keys[0]).num_lines())) 1128 else: 1129 left_matching_blocks = None 1130 version_sha1, _, version_text = self.add_lines(key, 1131 parent_keys, lines, vf_parents, 1132 left_matching_blocks=left_matching_blocks) 1133 if version_sha1 != expected_sha1: 1134 raise errors.VersionedFileInvalidChecksum(version) 1135 vf_parents[key] = version_text 1136 1137 def annotate(self, key): 1138 """Return a list of (version-key, line) tuples for the text of key. 1139 1140 :raise RevisionNotPresent: If the key is not present. 1141 """ 1142 raise NotImplementedError(self.annotate) 1143 1144 def check(self, progress_bar=None): 1145 """Check this object for integrity. 1146 1147 :param progress_bar: A progress bar to output as the check progresses. 1148 :param keys: Specific keys within the VersionedFiles to check. When 1149 this parameter is not None, check() becomes a generator as per 1150 get_record_stream. The difference to get_record_stream is that 1151 more or deeper checks will be performed. 1152 :return: None, or if keys was supplied a generator as per 1153 get_record_stream. 1154 """ 1155 raise NotImplementedError(self.check) 1156 1157 @staticmethod 1158 def check_not_reserved_id(version_id): 1159 revision.check_not_reserved_id(version_id) 1160 1161 def clear_cache(self): 1162 """Clear whatever caches this VersionedFile holds. 1163 1164 This is generally called after an operation has been performed, when we 1165 don't expect to be using this versioned file again soon. 1166 """ 1167 1168 def _check_lines_not_unicode(self, lines): 1169 """Check that lines being added to a versioned file are not unicode.""" 1170 for line in lines: 1171 if line.__class__ is not bytes: 1172 raise errors.BzrBadParameterUnicode("lines") 1173 1174 def _check_lines_are_lines(self, lines): 1175 """Check that the lines really are full lines without inline EOL.""" 1176 for line in lines: 1177 if b'\n' in line[:-1]: 1178 raise errors.BzrBadParameterContainsNewline("lines") 1179 1180 def get_known_graph_ancestry(self, keys): 1181 """Get a KnownGraph instance with the ancestry of keys.""" 1182 # most basic implementation is a loop around get_parent_map 1183 pending = set(keys) 1184 parent_map = {} 1185 while pending: 1186 this_parent_map = self.get_parent_map(pending) 1187 parent_map.update(this_parent_map) 1188 pending = set(itertools.chain.from_iterable( 1189 this_parent_map.values())) 1190 pending.difference_update(parent_map) 1191 kg = _mod_graph.KnownGraph(parent_map) 1192 return kg 1193 1194 def get_parent_map(self, keys): 1195 """Get a map of the parents of keys. 1196 1197 :param keys: The keys to look up parents for. 1198 :return: A mapping from keys to parents. Absent keys are absent from 1199 the mapping. 1200 """ 1201 raise NotImplementedError(self.get_parent_map) 1202 1203 def get_record_stream(self, keys, ordering, include_delta_closure): 1204 """Get a stream of records for keys. 1205 1206 :param keys: The keys to include. 1207 :param ordering: Either 'unordered' or 'topological'. A topologically 1208 sorted stream has compression parents strictly before their 1209 children. 1210 :param include_delta_closure: If True then the closure across any 1211 compression parents will be included (in the opaque data). 1212 :return: An iterator of ContentFactory objects, each of which is only 1213 valid until the iterator is advanced. 1214 """ 1215 raise NotImplementedError(self.get_record_stream) 1216 1217 def get_sha1s(self, keys): 1218 """Get the sha1's of the texts for the given keys. 1219 1220 :param keys: The names of the keys to lookup 1221 :return: a dict from key to sha1 digest. Keys of texts which are not 1222 present in the store are not present in the returned 1223 dictionary. 1224 """ 1225 raise NotImplementedError(self.get_sha1s) 1226 1227 __contains__ = index._has_key_from_parent_map 1228 1229 def get_missing_compression_parent_keys(self): 1230 """Return an iterable of keys of missing compression parents. 1231 1232 Check this after calling insert_record_stream to find out if there are 1233 any missing compression parents. If there are, the records that 1234 depend on them are not able to be inserted safely. The precise 1235 behaviour depends on the concrete VersionedFiles class in use. 1236 1237 Classes that do not support this will raise NotImplementedError. 1238 """ 1239 raise NotImplementedError(self.get_missing_compression_parent_keys) 1240 1241 def insert_record_stream(self, stream): 1242 """Insert a record stream into this container. 1243 1244 :param stream: A stream of records to insert. 1245 :return: None 1246 :seealso VersionedFile.get_record_stream: 1247 """ 1248 raise NotImplementedError 1249 1250 def iter_lines_added_or_present_in_keys(self, keys, pb=None): 1251 """Iterate over the lines in the versioned files from keys. 1252 1253 This may return lines from other keys. Each item the returned 1254 iterator yields is a tuple of a line and a text version that that line 1255 is present in (not introduced in). 1256 1257 Ordering of results is in whatever order is most suitable for the 1258 underlying storage format. 1259 1260 If a progress bar is supplied, it may be used to indicate progress. 1261 The caller is responsible for cleaning up progress bars (because this 1262 is an iterator). 1263 1264 NOTES: 1265 * Lines are normalised by the underlying store: they will all have \n 1266 terminators. 1267 * Lines are returned in arbitrary order. 1268 1269 :return: An iterator over (line, key). 1270 """ 1271 raise NotImplementedError(self.iter_lines_added_or_present_in_keys) 1272 1273 def keys(self): 1274 """Return a iterable of the keys for all the contained texts.""" 1275 raise NotImplementedError(self.keys) 1276 1277 def make_mpdiffs(self, keys): 1278 """Create multiparent diffs for specified keys.""" 1279 generator = _MPDiffGenerator(self, keys) 1280 return generator.compute_diffs() 1281 1282 def get_annotator(self): 1283 return annotate.Annotator(self) 1284 1285 missing_keys = index._missing_keys_from_parent_map 1286 1287 def _extract_blocks(self, version_id, source, target): 1288 return None 1289 1290 def _transitive_fallbacks(self): 1291 """Return the whole stack of fallback versionedfiles. 1292 1293 This VersionedFiles may have a list of fallbacks, but it doesn't 1294 necessarily know about the whole stack going down, and it can't know 1295 at open time because they may change after the objects are opened. 1296 """ 1297 all_fallbacks = [] 1298 for a_vfs in self._immediate_fallback_vfs: 1299 all_fallbacks.append(a_vfs) 1300 all_fallbacks.extend(a_vfs._transitive_fallbacks()) 1301 return all_fallbacks 1302 1303 1304class ThunkedVersionedFiles(VersionedFiles): 1305 """Storage for many versioned files thunked onto a 'VersionedFile' class. 1306 1307 This object allows a single keyspace for accessing the history graph and 1308 contents of named bytestrings. 1309 1310 Currently no implementation allows the graph of different key prefixes to 1311 intersect, but the API does allow such implementations in the future. 1312 """ 1313 1314 def __init__(self, transport, file_factory, mapper, is_locked): 1315 """Create a ThunkedVersionedFiles.""" 1316 self._transport = transport 1317 self._file_factory = file_factory 1318 self._mapper = mapper 1319 self._is_locked = is_locked 1320 1321 def add_content(self, factory, parent_texts=None, 1322 left_matching_blocks=None, nostore_sha=None, random_id=False): 1323 """See VersionedFiles.add_content().""" 1324 lines = factory.get_bytes_as('lines') 1325 return self.add_lines( 1326 factory.key, factory.parents, lines, 1327 parent_texts=parent_texts, 1328 left_matching_blocks=left_matching_blocks, 1329 nostore_sha=nostore_sha, 1330 random_id=random_id, 1331 check_content=True) 1332 1333 def add_lines(self, key, parents, lines, parent_texts=None, 1334 left_matching_blocks=None, nostore_sha=None, random_id=False, 1335 check_content=True): 1336 """See VersionedFiles.add_lines().""" 1337 path = self._mapper.map(key) 1338 version_id = key[-1] 1339 parents = [parent[-1] for parent in parents] 1340 vf = self._get_vf(path) 1341 try: 1342 try: 1343 return vf.add_lines_with_ghosts(version_id, parents, lines, 1344 parent_texts=parent_texts, 1345 left_matching_blocks=left_matching_blocks, 1346 nostore_sha=nostore_sha, random_id=random_id, 1347 check_content=check_content) 1348 except NotImplementedError: 1349 return vf.add_lines(version_id, parents, lines, 1350 parent_texts=parent_texts, 1351 left_matching_blocks=left_matching_blocks, 1352 nostore_sha=nostore_sha, random_id=random_id, 1353 check_content=check_content) 1354 except errors.NoSuchFile: 1355 # parent directory may be missing, try again. 1356 self._transport.mkdir(osutils.dirname(path)) 1357 try: 1358 return vf.add_lines_with_ghosts(version_id, parents, lines, 1359 parent_texts=parent_texts, 1360 left_matching_blocks=left_matching_blocks, 1361 nostore_sha=nostore_sha, random_id=random_id, 1362 check_content=check_content) 1363 except NotImplementedError: 1364 return vf.add_lines(version_id, parents, lines, 1365 parent_texts=parent_texts, 1366 left_matching_blocks=left_matching_blocks, 1367 nostore_sha=nostore_sha, random_id=random_id, 1368 check_content=check_content) 1369 1370 def annotate(self, key): 1371 """Return a list of (version-key, line) tuples for the text of key. 1372 1373 :raise RevisionNotPresent: If the key is not present. 1374 """ 1375 prefix = key[:-1] 1376 path = self._mapper.map(prefix) 1377 vf = self._get_vf(path) 1378 origins = vf.annotate(key[-1]) 1379 result = [] 1380 for origin, line in origins: 1381 result.append((prefix + (origin,), line)) 1382 return result 1383 1384 def check(self, progress_bar=None, keys=None): 1385 """See VersionedFiles.check().""" 1386 # XXX: This is over-enthusiastic but as we only thunk for Weaves today 1387 # this is tolerable. Ideally we'd pass keys down to check() and 1388 # have the older VersiondFile interface updated too. 1389 for prefix, vf in self._iter_all_components(): 1390 vf.check() 1391 if keys is not None: 1392 return self.get_record_stream(keys, 'unordered', True) 1393 1394 def get_parent_map(self, keys): 1395 """Get a map of the parents of keys. 1396 1397 :param keys: The keys to look up parents for. 1398 :return: A mapping from keys to parents. Absent keys are absent from 1399 the mapping. 1400 """ 1401 prefixes = self._partition_keys(keys) 1402 result = {} 1403 for prefix, suffixes in prefixes.items(): 1404 path = self._mapper.map(prefix) 1405 vf = self._get_vf(path) 1406 parent_map = vf.get_parent_map(suffixes) 1407 for key, parents in parent_map.items(): 1408 result[prefix + (key,)] = tuple( 1409 prefix + (parent,) for parent in parents) 1410 return result 1411 1412 def _get_vf(self, path): 1413 if not self._is_locked(): 1414 raise errors.ObjectNotLocked(self) 1415 return self._file_factory(path, self._transport, create=True, 1416 get_scope=lambda: None) 1417 1418 def _partition_keys(self, keys): 1419 """Turn keys into a dict of prefix:suffix_list.""" 1420 result = {} 1421 for key in keys: 1422 prefix_keys = result.setdefault(key[:-1], []) 1423 prefix_keys.append(key[-1]) 1424 return result 1425 1426 def _iter_all_prefixes(self): 1427 # Identify all key prefixes. 1428 # XXX: A bit hacky, needs polish. 1429 if isinstance(self._mapper, ConstantMapper): 1430 paths = [self._mapper.map(())] 1431 prefixes = [()] 1432 else: 1433 relpaths = set() 1434 for quoted_relpath in self._transport.iter_files_recursive(): 1435 path, ext = os.path.splitext(quoted_relpath) 1436 relpaths.add(path) 1437 paths = list(relpaths) 1438 prefixes = [self._mapper.unmap(path) for path in paths] 1439 return zip(paths, prefixes) 1440 1441 def get_record_stream(self, keys, ordering, include_delta_closure): 1442 """See VersionedFiles.get_record_stream().""" 1443 # Ordering will be taken care of by each partitioned store; group keys 1444 # by partition. 1445 keys = sorted(keys) 1446 for prefix, suffixes, vf in self._iter_keys_vf(keys): 1447 suffixes = [(suffix,) for suffix in suffixes] 1448 for record in vf.get_record_stream(suffixes, ordering, 1449 include_delta_closure): 1450 if record.parents is not None: 1451 record.parents = tuple( 1452 prefix + parent for parent in record.parents) 1453 record.key = prefix + record.key 1454 yield record 1455 1456 def _iter_keys_vf(self, keys): 1457 prefixes = self._partition_keys(keys) 1458 sha1s = {} 1459 for prefix, suffixes in prefixes.items(): 1460 path = self._mapper.map(prefix) 1461 vf = self._get_vf(path) 1462 yield prefix, suffixes, vf 1463 1464 def get_sha1s(self, keys): 1465 """See VersionedFiles.get_sha1s().""" 1466 sha1s = {} 1467 for prefix, suffixes, vf in self._iter_keys_vf(keys): 1468 vf_sha1s = vf.get_sha1s(suffixes) 1469 for suffix, sha1 in vf_sha1s.items(): 1470 sha1s[prefix + (suffix,)] = sha1 1471 return sha1s 1472 1473 def insert_record_stream(self, stream): 1474 """Insert a record stream into this container. 1475 1476 :param stream: A stream of records to insert. 1477 :return: None 1478 :seealso VersionedFile.get_record_stream: 1479 """ 1480 for record in stream: 1481 prefix = record.key[:-1] 1482 key = record.key[-1:] 1483 if record.parents is not None: 1484 parents = [parent[-1:] for parent in record.parents] 1485 else: 1486 parents = None 1487 thunk_record = AdapterFactory(key, parents, record) 1488 path = self._mapper.map(prefix) 1489 # Note that this parses the file many times; we can do better but 1490 # as this only impacts weaves in terms of performance, it is 1491 # tolerable. 1492 vf = self._get_vf(path) 1493 vf.insert_record_stream([thunk_record]) 1494 1495 def iter_lines_added_or_present_in_keys(self, keys, pb=None): 1496 """Iterate over the lines in the versioned files from keys. 1497 1498 This may return lines from other keys. Each item the returned 1499 iterator yields is a tuple of a line and a text version that that line 1500 is present in (not introduced in). 1501 1502 Ordering of results is in whatever order is most suitable for the 1503 underlying storage format. 1504 1505 If a progress bar is supplied, it may be used to indicate progress. 1506 The caller is responsible for cleaning up progress bars (because this 1507 is an iterator). 1508 1509 NOTES: 1510 * Lines are normalised by the underlying store: they will all have \n 1511 terminators. 1512 * Lines are returned in arbitrary order. 1513 1514 :return: An iterator over (line, key). 1515 """ 1516 for prefix, suffixes, vf in self._iter_keys_vf(keys): 1517 for line, version in vf.iter_lines_added_or_present_in_versions(suffixes): 1518 yield line, prefix + (version,) 1519 1520 def _iter_all_components(self): 1521 for path, prefix in self._iter_all_prefixes(): 1522 yield prefix, self._get_vf(path) 1523 1524 def keys(self): 1525 """See VersionedFiles.keys().""" 1526 result = set() 1527 for prefix, vf in self._iter_all_components(): 1528 for suffix in vf.versions(): 1529 result.add(prefix + (suffix,)) 1530 return result 1531 1532 1533class VersionedFilesWithFallbacks(VersionedFiles): 1534 1535 def without_fallbacks(self): 1536 """Return a clone of this object without any fallbacks configured.""" 1537 raise NotImplementedError(self.without_fallbacks) 1538 1539 def add_fallback_versioned_files(self, a_versioned_files): 1540 """Add a source of texts for texts not present in this knit. 1541 1542 :param a_versioned_files: A VersionedFiles object. 1543 """ 1544 raise NotImplementedError(self.add_fallback_versioned_files) 1545 1546 def get_known_graph_ancestry(self, keys): 1547 """Get a KnownGraph instance with the ancestry of keys.""" 1548 parent_map, missing_keys = self._index.find_ancestry(keys) 1549 for fallback in self._transitive_fallbacks(): 1550 if not missing_keys: 1551 break 1552 (f_parent_map, f_missing_keys) = fallback._index.find_ancestry( 1553 missing_keys) 1554 parent_map.update(f_parent_map) 1555 missing_keys = f_missing_keys 1556 kg = _mod_graph.KnownGraph(parent_map) 1557 return kg 1558 1559 1560class _PlanMergeVersionedFile(VersionedFiles): 1561 """A VersionedFile for uncommitted and committed texts. 1562 1563 It is intended to allow merges to be planned with working tree texts. 1564 It implements only the small part of the VersionedFiles interface used by 1565 PlanMerge. It falls back to multiple versionedfiles for data not stored in 1566 _PlanMergeVersionedFile itself. 1567 1568 :ivar: fallback_versionedfiles a list of VersionedFiles objects that can be 1569 queried for missing texts. 1570 """ 1571 1572 def __init__(self, file_id): 1573 """Create a _PlanMergeVersionedFile. 1574 1575 :param file_id: Used with _PlanMerge code which is not yet fully 1576 tuple-keyspace aware. 1577 """ 1578 self._file_id = file_id 1579 # fallback locations 1580 self.fallback_versionedfiles = [] 1581 # Parents for locally held keys. 1582 self._parents = {} 1583 # line data for locally held keys. 1584 self._lines = {} 1585 # key lookup providers 1586 self._providers = [_mod_graph.DictParentsProvider(self._parents)] 1587 1588 def plan_merge(self, ver_a, ver_b, base=None): 1589 """See VersionedFile.plan_merge""" 1590 from ..merge import _PlanMerge 1591 if base is None: 1592 return _PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge() 1593 old_plan = list(_PlanMerge(ver_a, base, self, 1594 (self._file_id,)).plan_merge()) 1595 new_plan = list(_PlanMerge(ver_a, ver_b, self, 1596 (self._file_id,)).plan_merge()) 1597 return _PlanMerge._subtract_plans(old_plan, new_plan) 1598 1599 def plan_lca_merge(self, ver_a, ver_b, base=None): 1600 from ..merge import _PlanLCAMerge 1601 graph = _mod_graph.Graph(self) 1602 new_plan = _PlanLCAMerge( 1603 ver_a, ver_b, self, (self._file_id,), graph).plan_merge() 1604 if base is None: 1605 return new_plan 1606 old_plan = _PlanLCAMerge( 1607 ver_a, base, self, (self._file_id,), graph).plan_merge() 1608 return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan)) 1609 1610 def add_content(self, factory): 1611 return self.add_lines( 1612 factory.key, factory.parents, factory.get_bytes_as('lines')) 1613 1614 def add_lines(self, key, parents, lines): 1615 """See VersionedFiles.add_lines 1616 1617 Lines are added locally, not to fallback versionedfiles. Also, ghosts 1618 are permitted. Only reserved ids are permitted. 1619 """ 1620 if not isinstance(key, tuple): 1621 raise TypeError(key) 1622 if not revision.is_reserved_id(key[-1]): 1623 raise ValueError('Only reserved ids may be used') 1624 if parents is None: 1625 raise ValueError('Parents may not be None') 1626 if lines is None: 1627 raise ValueError('Lines may not be None') 1628 self._parents[key] = tuple(parents) 1629 self._lines[key] = lines 1630 1631 def get_record_stream(self, keys, ordering, include_delta_closure): 1632 pending = set(keys) 1633 for key in keys: 1634 if key in self._lines: 1635 lines = self._lines[key] 1636 parents = self._parents[key] 1637 pending.remove(key) 1638 yield ChunkedContentFactory( 1639 key, parents, None, lines, 1640 chunks_are_lines=True) 1641 for versionedfile in self.fallback_versionedfiles: 1642 for record in versionedfile.get_record_stream( 1643 pending, 'unordered', True): 1644 if record.storage_kind == 'absent': 1645 continue 1646 else: 1647 pending.remove(record.key) 1648 yield record 1649 if not pending: 1650 return 1651 # report absent entries 1652 for key in pending: 1653 yield AbsentContentFactory(key) 1654 1655 def get_parent_map(self, keys): 1656 """See VersionedFiles.get_parent_map""" 1657 # We create a new provider because a fallback may have been added. 1658 # If we make fallbacks private we can update a stack list and avoid 1659 # object creation thrashing. 1660 keys = set(keys) 1661 result = {} 1662 if revision.NULL_REVISION in keys: 1663 keys.remove(revision.NULL_REVISION) 1664 result[revision.NULL_REVISION] = () 1665 self._providers = self._providers[:1] + self.fallback_versionedfiles 1666 result.update( 1667 _mod_graph.StackedParentsProvider( 1668 self._providers).get_parent_map(keys)) 1669 for key, parents in result.items(): 1670 if parents == (): 1671 result[key] = (revision.NULL_REVISION,) 1672 return result 1673 1674 1675class PlanWeaveMerge(TextMerge): 1676 """Weave merge that takes a plan as its input. 1677 1678 This exists so that VersionedFile.plan_merge is implementable. 1679 Most callers will want to use WeaveMerge instead. 1680 """ 1681 1682 def __init__(self, plan, a_marker=TextMerge.A_MARKER, 1683 b_marker=TextMerge.B_MARKER): 1684 TextMerge.__init__(self, a_marker, b_marker) 1685 self.plan = list(plan) 1686 1687 def _merge_struct(self): 1688 lines_a = [] 1689 lines_b = [] 1690 ch_a = ch_b = False 1691 1692 def outstanding_struct(): 1693 if not lines_a and not lines_b: 1694 return 1695 elif ch_a and not ch_b: 1696 # one-sided change: 1697 yield(lines_a,) 1698 elif ch_b and not ch_a: 1699 yield (lines_b,) 1700 elif lines_a == lines_b: 1701 yield(lines_a,) 1702 else: 1703 yield (lines_a, lines_b) 1704 1705 # We previously considered either 'unchanged' or 'killed-both' lines 1706 # to be possible places to resynchronize. However, assuming agreement 1707 # on killed-both lines may be too aggressive. -- mbp 20060324 1708 for state, line in self.plan: 1709 if state == 'unchanged': 1710 # resync and flush queued conflicts changes if any 1711 for struct in outstanding_struct(): 1712 yield struct 1713 lines_a = [] 1714 lines_b = [] 1715 ch_a = ch_b = False 1716 1717 if state == 'unchanged': 1718 if line: 1719 yield ([line],) 1720 elif state == 'killed-a': 1721 ch_a = True 1722 lines_b.append(line) 1723 elif state == 'killed-b': 1724 ch_b = True 1725 lines_a.append(line) 1726 elif state == 'new-a': 1727 ch_a = True 1728 lines_a.append(line) 1729 elif state == 'new-b': 1730 ch_b = True 1731 lines_b.append(line) 1732 elif state == 'conflicted-a': 1733 ch_b = ch_a = True 1734 lines_a.append(line) 1735 elif state == 'conflicted-b': 1736 ch_b = ch_a = True 1737 lines_b.append(line) 1738 elif state == 'killed-both': 1739 # This counts as a change, even though there is no associated 1740 # line 1741 ch_b = ch_a = True 1742 else: 1743 if state not in ('irrelevant', 'ghost-a', 'ghost-b', 1744 'killed-base'): 1745 raise AssertionError(state) 1746 for struct in outstanding_struct(): 1747 yield struct 1748 1749 def base_from_plan(self): 1750 """Construct a BASE file from the plan text.""" 1751 base_lines = [] 1752 for state, line in self.plan: 1753 if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'): 1754 # If unchanged, then this line is straight from base. If a or b 1755 # or both killed the line, then it *used* to be in base. 1756 base_lines.append(line) 1757 else: 1758 if state not in ('killed-base', 'irrelevant', 1759 'ghost-a', 'ghost-b', 1760 'new-a', 'new-b', 1761 'conflicted-a', 'conflicted-b'): 1762 # killed-base, irrelevant means it doesn't apply 1763 # ghost-a/ghost-b are harder to say for sure, but they 1764 # aren't in the 'inc_c' which means they aren't in the 1765 # shared base of a & b. So we don't include them. And 1766 # obviously if the line is newly inserted, it isn't in base 1767 1768 # If 'conflicted-a' or b, then it is new vs one base, but 1769 # old versus another base. However, if we make it present 1770 # in the base, it will be deleted from the target, and it 1771 # seems better to get a line doubled in the merge result, 1772 # rather than have it deleted entirely. 1773 # Example, each node is the 'text' at that point: 1774 # MN 1775 # / \ 1776 # MaN MbN 1777 # | X | 1778 # MabN MbaN 1779 # \ / 1780 # ??? 1781 # There was a criss-cross conflict merge. Both sides 1782 # include the other, but put themselves first. 1783 # Weave marks this as a 'clean' merge, picking OTHER over 1784 # THIS. (Though the details depend on order inserted into 1785 # weave, etc.) 1786 # LCA generates a plan: 1787 # [('unchanged', M), 1788 # ('conflicted-b', b), 1789 # ('unchanged', a), 1790 # ('conflicted-a', b), 1791 # ('unchanged', N)] 1792 # If you mark 'conflicted-*' as part of BASE, then a 3-way 1793 # merge tool will cleanly generate "MaN" (as BASE vs THIS 1794 # removes one 'b', and BASE vs OTHER removes the other) 1795 # If you include neither, 3-way creates a clean "MbabN" as 1796 # THIS adds one 'b', and OTHER does too. 1797 # It seems that having the line 2 times is better than 1798 # having it omitted. (Easier to manually delete than notice 1799 # it needs to be added.) 1800 raise AssertionError('Unknown state: %s' % (state,)) 1801 return base_lines 1802 1803 1804class WeaveMerge(PlanWeaveMerge): 1805 """Weave merge that takes a VersionedFile and two versions as its input.""" 1806 1807 def __init__(self, versionedfile, ver_a, ver_b, 1808 a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER): 1809 plan = versionedfile.plan_merge(ver_a, ver_b) 1810 PlanWeaveMerge.__init__(self, plan, a_marker, b_marker) 1811 1812 1813class VirtualVersionedFiles(VersionedFiles): 1814 """Dummy implementation for VersionedFiles that uses other functions for 1815 obtaining fulltexts and parent maps. 1816 1817 This is always on the bottom of the stack and uses string keys 1818 (rather than tuples) internally. 1819 """ 1820 1821 def __init__(self, get_parent_map, get_lines): 1822 """Create a VirtualVersionedFiles. 1823 1824 :param get_parent_map: Same signature as Repository.get_parent_map. 1825 :param get_lines: Should return lines for specified key or None if 1826 not available. 1827 """ 1828 super(VirtualVersionedFiles, self).__init__() 1829 self._get_parent_map = get_parent_map 1830 self._get_lines = get_lines 1831 1832 def check(self, progressbar=None): 1833 """See VersionedFiles.check. 1834 1835 :note: Always returns True for VirtualVersionedFiles. 1836 """ 1837 return True 1838 1839 def add_mpdiffs(self, records): 1840 """See VersionedFiles.mpdiffs. 1841 1842 :note: Not implemented for VirtualVersionedFiles. 1843 """ 1844 raise NotImplementedError(self.add_mpdiffs) 1845 1846 def get_parent_map(self, keys): 1847 """See VersionedFiles.get_parent_map.""" 1848 parent_view = self._get_parent_map(k for (k,) in keys).items() 1849 return dict(((k,), tuple((p,) for p in v)) for k, v in parent_view) 1850 1851 def get_sha1s(self, keys): 1852 """See VersionedFiles.get_sha1s.""" 1853 ret = {} 1854 for (k,) in keys: 1855 lines = self._get_lines(k) 1856 if lines is not None: 1857 if not isinstance(lines, list): 1858 raise AssertionError 1859 ret[(k,)] = osutils.sha_strings(lines) 1860 return ret 1861 1862 def get_record_stream(self, keys, ordering, include_delta_closure): 1863 """See VersionedFiles.get_record_stream.""" 1864 for (k,) in list(keys): 1865 lines = self._get_lines(k) 1866 if lines is not None: 1867 if not isinstance(lines, list): 1868 raise AssertionError 1869 yield ChunkedContentFactory( 1870 (k,), None, sha1=osutils.sha_strings(lines), 1871 chunks=lines, chunks_are_lines=True) 1872 else: 1873 yield AbsentContentFactory((k,)) 1874 1875 def iter_lines_added_or_present_in_keys(self, keys, pb=None): 1876 """See VersionedFile.iter_lines_added_or_present_in_versions().""" 1877 for i, (key,) in enumerate(keys): 1878 if pb is not None: 1879 pb.update("Finding changed lines", i, len(keys)) 1880 for l in self._get_lines(key): 1881 yield (l, key) 1882 1883 1884class NoDupeAddLinesDecorator(object): 1885 """Decorator for a VersionedFiles that skips doing an add_lines if the key 1886 is already present. 1887 """ 1888 1889 def __init__(self, store): 1890 self._store = store 1891 1892 def add_lines(self, key, parents, lines, parent_texts=None, 1893 left_matching_blocks=None, nostore_sha=None, random_id=False, 1894 check_content=True): 1895 """See VersionedFiles.add_lines. 1896 1897 This implementation may return None as the third element of the return 1898 value when the original store wouldn't. 1899 """ 1900 if nostore_sha: 1901 raise NotImplementedError( 1902 "NoDupeAddLinesDecorator.add_lines does not implement the " 1903 "nostore_sha behaviour.") 1904 if key[-1] is None: 1905 sha1 = osutils.sha_strings(lines) 1906 key = (b"sha1:" + sha1,) 1907 else: 1908 sha1 = None 1909 if key in self._store.get_parent_map([key]): 1910 # This key has already been inserted, so don't do it again. 1911 if sha1 is None: 1912 sha1 = osutils.sha_strings(lines) 1913 return sha1, sum(map(len, lines)), None 1914 return self._store.add_lines(key, parents, lines, 1915 parent_texts=parent_texts, 1916 left_matching_blocks=left_matching_blocks, 1917 nostore_sha=nostore_sha, random_id=random_id, 1918 check_content=check_content) 1919 1920 def __getattr__(self, name): 1921 return getattr(self._store, name) 1922 1923 1924def network_bytes_to_kind_and_offset(network_bytes): 1925 """Strip of a record kind from the front of network_bytes. 1926 1927 :param network_bytes: The bytes of a record. 1928 :return: A tuple (storage_kind, offset_of_remaining_bytes) 1929 """ 1930 line_end = network_bytes.find(b'\n') 1931 storage_kind = network_bytes[:line_end].decode('ascii') 1932 return storage_kind, line_end + 1 1933 1934 1935class NetworkRecordStream(object): 1936 """A record_stream which reconstitures a serialised stream.""" 1937 1938 def __init__(self, bytes_iterator): 1939 """Create a NetworkRecordStream. 1940 1941 :param bytes_iterator: An iterator of bytes. Each item in this 1942 iterator should have been obtained from a record_streams' 1943 record.get_bytes_as(record.storage_kind) call. 1944 """ 1945 self._bytes_iterator = bytes_iterator 1946 self._kind_factory = { 1947 'fulltext': fulltext_network_to_record, 1948 'groupcompress-block': groupcompress.network_block_to_records, 1949 'knit-ft-gz': knit.knit_network_to_record, 1950 'knit-delta-gz': knit.knit_network_to_record, 1951 'knit-annotated-ft-gz': knit.knit_network_to_record, 1952 'knit-annotated-delta-gz': knit.knit_network_to_record, 1953 'knit-delta-closure': knit.knit_delta_closure_to_records, 1954 } 1955 1956 def read(self): 1957 """Read the stream. 1958 1959 :return: An iterator as per VersionedFiles.get_record_stream(). 1960 """ 1961 for bytes in self._bytes_iterator: 1962 storage_kind, line_end = network_bytes_to_kind_and_offset(bytes) 1963 for record in self._kind_factory[storage_kind]( 1964 storage_kind, bytes, line_end): 1965 yield record 1966 1967 1968def fulltext_network_to_record(kind, bytes, line_end): 1969 """Convert a network fulltext record to record.""" 1970 meta_len, = struct.unpack('!L', bytes[line_end:line_end + 4]) 1971 record_meta = bytes[line_end + 4:line_end + 4 + meta_len] 1972 key, parents = bencode.bdecode_as_tuple(record_meta) 1973 if parents == b'nil': 1974 parents = None 1975 fulltext = bytes[line_end + 4 + meta_len:] 1976 return [FulltextContentFactory(key, parents, None, fulltext)] 1977 1978 1979def _length_prefix(bytes): 1980 return struct.pack('!L', len(bytes)) 1981 1982 1983def record_to_fulltext_bytes(record): 1984 if record.parents is None: 1985 parents = b'nil' 1986 else: 1987 parents = record.parents 1988 record_meta = bencode.bencode((record.key, parents)) 1989 record_content = record.get_bytes_as('fulltext') 1990 return b"fulltext\n%s%s%s" % ( 1991 _length_prefix(record_meta), record_meta, record_content) 1992 1993 1994def sort_groupcompress(parent_map): 1995 """Sort and group the keys in parent_map into groupcompress order. 1996 1997 groupcompress is defined (currently) as reverse-topological order, grouped 1998 by the key prefix. 1999 2000 :return: A sorted-list of keys 2001 """ 2002 # gc-optimal ordering is approximately reverse topological, 2003 # properly grouped by file-id. 2004 per_prefix_map = {} 2005 for item in parent_map.items(): 2006 key = item[0] 2007 if isinstance(key, bytes) or len(key) == 1: 2008 prefix = b'' 2009 else: 2010 prefix = key[0] 2011 try: 2012 per_prefix_map[prefix].append(item) 2013 except KeyError: 2014 per_prefix_map[prefix] = [item] 2015 2016 present_keys = [] 2017 for prefix in sorted(per_prefix_map): 2018 present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix]))) 2019 return present_keys 2020 2021 2022class _KeyRefs(object): 2023 2024 def __init__(self, track_new_keys=False): 2025 # dict mapping 'key' to 'set of keys referring to that key' 2026 self.refs = {} 2027 if track_new_keys: 2028 # set remembering all new keys 2029 self.new_keys = set() 2030 else: 2031 self.new_keys = None 2032 2033 def clear(self): 2034 if self.refs: 2035 self.refs.clear() 2036 if self.new_keys: 2037 self.new_keys.clear() 2038 2039 def add_references(self, key, refs): 2040 # Record the new references 2041 for referenced in refs: 2042 try: 2043 needed_by = self.refs[referenced] 2044 except KeyError: 2045 needed_by = self.refs[referenced] = set() 2046 needed_by.add(key) 2047 # Discard references satisfied by the new key 2048 self.add_key(key) 2049 2050 def get_new_keys(self): 2051 return self.new_keys 2052 2053 def get_unsatisfied_refs(self): 2054 return self.refs.keys() 2055 2056 def _satisfy_refs_for_key(self, key): 2057 try: 2058 del self.refs[key] 2059 except KeyError: 2060 # No keys depended on this key. That's ok. 2061 pass 2062 2063 def add_key(self, key): 2064 # satisfy refs for key, and remember that we've seen this key. 2065 self._satisfy_refs_for_key(key) 2066 if self.new_keys is not None: 2067 self.new_keys.add(key) 2068 2069 def satisfy_refs_for_keys(self, keys): 2070 for key in keys: 2071 self._satisfy_refs_for_key(key) 2072 2073 def get_referrers(self): 2074 return set(itertools.chain.from_iterable(self.refs.values())) 2075