1# coding: utf-8 2# metadata.py -- code related to various metadata computation and access. 3# 4# Copyright 2019 Google, Inc <martinvonz@google.com> 5# Copyright 2020 Pierre-Yves David <pierre-yves.david@octobus.net> 6# 7# This software may be used and distributed according to the terms of the 8# GNU General Public License version 2 or any later version. 9from __future__ import absolute_import, print_function 10 11import multiprocessing 12import struct 13 14from .node import nullrev 15from . import ( 16 error, 17 util, 18) 19 20from .revlogutils import ( 21 flagutil as sidedataflag, 22 sidedata as sidedatamod, 23) 24 25 26class ChangingFiles(object): 27 """A class recording the changes made to files by a changeset 28 29 Actions performed on files are gathered into 3 sets: 30 31 - added: files actively added in the changeset. 32 - merged: files whose history got merged 33 - removed: files removed in the revision 34 - salvaged: files that might have been deleted by a merge but were not 35 - touched: files affected by the merge 36 37 and copies information is held by 2 mappings 38 39 - copied_from_p1: {"<new-name>": "<source-name-in-p1>"} mapping for copies 40 - copied_from_p2: {"<new-name>": "<source-name-in-p2>"} mapping for copies 41 42 See their inline help for details. 43 """ 44 45 def __init__( 46 self, 47 touched=None, 48 added=None, 49 removed=None, 50 merged=None, 51 salvaged=None, 52 p1_copies=None, 53 p2_copies=None, 54 ): 55 self._added = set(() if added is None else added) 56 self._merged = set(() if merged is None else merged) 57 self._removed = set(() if removed is None else removed) 58 self._touched = set(() if touched is None else touched) 59 self._salvaged = set(() if salvaged is None else salvaged) 60 self._touched.update(self._added) 61 self._touched.update(self._merged) 62 self._touched.update(self._removed) 63 self._p1_copies = dict(() if p1_copies is None else p1_copies) 64 self._p2_copies = dict(() if p2_copies is None else p2_copies) 65 66 def __eq__(self, other): 67 return ( 68 self.added == other.added 69 and self.merged == other.merged 70 and self.removed == other.removed 71 and self.salvaged == other.salvaged 72 and self.touched == other.touched 73 and self.copied_from_p1 == other.copied_from_p1 74 and self.copied_from_p2 == other.copied_from_p2 75 ) 76 77 @property 78 def has_copies_info(self): 79 return bool( 80 self.removed 81 or self.merged 82 or self.salvaged 83 or self.copied_from_p1 84 or self.copied_from_p2 85 ) 86 87 @util.propertycache 88 def added(self): 89 """files actively added in the changeset 90 91 Any file present in that revision that was absent in all the changeset's 92 parents. 93 94 In case of merge, this means a file absent in one of the parents but 95 existing in the other will *not* be contained in this set. (They were 96 added by an ancestor) 97 """ 98 return frozenset(self._added) 99 100 def mark_added(self, filename): 101 if 'added' in vars(self): 102 del self.added 103 self._added.add(filename) 104 self.mark_touched(filename) 105 106 def update_added(self, filenames): 107 for f in filenames: 108 self.mark_added(f) 109 110 @util.propertycache 111 def merged(self): 112 """files actively merged during a merge 113 114 Any modified files which had modification on both size that needed merging. 115 116 In this case a new filenode was created and it has two parents. 117 """ 118 return frozenset(self._merged) 119 120 def mark_merged(self, filename): 121 if 'merged' in vars(self): 122 del self.merged 123 self._merged.add(filename) 124 self.mark_touched(filename) 125 126 def update_merged(self, filenames): 127 for f in filenames: 128 self.mark_merged(f) 129 130 @util.propertycache 131 def removed(self): 132 """files actively removed by the changeset 133 134 In case of merge this will only contain the set of files removing "new" 135 content. For any file absent in the current changeset: 136 137 a) If the file exists in both parents, it is clearly "actively" removed 138 by this changeset. 139 140 b) If a file exists in only one parent and in none of the common 141 ancestors, then the file was newly added in one of the merged branches 142 and then got "actively" removed. 143 144 c) If a file exists in only one parent and at least one of the common 145 ancestors using the same filenode, then the file was unchanged on one 146 side and deleted on the other side. The merge "passively" propagated 147 that deletion, but didn't "actively" remove the file. In this case the 148 file is *not* included in the `removed` set. 149 150 d) If a file exists in only one parent and at least one of the common 151 ancestors using a different filenode, then the file was changed on one 152 side and removed on the other side. The merge process "actively" 153 decided to drop the new change and delete the file. Unlike in the 154 previous case, (c), the file included in the `removed` set. 155 156 Summary table for merge: 157 158 case | exists in parents | exists in gca || removed 159 (a) | both | * || yes 160 (b) | one | none || yes 161 (c) | one | same filenode || no 162 (d) | one | new filenode || yes 163 """ 164 return frozenset(self._removed) 165 166 def mark_removed(self, filename): 167 if 'removed' in vars(self): 168 del self.removed 169 self._removed.add(filename) 170 self.mark_touched(filename) 171 172 def update_removed(self, filenames): 173 for f in filenames: 174 self.mark_removed(f) 175 176 @util.propertycache 177 def salvaged(self): 178 """files that might have been deleted by a merge, but still exists. 179 180 During a merge, the manifest merging might select some files for 181 removal, or for a removed/changed conflict. If at commit time the file 182 still exists, its removal was "reverted" and the file is "salvaged" 183 """ 184 return frozenset(self._salvaged) 185 186 def mark_salvaged(self, filename): 187 if "salvaged" in vars(self): 188 del self.salvaged 189 self._salvaged.add(filename) 190 self.mark_touched(filename) 191 192 def update_salvaged(self, filenames): 193 for f in filenames: 194 self.mark_salvaged(f) 195 196 @util.propertycache 197 def touched(self): 198 """files either actively modified, added or removed""" 199 return frozenset(self._touched) 200 201 def mark_touched(self, filename): 202 if 'touched' in vars(self): 203 del self.touched 204 self._touched.add(filename) 205 206 def update_touched(self, filenames): 207 for f in filenames: 208 self.mark_touched(f) 209 210 @util.propertycache 211 def copied_from_p1(self): 212 return self._p1_copies.copy() 213 214 def mark_copied_from_p1(self, source, dest): 215 if 'copied_from_p1' in vars(self): 216 del self.copied_from_p1 217 self._p1_copies[dest] = source 218 219 def update_copies_from_p1(self, copies): 220 for dest, source in copies.items(): 221 self.mark_copied_from_p1(source, dest) 222 223 @util.propertycache 224 def copied_from_p2(self): 225 return self._p2_copies.copy() 226 227 def mark_copied_from_p2(self, source, dest): 228 if 'copied_from_p2' in vars(self): 229 del self.copied_from_p2 230 self._p2_copies[dest] = source 231 232 def update_copies_from_p2(self, copies): 233 for dest, source in copies.items(): 234 self.mark_copied_from_p2(source, dest) 235 236 237def compute_all_files_changes(ctx): 238 """compute the files changed by a revision""" 239 p1 = ctx.p1() 240 p2 = ctx.p2() 241 if p1.rev() == nullrev and p2.rev() == nullrev: 242 return _process_root(ctx) 243 elif p1.rev() != nullrev and p2.rev() == nullrev: 244 return _process_linear(p1, ctx) 245 elif p1.rev() == nullrev and p2.rev() != nullrev: 246 # In the wild, one can encounter changeset where p1 is null but p2 is not 247 return _process_linear(p1, ctx, parent=2) 248 elif p1.rev() == p2.rev(): 249 # In the wild, one can encounter such "non-merge" 250 return _process_linear(p1, ctx) 251 else: 252 return _process_merge(p1, p2, ctx) 253 254 255def _process_root(ctx): 256 """compute the appropriate changed files for a changeset with no parents""" 257 # Simple, there was nothing before it, so everything is added. 258 md = ChangingFiles() 259 manifest = ctx.manifest() 260 for filename in manifest: 261 md.mark_added(filename) 262 return md 263 264 265def _process_linear(parent_ctx, children_ctx, parent=1): 266 """compute the appropriate changed files for a changeset with a single parent""" 267 md = ChangingFiles() 268 parent_manifest = parent_ctx.manifest() 269 children_manifest = children_ctx.manifest() 270 271 copies_candidate = [] 272 273 for filename, d in parent_manifest.diff(children_manifest).items(): 274 if d[1][0] is None: 275 # no filenode for the "new" value, file is absent 276 md.mark_removed(filename) 277 else: 278 copies_candidate.append(filename) 279 if d[0][0] is None: 280 # not filenode for the "old" value file was absent 281 md.mark_added(filename) 282 else: 283 # filenode for both "old" and "new" 284 md.mark_touched(filename) 285 286 if parent == 1: 287 copied = md.mark_copied_from_p1 288 elif parent == 2: 289 copied = md.mark_copied_from_p2 290 else: 291 assert False, "bad parent value %d" % parent 292 293 for filename in copies_candidate: 294 copy_info = children_ctx[filename].renamed() 295 if copy_info: 296 source, srcnode = copy_info 297 copied(source, filename) 298 299 return md 300 301 302def _process_merge(p1_ctx, p2_ctx, ctx): 303 """compute the appropriate changed files for a changeset with two parents 304 305 This is a more advance case. The information we need to record is summarise 306 in the following table: 307 308 ┌──────────────┬──────────────┬──────────────┬──────────────┬──────────────┐ 309 │ diff ╲ diff │ ø │ (Some, None) │ (None, Some) │ (Some, Some) │ 310 │ p2 ╲ p1 │ │ │ │ │ 311 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤ 312 │ │ │ No Changes │ No Changes │ │ 313 │ ø │ No Changes │ OR │ OR │ No Changes │ 314 │ │ │ Deleted[1] │ Salvaged[2]│ [3] │ 315 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤ 316 │ │ No Changes │ │ │ │ 317 │ (Some, None) │ OR │ Deleted │ ø │ ø │ 318 │ │ Deleted[1] │ │ │ │ 319 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤ 320 │ │ No Changes │ │ │ Touched │ 321 │ (None, Some) │ OR │ ø │ Added │OR Salvaged │ 322 │ │ Salvaged[2]│ │ (copied?) │ (copied?) │ 323 ├──────────────┼──────────────┼──────────────┼──────────────┼──────────────┤ 324 │ │ │ │ Touched │ Merged │ 325 │ (Some, Some) │ No Changes │ ø │OR Salvaged │OR Touched │ 326 │ │ [3] │ │ (copied?) │ (copied?) │ 327 └──────────────┴──────────────┴──────────────┴──────────────┴──────────────┘ 328 329 Special case [1]: 330 331 The situation is: 332 - parent-A: file exists, 333 - parent-B: no file, 334 - working-copy: no file. 335 336 Detecting a "deletion" will depend on the presence of actual change on 337 the "parent-A" branch: 338 339 Subcase or : if the state of the file in "parent-A" is unchanged 340 compared to the merge ancestors, then parent-A branch left the file 341 untouched while parent-B deleted it. We simply apply the change from 342 "parent-B" branch the file was automatically dropped. 343 The result is: 344 - file is not recorded as touched by the merge. 345 346 Subcase or : otherwise, the change from parent-A branch were explicitly dropped and 347 the file was "deleted again". From a user perspective, the message 348 about "locally changed" while "remotely deleted" (or the other way 349 around) was issued and the user chose to deleted the file. 350 The result: 351 - file is recorded as touched by the merge. 352 353 354 Special case [2]: 355 356 The situation is: 357 - parent-A: no file, 358 - parent-B: file, 359 - working-copy: file (same content as parent-B). 360 361 There are three subcases depending on the ancestors contents: 362 363 - A) the file is missing in all ancestors, 364 - B) at least one ancestor has the file with filenode ≠ from parent-B, 365 - C) all ancestors use the same filenode as parent-B, 366 367 Subcase (A) is the simpler, nothing happend on parent-A side while 368 parent-B added it. 369 370 The result: 371 - the file is not marked as touched by the merge. 372 373 Subcase (B) is the counter part of "Special case [1]", the file was 374 modified on parent-B side, while parent-A side deleted it. However this 375 time, the conflict was solved by keeping the file (and its 376 modification). We consider the file as "salvaged". 377 378 The result: 379 - the file is marked as "salvaged" by the merge. 380 381 Subcase (C) is subtle variation of the case above. In this case, the 382 file in unchanged on the parent-B side and actively removed on the 383 parent-A side. So the merge machinery correctly decide it should be 384 removed. However, the file was explicitly restored to its parent-B 385 content before the merge was commited. The file is be marked 386 as salvaged too. From the merge result perspective, this is similar to 387 Subcase (B), however from the merge resolution perspective they differ 388 since in (C), there was some conflict not obvious solution to the 389 merge (That got reversed) 390 391 Special case [3]: 392 393 The situation is: 394 - parent-A: file, 395 - parent-B: file (different filenode as parent-A), 396 - working-copy: file (same filenode as parent-B). 397 398 This case is in theory much simple, for this to happens, this mean the 399 filenode in parent-A is purely replacing the one in parent-B (either a 400 descendant, or a full new file history, see changeset). So the merge 401 introduce no changes, and the file is not affected by the merge... 402 403 However, in the wild it is possible to find commit with the above is not 404 True. For example repository have some commit where the *new* node is an 405 ancestor of the node in parent-A, or where parent-A and parent-B are two 406 branches of the same file history, yet not merge-filenode were created 407 (while the "merge" should have led to a "modification"). 408 409 Detecting such cases (and not recording the file as modified) would be a 410 nice bonus. However do not any of this yet. 411 """ 412 413 repo = ctx.repo() 414 md = ChangingFiles() 415 416 m = ctx.manifest() 417 p1m = p1_ctx.manifest() 418 p2m = p2_ctx.manifest() 419 diff_p1 = p1m.diff(m) 420 diff_p2 = p2m.diff(m) 421 422 cahs = ctx.repo().changelog.commonancestorsheads( 423 p1_ctx.node(), p2_ctx.node() 424 ) 425 if not cahs: 426 cahs = [nullrev] 427 mas = [ctx.repo()[r].manifest() for r in cahs] 428 429 copy_candidates = [] 430 431 # Dealing with case happens automatically. Since there are no entry in 432 # d1 nor d2, we won't iterate on it ever. 433 434 # Iteration over d1 content will deal with all cases, but the one in the 435 # first column of the table. 436 for filename, d1 in diff_p1.items(): 437 438 d2 = diff_p2.pop(filename, None) 439 440 if d2 is None: 441 # this deal with the first line of the table. 442 _process_other_unchanged(md, mas, filename, d1) 443 else: 444 445 if d1[0][0] is None and d2[0][0] is None: 446 # case — both deleted the file. 447 md.mark_added(filename) 448 copy_candidates.append(filename) 449 elif d1[1][0] is None and d2[1][0] is None: 450 # case — both deleted the file. 451 md.mark_removed(filename) 452 elif d1[1][0] is not None and d2[1][0] is not None: 453 if d1[0][0] is None or d2[0][0] is None: 454 if any(_find(ma, filename) is not None for ma in mas): 455 # case or 456 md.mark_salvaged(filename) 457 else: 458 # case : touched 459 md.mark_touched(filename) 460 else: 461 fctx = repo.filectx(filename, fileid=d1[1][0]) 462 if fctx.p2().rev() == nullrev: 463 # case 464 # lets assume we can trust the file history. If the 465 # filenode is not a merge, the file was not merged. 466 md.mark_touched(filename) 467 else: 468 # case 469 md.mark_merged(filename) 470 copy_candidates.append(filename) 471 else: 472 # Impossible case, the post-merge file status cannot be None on 473 # one side and Something on the other side. 474 assert False, "unreachable" 475 476 # Iteration over remaining d2 content deal with the first column of the 477 # table. 478 for filename, d2 in diff_p2.items(): 479 _process_other_unchanged(md, mas, filename, d2) 480 481 for filename in copy_candidates: 482 copy_info = ctx[filename].renamed() 483 if copy_info: 484 source, srcnode = copy_info 485 if source in p1_ctx and p1_ctx[source].filenode() == srcnode: 486 md.mark_copied_from_p1(source, filename) 487 elif source in p2_ctx and p2_ctx[source].filenode() == srcnode: 488 md.mark_copied_from_p2(source, filename) 489 return md 490 491 492def _find(manifest, filename): 493 """return the associate filenode or None""" 494 if filename not in manifest: 495 return None 496 return manifest.find(filename)[0] 497 498 499def _process_other_unchanged(md, mas, filename, diff): 500 source_node = diff[0][0] 501 target_node = diff[1][0] 502 503 if source_node is not None and target_node is None: 504 if any(not _find(ma, filename) == source_node for ma in mas): 505 # case of 506 md.mark_removed(filename) 507 # else, we have case or : no change need to be recorded 508 elif source_node is None and target_node is not None: 509 if any(_find(ma, filename) is not None for ma in mas): 510 # case or 511 md.mark_salvaged(filename) 512 # else, we have case or : simple merge without intervention 513 elif source_node is not None and target_node is not None: 514 # case or : simple merge without intervention 515 # 516 # In buggy case where source_node is not an ancestors of target_node. 517 # There should have a been a new filenode created, recording this as 518 # "modified". We do not deal with them yet. 519 pass 520 else: 521 # An impossible case, the diff algorithm should not return entry if the 522 # file is missing on both side. 523 assert False, "unreachable" 524 525 526def _missing_from_all_ancestors(mas, filename): 527 return all(_find(ma, filename) is None for ma in mas) 528 529 530def computechangesetfilesadded(ctx): 531 """return the list of files added in a changeset""" 532 added = [] 533 for f in ctx.files(): 534 if not any(f in p for p in ctx.parents()): 535 added.append(f) 536 return added 537 538 539def get_removal_filter(ctx, x=None): 540 """return a function to detect files "wrongly" detected as `removed` 541 542 When a file is removed relative to p1 in a merge, this 543 function determines whether the absence is due to a 544 deletion from a parent, or whether the merge commit 545 itself deletes the file. We decide this by doing a 546 simplified three way merge of the manifest entry for 547 the file. There are two ways we decide the merge 548 itself didn't delete a file: 549 - neither parent (nor the merge) contain the file 550 - exactly one parent contains the file, and that 551 parent has the same filelog entry as the merge 552 ancestor (or all of them if there two). In other 553 words, that parent left the file unchanged while the 554 other one deleted it. 555 One way to think about this is that deleting a file is 556 similar to emptying it, so the list of changed files 557 should be similar either way. The computation 558 described above is not done directly in _filecommit 559 when creating the list of changed files, however 560 it does something very similar by comparing filelog 561 nodes. 562 """ 563 564 if x is not None: 565 p1, p2, m1, m2 = x 566 else: 567 p1 = ctx.p1() 568 p2 = ctx.p2() 569 m1 = p1.manifest() 570 m2 = p2.manifest() 571 572 @util.cachefunc 573 def mas(): 574 p1n = p1.node() 575 p2n = p2.node() 576 cahs = ctx.repo().changelog.commonancestorsheads(p1n, p2n) 577 if not cahs: 578 cahs = [nullrev] 579 return [ctx.repo()[r].manifest() for r in cahs] 580 581 def deletionfromparent(f): 582 if f in m1: 583 return f not in m2 and all( 584 f in ma and ma.find(f) == m1.find(f) for ma in mas() 585 ) 586 elif f in m2: 587 return all(f in ma and ma.find(f) == m2.find(f) for ma in mas()) 588 else: 589 return True 590 591 return deletionfromparent 592 593 594def computechangesetfilesremoved(ctx): 595 """return the list of files removed in a changeset""" 596 removed = [] 597 for f in ctx.files(): 598 if f not in ctx: 599 removed.append(f) 600 if removed: 601 rf = get_removal_filter(ctx) 602 removed = [r for r in removed if not rf(r)] 603 return removed 604 605 606def computechangesetfilesmerged(ctx): 607 """return the list of files merged in a changeset""" 608 merged = [] 609 if len(ctx.parents()) < 2: 610 return merged 611 for f in ctx.files(): 612 if f in ctx: 613 fctx = ctx[f] 614 parents = fctx._filelog.parents(fctx._filenode) 615 if parents[1] != ctx.repo().nullid: 616 merged.append(f) 617 return merged 618 619 620def computechangesetcopies(ctx): 621 """return the copies data for a changeset 622 623 The copies data are returned as a pair of dictionnary (p1copies, p2copies). 624 625 Each dictionnary are in the form: `{newname: oldname}` 626 """ 627 p1copies = {} 628 p2copies = {} 629 p1 = ctx.p1() 630 p2 = ctx.p2() 631 narrowmatch = ctx._repo.narrowmatch() 632 for dst in ctx.files(): 633 if not narrowmatch(dst) or dst not in ctx: 634 continue 635 copied = ctx[dst].renamed() 636 if not copied: 637 continue 638 src, srcnode = copied 639 if src in p1 and p1[src].filenode() == srcnode: 640 p1copies[dst] = src 641 elif src in p2 and p2[src].filenode() == srcnode: 642 p2copies[dst] = src 643 return p1copies, p2copies 644 645 646def encodecopies(files, copies): 647 items = [] 648 for i, dst in enumerate(files): 649 if dst in copies: 650 items.append(b'%d\0%s' % (i, copies[dst])) 651 if len(items) != len(copies): 652 raise error.ProgrammingError( 653 b'some copy targets missing from file list' 654 ) 655 return b"\n".join(items) 656 657 658def decodecopies(files, data): 659 try: 660 copies = {} 661 if not data: 662 return copies 663 for l in data.split(b'\n'): 664 strindex, src = l.split(b'\0') 665 i = int(strindex) 666 dst = files[i] 667 copies[dst] = src 668 return copies 669 except (ValueError, IndexError): 670 # Perhaps someone had chosen the same key name (e.g. "p1copies") and 671 # used different syntax for the value. 672 return None 673 674 675def encodefileindices(files, subset): 676 subset = set(subset) 677 indices = [] 678 for i, f in enumerate(files): 679 if f in subset: 680 indices.append(b'%d' % i) 681 return b'\n'.join(indices) 682 683 684def decodefileindices(files, data): 685 try: 686 subset = [] 687 if not data: 688 return subset 689 for strindex in data.split(b'\n'): 690 i = int(strindex) 691 if i < 0 or i >= len(files): 692 return None 693 subset.append(files[i]) 694 return subset 695 except (ValueError, IndexError): 696 # Perhaps someone had chosen the same key name (e.g. "added") and 697 # used different syntax for the value. 698 return None 699 700 701# see mercurial/helptext/internals/revlogs.txt for details about the format 702 703ACTION_MASK = int("111" "00", 2) 704# note: untouched file used as copy source will as `000` for this mask. 705ADDED_FLAG = int("001" "00", 2) 706MERGED_FLAG = int("010" "00", 2) 707REMOVED_FLAG = int("011" "00", 2) 708SALVAGED_FLAG = int("100" "00", 2) 709TOUCHED_FLAG = int("101" "00", 2) 710 711COPIED_MASK = int("11", 2) 712COPIED_FROM_P1_FLAG = int("10", 2) 713COPIED_FROM_P2_FLAG = int("11", 2) 714 715# structure is <flag><filename-end><copy-source> 716INDEX_HEADER = struct.Struct(">L") 717INDEX_ENTRY = struct.Struct(">bLL") 718 719 720def encode_files_sidedata(files): 721 all_files = set(files.touched) 722 all_files.update(files.copied_from_p1.values()) 723 all_files.update(files.copied_from_p2.values()) 724 all_files = sorted(all_files) 725 file_idx = {f: i for (i, f) in enumerate(all_files)} 726 file_idx[None] = 0 727 728 chunks = [INDEX_HEADER.pack(len(all_files))] 729 730 filename_length = 0 731 for f in all_files: 732 filename_size = len(f) 733 filename_length += filename_size 734 flag = 0 735 if f in files.added: 736 flag |= ADDED_FLAG 737 elif f in files.merged: 738 flag |= MERGED_FLAG 739 elif f in files.removed: 740 flag |= REMOVED_FLAG 741 elif f in files.salvaged: 742 flag |= SALVAGED_FLAG 743 elif f in files.touched: 744 flag |= TOUCHED_FLAG 745 746 copy = None 747 if f in files.copied_from_p1: 748 flag |= COPIED_FROM_P1_FLAG 749 copy = files.copied_from_p1.get(f) 750 elif f in files.copied_from_p2: 751 copy = files.copied_from_p2.get(f) 752 flag |= COPIED_FROM_P2_FLAG 753 copy_idx = file_idx[copy] 754 chunks.append(INDEX_ENTRY.pack(flag, filename_length, copy_idx)) 755 chunks.extend(all_files) 756 return {sidedatamod.SD_FILES: b''.join(chunks)} 757 758 759def decode_files_sidedata(sidedata): 760 md = ChangingFiles() 761 raw = sidedata.get(sidedatamod.SD_FILES) 762 763 if raw is None: 764 return md 765 766 copies = [] 767 all_files = [] 768 769 assert len(raw) >= INDEX_HEADER.size 770 total_files = INDEX_HEADER.unpack_from(raw, 0)[0] 771 772 offset = INDEX_HEADER.size 773 file_offset_base = offset + (INDEX_ENTRY.size * total_files) 774 file_offset_last = file_offset_base 775 776 assert len(raw) >= file_offset_base 777 778 for idx in range(total_files): 779 flag, file_end, copy_idx = INDEX_ENTRY.unpack_from(raw, offset) 780 file_end += file_offset_base 781 filename = raw[file_offset_last:file_end] 782 filesize = file_end - file_offset_last 783 assert len(filename) == filesize 784 offset += INDEX_ENTRY.size 785 file_offset_last = file_end 786 all_files.append(filename) 787 if flag & ACTION_MASK == ADDED_FLAG: 788 md.mark_added(filename) 789 elif flag & ACTION_MASK == MERGED_FLAG: 790 md.mark_merged(filename) 791 elif flag & ACTION_MASK == REMOVED_FLAG: 792 md.mark_removed(filename) 793 elif flag & ACTION_MASK == SALVAGED_FLAG: 794 md.mark_salvaged(filename) 795 elif flag & ACTION_MASK == TOUCHED_FLAG: 796 md.mark_touched(filename) 797 798 copied = None 799 if flag & COPIED_MASK == COPIED_FROM_P1_FLAG: 800 copied = md.mark_copied_from_p1 801 elif flag & COPIED_MASK == COPIED_FROM_P2_FLAG: 802 copied = md.mark_copied_from_p2 803 804 if copied is not None: 805 copies.append((copied, filename, copy_idx)) 806 807 for copied, filename, copy_idx in copies: 808 copied(all_files[copy_idx], filename) 809 810 return md 811 812 813def _getsidedata(srcrepo, rev): 814 ctx = srcrepo[rev] 815 files = compute_all_files_changes(ctx) 816 return encode_files_sidedata(files), files.has_copies_info 817 818 819def copies_sidedata_computer(repo, revlog, rev, existing_sidedata): 820 sidedata, has_copies_info = _getsidedata(repo, rev) 821 flags_to_add = sidedataflag.REVIDX_HASCOPIESINFO if has_copies_info else 0 822 return sidedata, (flags_to_add, 0) 823 824 825def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens): 826 """The function used by worker precomputing sidedata 827 828 It read an input queue containing revision numbers 829 It write in an output queue containing (rev, <sidedata-map>) 830 831 The `None` input value is used as a stop signal. 832 833 The `tokens` semaphore is user to avoid having too many unprocessed 834 entries. The workers needs to acquire one token before fetching a task. 835 They will be released by the consumer of the produced data. 836 """ 837 tokens.acquire() 838 rev = revs_queue.get() 839 while rev is not None: 840 data = _getsidedata(srcrepo, rev) 841 sidedata_queue.put((rev, data)) 842 tokens.acquire() 843 rev = revs_queue.get() 844 # processing of `None` is completed, release the token. 845 tokens.release() 846 847 848BUFF_PER_WORKER = 50 849 850 851def _get_worker_sidedata_adder(srcrepo, destrepo): 852 """The parallel version of the sidedata computation 853 854 This code spawn a pool of worker that precompute a buffer of sidedata 855 before we actually need them""" 856 # avoid circular import copies -> scmutil -> worker -> copies 857 from . import worker 858 859 nbworkers = worker._numworkers(srcrepo.ui) 860 861 tokens = multiprocessing.BoundedSemaphore(nbworkers * BUFF_PER_WORKER) 862 revsq = multiprocessing.Queue() 863 sidedataq = multiprocessing.Queue() 864 865 assert srcrepo.filtername is None 866 # queue all tasks beforehand, revision numbers are small and it make 867 # synchronisation simpler 868 # 869 # Since the computation for each node can be quite expensive, the overhead 870 # of using a single queue is not revelant. In practice, most computation 871 # are fast but some are very expensive and dominate all the other smaller 872 # cost. 873 for r in srcrepo.changelog.revs(): 874 revsq.put(r) 875 # queue the "no more tasks" markers 876 for i in range(nbworkers): 877 revsq.put(None) 878 879 allworkers = [] 880 for i in range(nbworkers): 881 args = (srcrepo, revsq, sidedataq, tokens) 882 w = multiprocessing.Process(target=_sidedata_worker, args=args) 883 allworkers.append(w) 884 w.start() 885 886 # dictionnary to store results for revision higher than we one we are 887 # looking for. For example, if we need the sidedatamap for 42, and 43 is 888 # received, when shelve 43 for later use. 889 staging = {} 890 891 def sidedata_companion(repo, revlog, rev, old_sidedata): 892 # Is the data previously shelved ? 893 data = staging.pop(rev, None) 894 if data is None: 895 # look at the queued result until we find the one we are lookig 896 # for (shelve the other ones) 897 r, data = sidedataq.get() 898 while r != rev: 899 staging[r] = data 900 r, data = sidedataq.get() 901 tokens.release() 902 sidedata, has_copies_info = data 903 new_flag = 0 904 if has_copies_info: 905 new_flag = sidedataflag.REVIDX_HASCOPIESINFO 906 return sidedata, (new_flag, 0) 907 908 return sidedata_companion 909