1# dirstatemap.py
2#
3# This software may be used and distributed according to the terms of the
4# GNU General Public License version 2 or any later version.
5
6from __future__ import absolute_import
7
8import errno
9
10from .i18n import _
11
12from . import (
13    error,
14    pathutil,
15    policy,
16    pycompat,
17    txnutil,
18    util,
19)
20
21from .dirstateutils import (
22    docket as docketmod,
23    v2,
24)
25
# Modules resolved through the `policy` import helpers.
parsers = policy.importmod('parsers')
rustmod = policy.importrust('dirstate')

propertycache = util.propertycache

# Take the entry class from the Rust module when `importrust` returned one,
# otherwise from `parsers`.
DirstateItem = (
    parsers.DirstateItem if rustmod is None else rustmod.DirstateItem
)

# Mask applied to file sizes before they are recorded (low 31 bits).
rangemask = (1 << 31) - 1
37
38
class _dirstatemapcommon(object):
    """
    Methods that are identical for both implementations of the dirstatemap
    class, with and without Rust extensions enabled.

    Subclasses are expected to supply `_map` and `copymap` and to override
    the `_dirs_incr`, `_dirs_decr`, `_refresh_entry`, `_insert_entry` and
    `_drop_entry` hooks, which do nothing in this base class.
    """

    # please pytype

    _map = None
    copymap = None

    def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2):
        """Record the collaborators and reset the lazily loaded state.

        Nothing is read from disk here; parents and docket start as None and
        are filled in on first use.
        """
        self._use_dirstate_v2 = use_dirstate_v2
        self._nodeconstants = nodeconstants
        self._ui = ui
        self._opener = opener
        self._root = root
        self._filename = b'dirstate'
        self._nodelen = 20  # Also update Rust code when changing this!
        self._parents = None
        self._dirtyparents = False
        self._docket = None

        # for consistent view between _pl() and _read() invocations
        self._pendingmode = None

    def preload(self):
        """Loads the underlying data, if it's not already loaded"""
        self._map

    def get(self, key, default=None):
        """Return the entry stored for `key`, or `default` if there is none"""
        return self._map.get(key, default)

    def __len__(self):
        return len(self._map)

    def __iter__(self):
        return iter(self._map)

    def __contains__(self, key):
        return key in self._map

    def __getitem__(self, item):
        return self._map[item]

    ### sub-class utility method
    #
    # Used to allow for a generic implementation of some methods while still
    # coping with minor differences between implementations.

    def _dirs_incr(self, filename, old_entry=None):
        """increment the dirstate counter if applicable

        This might be a no-op for some subclasses that deal with directory
        tracking in a different way.
        """

    def _dirs_decr(self, filename, old_entry=None, remove_variant=False):
        """decrement the dirstate counter if applicable

        This might be a no-op for some subclasses that deal with directory
        tracking in a different way.
        """

    def _refresh_entry(self, f, entry):
        """record updated state of an entry"""

    def _insert_entry(self, f, entry):
        """add a new dirstate entry (or replace an unrelated one)

        The fact it is actually new is the responsibility of the caller
        """

    def _drop_entry(self, f):
        """remove any entry for file f

        This should also drop associated copy information

        The fact we actually need to drop it is the responsibility of the
        caller"""

    ### method to manipulate the entries

    def set_possibly_dirty(self, filename):
        """record that the current state of the file on disk is unknown

        Raises whatever `self[filename]` raises when there is no entry.
        """
        entry = self[filename]
        entry.set_possibly_dirty()
        self._refresh_entry(filename, entry)

    def set_clean(self, filename, mode, size, mtime):
        """mark a file as back to a clean state

        `size` is masked with `rangemask` before being stored, and any copy
        source recorded for the file is forgotten.
        """
        entry = self[filename]
        size = size & rangemask
        entry.set_clean(mode, size, mtime)
        self._refresh_entry(filename, entry)
        self.copymap.pop(filename, None)

    def set_tracked(self, filename):
        """Mark a file as tracked

        Returns True when the file had no entry or its entry was not
        tracked, False when it was already tracked (in which case the entry
        is only flagged as possibly dirty).
        """
        new = False
        entry = self.get(filename)
        if entry is None:
            self._dirs_incr(filename)
            entry = DirstateItem(
                wc_tracked=True,
            )

            self._insert_entry(filename, entry)
            new = True
        elif not entry.tracked:
            self._dirs_incr(filename, entry)
            entry.set_tracked()
            self._refresh_entry(filename, entry)
            new = True
        else:
            # XXX This is probably overkill for most cases, but we need this
            # to fully replace the `normallookup` call with `set_tracked` one.
            # Consider smoothing this in the future.
            entry.set_possibly_dirty()
            self._refresh_entry(filename, entry)
        return new

    def set_untracked(self, f):
        """Mark a file as no longer tracked in the dirstate map

        Returns False when there is no entry for `f`, True otherwise.
        """
        entry = self.get(f)
        if entry is None:
            return False
        else:
            self._dirs_decr(f, old_entry=entry, remove_variant=not entry.added)
            # copy information is kept only while the entry carries p2 info
            if not entry.p2_info:
                self.copymap.pop(f, None)
            entry.set_untracked()
            self._refresh_entry(f, entry)
            return True

    def reset_state(
        self,
        filename,
        wc_tracked=False,
        p1_tracked=False,
        p2_info=False,
        has_meaningful_mtime=True,
        has_meaningful_data=True,
        parentfiledata=None,
    ):
        """Set an entry to a given state, disregarding all previous state

        This is to be used by the part of the dirstate API dedicated to
        adjusting the dirstate after a update/merge.

        All keyword arguments are forwarded to the `DirstateItem`
        constructor.

        note: calling this might result in no entry existing at all if the
        dirstate map does not see any point at having one for this file
        anymore (i.e. none of `wc_tracked`, `p1_tracked`, `p2_info` is set).
        """
        # copy information are now outdated
        # (maybe new information should be in directly passed to this function)
        self.copymap.pop(filename, None)

        old_entry = self._map.get(filename)
        if not (p1_tracked or p2_info or wc_tracked):
            self._drop_entry(filename)
            self._dirs_decr(filename, old_entry=old_entry)
            return

        self._dirs_incr(filename, old_entry)
        entry = DirstateItem(
            wc_tracked=wc_tracked,
            p1_tracked=p1_tracked,
            p2_info=p2_info,
            # This argument used to be accepted but silently dropped.
            has_meaningful_data=has_meaningful_data,
            has_meaningful_mtime=has_meaningful_mtime,
            parentfiledata=parentfiledata,
        )
        self._insert_entry(filename, entry)

    ### disk interaction

    def _opendirstatefile(self):
        """Open the dirstate file through `txnutil.trypending`

        The mode reported by `trypending` is remembered; if a later call
        reports a different mode the file is closed and `error.Abort` is
        raised.
        """
        fp, mode = txnutil.trypending(self._root, self._opener, self._filename)
        if self._pendingmode is not None and self._pendingmode != mode:
            fp.close()
            raise error.Abort(
                _(b'working directory state may be changed parallelly')
            )
        self._pendingmode = mode
        return fp

    def _readdirstatefile(self, size=-1):
        """Return up to `size` bytes of the dirstate file (all by default)

        A missing file (ENOENT) yields b''; any other IOError propagates.
        """
        try:
            with self._opendirstatefile() as fp:
                return fp.read(size)
        except IOError as err:
            if err.errno != errno.ENOENT:
                raise
            # File doesn't exist, so the current state is empty
            return b''

    @property
    def docket(self):
        """The dirstate-v2 docket, parsed from the dirstate file on first use

        Raises `error.ProgrammingError` when dirstate-v2 is not in use.
        """
        if not self._docket:
            if not self._use_dirstate_v2:
                raise error.ProgrammingError(
                    b'dirstate only has a docket in v2 format'
                )
            self._docket = docketmod.DirstateDocket.parse(
                self._readdirstatefile(), self._nodeconstants
            )
        return self._docket

    def write_v2_no_append(self, tr, st, meta, packed):
        """Write `packed` to a fresh data file and point a new docket at it

        `st` receives the serialized docket and is closed. The previous data
        file, if any, is unlinked: after the transaction `tr` closes when
        there is one, immediately otherwise.
        """
        old_docket = self.docket
        new_docket = docketmod.DirstateDocket.with_new_uuid(
            self.parents(), len(packed), meta
        )
        data_filename = new_docket.data_filename()
        if tr:
            tr.add(data_filename, 0)
        self._opener.write(data_filename, packed)
        # Write the new docket after the new data file has been
        # written. Because `st` was opened with `atomictemp=True`,
        # the actual `.hg/dirstate` file is only affected on close.
        st.write(new_docket.serialize())
        st.close()
        # Remove the old data file after the new docket pointing to
        # the new data file was written.
        if old_docket.uuid:
            old_data_filename = old_docket.data_filename()

            def unlink(_tr=None):
                return self._opener.unlink(old_data_filename)

            if tr:
                category = b"dirstate-v2-clean-" + old_docket.uuid
                tr.addpostclose(category, unlink)
            else:
                unlink()
        self._docket = new_docket

    ### reading/setting parents

    def parents(self):
        """Return the pair of working copy parents, reading them if needed

        With dirstate-v2 they come from the docket. Otherwise the first
        `2 * self._nodelen` bytes of the dirstate file are used; an empty
        file yields two null ids and any other length raises `error.Abort`.
        """
        if not self._parents:
            if self._use_dirstate_v2:
                self._parents = self.docket.parents
            else:
                read_len = self._nodelen * 2
                st = self._readdirstatefile(read_len)
                l = len(st)
                if l == read_len:
                    self._parents = (
                        st[: self._nodelen],
                        st[self._nodelen : 2 * self._nodelen],
                    )
                elif l == 0:
                    self._parents = (
                        self._nodeconstants.nullid,
                        self._nodeconstants.nullid,
                    )
                else:
                    raise error.Abort(
                        _(b'working directory state appears damaged!')
                    )

        return self._parents
298
299
class dirstatemap(_dirstatemapcommon):
    """Map encapsulating the dirstate's contents.

    The dirstate contains the following state:

    - `identity` is the identity of the dirstate file, which can be used to
      detect when changes have occurred to the dirstate file.

    - `parents` is a pair containing the parents of the working copy. The
      parents are updated by calling `setparents`.

    - the state map maps filenames to tuples of (state, mode, size, mtime),
      where state is a single character representing 'normal', 'added',
      'removed', or 'merged'. It is read by treating the dirstate as a
      dict.  File state is updated by calling various methods (see each
      documentation for details):

      - `reset_state`,
      - `set_tracked`
      - `set_untracked`
      - `set_clean`
      - `set_possibly_dirty`

    - `copymap` maps destination filenames to their source filename.

    The dirstate also provides the following views onto the state:

    - `filefoldmap` is a dict mapping normalized filenames to the denormalized
      form that they appear as in the dirstate.

    - `dirfoldmap` is a dict mapping normalized directory names to the
      denormalized form that they appear as in the dirstate.
    """

    ### Core data storage and access

    @propertycache
    def _map(self):
        """Filename -> entry mapping, populated by `read()` on first use"""
        # Install an empty dict first: `read()` fills (or replaces) it.
        self._map = {}
        self.read()
        return self._map

    @propertycache
    def copymap(self):
        """Copy destination -> copy source mapping"""
        # Install the dict before touching `_map` so that loading the map
        # can fill it.
        self.copymap = {}
        self._map
        return self.copymap

    def clear(self):
        """Empty both maps, reset the parents to null and drop cached views"""
        self._map.clear()
        self.copymap.clear()
        self.setparents(self._nodeconstants.nullid, self._nodeconstants.nullid)
        for cached in (b"_dirs", b"_alldirs", b"filefoldmap", b"dirfoldmap"):
            util.clearcachedproperty(self, cached)

    def items(self):
        """Iterate over (filename, entry) pairs"""
        return pycompat.iteritems(self._map)

    # forward for python2,3 compat
    iteritems = items

    def debug_iter(self, all):
        """
        Return an iterator of (filename, state, mode, size, mtime) tuples

        `all` is unused when Rust is not enabled
        """
        for path, entry in self.items():
            yield (path, entry.state, entry.mode, entry.size, entry.mtime)

    def keys(self):
        """Return the filenames present in the map"""
        return self._map.keys()

    ### reading/setting parents

    def setparents(self, p1, p2, fold_p2=False):
        """Set the working copy parents and flag them as dirty

        With `fold_p2`, every entry carrying p2 information gets its merge
        data dropped and its copy source removed from `copymap`. The removed
        (non-empty) copy sources are returned as a {destination: source}
        dict, which is empty when `fold_p2` is false.
        """
        self._parents = (p1, p2)
        self._dirtyparents = True
        copies = {}
        if not fold_p2:
            return copies
        for path, entry in pycompat.iteritems(self._map):
            # Discard "merged" markers when moving away from a merge state
            if not entry.p2_info:
                continue
            copy_source = self.copymap.pop(path, None)
            if copy_source:
                copies[path] = copy_source
            entry.drop_merge_data()
        return copies

    ### disk interaction

    def read(self):
        """Load the on-disk dirstate into `_map` and `copymap`

        Also records `identity` and, unless the parents were already
        modified in memory, the parents found on disk. Returns early, leaving
        the maps untouched, when there is no data to parse.
        """
        # ignore HG_PENDING because identity is used only for writing
        dirstate_path = self._opener.join(self._filename)
        self.identity = util.filestat.frompath(dirstate_path)

        if not self._use_dirstate_v2:
            raw = self._readdirstatefile()
        elif self.docket.uuid:
            raw = self._opener.read(self.docket.data_filename())
        else:
            # v2 docket without a data file: nothing to load
            return

        if not raw:
            return

        # TODO: adjust this estimate for dirstate-v2
        if util.safehasattr(parsers, b'dict_new_presized'):
            # Make an estimate of the number of files in the dirstate based on
            # its size. This trades wasting some memory for avoiding costly
            # resizes. Each entry have a prefix of 17 bytes followed by one or
            # two path names. Studies on various large-scale real-world
            # repositories found 54 bytes a reasonable upper limit for the
            # average path names. Copy entries are ignored for the sake of
            # this estimate.
            estimated_entries = len(raw) // 71
            self._map = parsers.dict_new_presized(estimated_entries)

        # Python's garbage collector triggers a GC each time a certain number
        # of container objects (the number being defined by
        # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
        # for each file in the dirstate. The C version then immediately marks
        # them as not to be tracked by the collector. However, this has no
        # effect on when GCs are triggered, only on what objects the GC looks
        # into. This means that O(number of files) GCs are unavoidable.
        # Depending on when in the process's lifetime the dirstate is parsed,
        # this can get very expensive. As a workaround, disable GC while
        # parsing the dirstate.
        #
        # (we cannot decorate the function directly since it is in a C module)
        if self._use_dirstate_v2:
            disk_parents = self.docket.parents
            tree_metadata = self.docket.tree_metadata
            parse_v2 = util.nogc(v2.parse_dirstate)
            parse_v2(self._map, self.copymap, raw, tree_metadata)
        else:
            parse_v1 = util.nogc(parsers.parse_dirstate)
            disk_parents = parse_v1(self._map, self.copymap, raw)
        if not self._dirtyparents:
            self.setparents(*disk_parents)

        # Avoid excess attribute lookups by fast pathing certain checks
        loaded = self._map
        self.__contains__ = loaded.__contains__
        self.__getitem__ = loaded.__getitem__
        self.get = loaded.get

    def write(self, tr, st, now):
        """Serialize the map to `st` and mark the parents as clean

        The v1 format is written to `st` directly (which is then closed);
        dirstate-v2 goes through `write_v2_no_append`.
        """
        if not self._use_dirstate_v2:
            packed = parsers.pack_dirstate(
                self._map, self.copymap, self.parents(), now
            )
            st.write(packed)
            st.close()
        else:
            packed, meta = v2.pack_dirstate(self._map, self.copymap, now)
            self.write_v2_no_append(tr, st, meta, packed)
        self._dirtyparents = False

    @propertycache
    def identity(self):
        """Identity of the dirstate file, as recorded by `read()`"""
        # Loading the map is what assigns `self.identity`.
        self._map
        return self.identity

    ### code related to maintaining and accessing "extra" property
    # (e.g. "has_dir")

    def _dirs_incr(self, filename, old_entry=None):
        """increment the dirstate counter if applicable

        Only caches that have already been computed are updated.
        """
        cached = self.__dict__
        is_new = old_entry is None
        if (is_new or old_entry.removed) and "_dirs" in cached:
            self._dirs.addpath(filename)
        if is_new and "_alldirs" in cached:
            self._alldirs.addpath(filename)

    def _dirs_decr(self, filename, old_entry=None, remove_variant=False):
        """decrement the dirstate counter if applicable

        Only caches that have already been computed are updated; the file is
        also removed from a computed `filefoldmap`.
        """
        cached = self.__dict__
        if old_entry is None:
            if remove_variant and "_alldirs" in cached:
                self._alldirs.addpath(filename)
        else:
            if "_dirs" in cached and not old_entry.removed:
                self._dirs.delpath(filename)
            if "_alldirs" in cached and not remove_variant:
                self._alldirs.delpath(filename)
        if "filefoldmap" in cached:
            self.filefoldmap.pop(util.normcase(filename), None)

    @propertycache
    def filefoldmap(self):
        """Returns a dictionary mapping normalized case paths to their
        non-normalized versions.
        """
        try:
            native_foldmap = parsers.make_file_foldmap
        except AttributeError:
            # no native helper available: build the mapping below
            pass
        else:
            return native_foldmap(
                self._map, util.normcasespec, util.normcasefallback
            )

        normcase = util.normcase
        foldmap = {
            normcase(path): path
            for path, entry in pycompat.iteritems(self._map)
            if not entry.removed
        }
        foldmap[b'.'] = b'.'  # prevents useless util.fspath() invocation
        return foldmap

    @propertycache
    def dirfoldmap(self):
        """Dictionary mapping normalized directory names to the form stored
        in `_dirs`.
        """
        normcase = util.normcase
        return {normcase(dirname): dirname for dirname in self._dirs}

    def hastrackeddir(self, d):
        """
        Returns True if the dirstate contains a tracked (not removed) file
        in this directory.
        """
        return d in self._dirs

    def hasdir(self, d):
        """
        Returns True if the dirstate contains a file (tracked or removed)
        in this directory.
        """
        return d in self._alldirs

    @propertycache
    def _dirs(self):
        """`pathutil.dirs` built from the map with `only_tracked=True`"""
        return pathutil.dirs(self._map, only_tracked=True)

    @propertycache
    def _alldirs(self):
        """`pathutil.dirs` built from every entry of the map"""
        return pathutil.dirs(self._map)

    ### code related to manipulation of entries and copy-sources

    def _refresh_entry(self, f, entry):
        """Drop `f` from the map once its entry is no longer tracked anywhere

        Entries are mutated in place, so nothing needs storing otherwise.
        """
        if entry.any_tracked:
            return
        self._map.pop(f, None)

    def _insert_entry(self, f, entry):
        """Store `entry` under `f`, replacing any previous entry"""
        self._map[f] = entry

    def _drop_entry(self, f):
        """Forget both the entry and the copy source recorded for `f`"""
        for mapping in (self._map, self.copymap):
            mapping.pop(f, None)
553
554
if rustmod is not None:

    class dirstatemap(_dirstatemapcommon):
        """Dirstate map backed by `rustmod.DirstateMap`.

        This definition replaces the pure Python/C `dirstatemap` when the
        Rust module is available. Entries and copy sources both live inside
        the Rust map object held in `_map`.
        """

        ### Core data storage and access

        @propertycache
        def _map(self):
            """
            Fills the Dirstatemap when called.

            Also records `identity` and, unless the parents were already
            modified in memory, the parents found on disk.
            """
            # ignore HG_PENDING because identity is used only for writing
            self.identity = util.filestat.frompath(
                self._opener.join(self._filename)
            )

            if self._use_dirstate_v2:
                if self.docket.uuid:
                    # TODO: use mmap when possible
                    data = self._opener.read(self.docket.data_filename())
                else:
                    data = b''
                self._map = rustmod.DirstateMap.new_v2(
                    data, self.docket.data_size, self.docket.tree_metadata
                )
                parents = self.docket.parents
            else:
                self._map, parents = rustmod.DirstateMap.new_v1(
                    self._readdirstatefile()
                )

            if parents and not self._dirtyparents:
                self.setparents(*parents)

            # Per-instance fast paths bound to the freshly built map; they
            # must be dropped whenever the cached `_map` is discarded.
            self.__contains__ = self._map.__contains__
            self.__getitem__ = self._map.__getitem__
            self.get = self._map.get
            return self._map

        @property
        def copymap(self):
            """Copy information, as exposed by the Rust map"""
            return self._map.copymap()

        def debug_iter(self, all):
            """
            Return an iterator of (filename, state, mode, size, mtime) tuples

            `all`: also include with `state == b' '` dirstate tree nodes that
            don't have an associated `DirstateItem`.

            """
            return self._map.debug_iter(all)

        def clear(self):
            """Empty the map, reset the parents to null and drop cached views"""
            self._map.clear()
            self.setparents(
                self._nodeconstants.nullid, self._nodeconstants.nullid
            )
            util.clearcachedproperty(self, b"_dirs")
            util.clearcachedproperty(self, b"_alldirs")
            # `filefoldmap` is a cached snapshot of the map as well; keep it
            # in sync like the non-Rust implementation does.
            util.clearcachedproperty(self, b"filefoldmap")
            util.clearcachedproperty(self, b"dirfoldmap")

        def items(self):
            """Return the (filename, entry) pairs of the Rust map"""
            return self._map.items()

        # forward for python2,3 compat
        iteritems = items

        def keys(self):
            """Iterate over the filenames present in the map"""
            return iter(self._map)

        ### reading/setting parents

        def setparents(self, p1, p2, fold_p2=False):
            """Set the working copy parents and flag them as dirty

            With `fold_p2`, every entry carrying p2 information gets its
            merge data dropped and its copy source removed. The removed
            (non-empty) copy sources are returned as a {destination: source}
            dict, which is empty when `fold_p2` is false.
            """
            self._parents = (p1, p2)
            self._dirtyparents = True
            copies = {}
            if fold_p2:
                # Collect into an intermediate list to avoid a `RuntimeError`
                # exception due to mutation during iteration.
                # TODO: move this the whole loop to Rust where `iter_mut`
                # enables in-place mutation of elements of a collection while
                # iterating it, without mutating the collection itself.
                files_with_p2_info = [
                    f for f, s in self._map.items() if s.p2_info
                ]
                rust_map = self._map
                for f in files_with_p2_info:
                    e = rust_map.get(f)
                    source = self.copymap.pop(f, None)
                    if source:
                        copies[f] = source
                    e.drop_merge_data()
                    # store the modified entry back into the Rust map
                    rust_map.set_dirstate_item(f, e)
            return copies

        ### disk interaction

        @propertycache
        def identity(self):
            """Identity of the dirstate file, recorded when `_map` is loaded"""
            self._map
            return self.identity

        def write(self, tr, st, now):
            """Serialize the map to `st` and mark the parents as clean

            The v1 format is written to `st` directly. For dirstate-v2 the
            Rust map decides whether the new data can be appended to the
            existing data file; otherwise `write_v2_no_append` creates a new
            one. After a v2 write the cached map is discarded so that it is
            reloaded from disk on next use.
            """
            if not self._use_dirstate_v2:
                p1, p2 = self.parents()
                packed = self._map.write_v1(p1, p2, now)
                st.write(packed)
                st.close()
                self._dirtyparents = False
                return

            # We can only append to an existing data file if there is one
            can_append = self.docket.uuid is not None
            packed, meta, append = self._map.write_v2(now, can_append)
            if append:
                docket = self.docket
                data_filename = docket.data_filename()
                if tr:
                    tr.add(data_filename, docket.data_size)
                with self._opener(data_filename, b'r+b') as fp:
                    fp.seek(docket.data_size)
                    assert fp.tell() == docket.data_size
                    written = fp.write(packed)
                    if written is not None:  # py2 may return None
                        assert written == len(packed), (written, len(packed))
                docket.data_size += len(packed)
                docket.parents = self.parents()
                docket.tree_metadata = meta
                st.write(docket.serialize())
                st.close()
            else:
                self.write_v2_no_append(tr, st, meta, packed)
            # Reload from the newly-written file
            util.clearcachedproperty(self, b"_map")
            # The fast paths installed by `_map` are still bound to the map
            # object that was just discarded. Remove them so lookups fall
            # back to the class methods, which go through the reloaded map.
            for fastpath in ('__contains__', '__getitem__', 'get'):
                self.__dict__.pop(fastpath, None)
            self._dirtyparents = False

        ### code related to maintaining and accessing "extra" property
        # (e.g. "has_dir")

        @propertycache
        def filefoldmap(self):
            """Returns a dictionary mapping normalized case paths to their
            non-normalized versions.
            """
            return self._map.filefoldmapasdict()

        def hastrackeddir(self, d):
            """Forward the tracked-directory query for `d` to the Rust map"""
            return self._map.hastrackeddir(d)

        def hasdir(self, d):
            """Forward the directory query for `d` to the Rust map"""
            return self._map.hasdir(d)

        @propertycache
        def dirfoldmap(self):
            """Dictionary mapping normalized directory names to the names
            yielded by the Rust map's `tracked_dirs()`.
            """
            f = {}
            normcase = util.normcase
            for name in self._map.tracked_dirs():
                f[normcase(name)] = name
            return f

        ### code related to manipulation of entries and copy-sources

        def _refresh_entry(self, f, entry):
            """Push the updated `entry` back into the Rust map

            An entry that is no longer tracked anywhere is dropped together
            with its copy source instead.
            """
            if not entry.any_tracked:
                self._map.drop_item_and_copy_source(f)
            else:
                self._map.addfile(f, entry)

        def _insert_entry(self, f, entry):
            """Store `entry` under `f` in the Rust map"""
            self._map.addfile(f, entry)

        def _drop_entry(self, f):
            """Remove the entry and the copy source recorded for `f`"""
            self._map.drop_item_and_copy_source(f)

        def __setitem__(self, key, value):
            """Store the `DirstateItem` `value` under `key`"""
            assert isinstance(value, DirstateItem)
            self._map.set_dirstate_item(key, value)
733