1# grep.py - logic for history walk and grep
2#
3# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4#
5# This software may be used and distributed according to the terms of the
6# GNU General Public License version 2 or any later version.
7
8from __future__ import absolute_import
9
10import difflib
11import errno
12
13from .i18n import _
14
15from . import (
16    error,
17    match as matchmod,
18    pycompat,
19    scmutil,
20    util,
21)
22
23
24def matchlines(body, regexp):
25    begin = 0
26    linenum = 0
27    while begin < len(body):
28        match = regexp.search(body, begin)
29        if not match:
30            break
31        mstart, mend = match.span()
32        linenum += body.count(b'\n', begin, mstart) + 1
33        lstart = body.rfind(b'\n', begin, mstart) + 1 or begin
34        begin = body.find(b'\n', mend) + 1 or len(body) + 1
35        lend = begin - 1
36        yield linenum, mstart - lstart, mend - lstart, body[lstart:lend]
37
38
39class linestate(object):
40    def __init__(self, line, linenum, colstart, colend):
41        self.line = line
42        self.linenum = linenum
43        self.colstart = colstart
44        self.colend = colend
45
46    def __hash__(self):
47        return hash(self.line)
48
49    def __eq__(self, other):
50        return self.line == other.line
51
52    def findpos(self, regexp):
53        """Iterate all (start, end) indices of matches"""
54        yield self.colstart, self.colend
55        p = self.colend
56        while p < len(self.line):
57            m = regexp.search(self.line, p)
58            if not m:
59                break
60            if m.end() == p:
61                p += 1
62            else:
63                yield m.span()
64                p = m.end()
65
66
67def difflinestates(a, b):
68    sm = difflib.SequenceMatcher(None, a, b)
69    for tag, alo, ahi, blo, bhi in sm.get_opcodes():
70        if tag == 'insert':
71            for i in pycompat.xrange(blo, bhi):
72                yield (b'+', b[i])
73        elif tag == 'delete':
74            for i in pycompat.xrange(alo, ahi):
75                yield (b'-', a[i])
76        elif tag == 'replace':
77            for i in pycompat.xrange(alo, ahi):
78                yield (b'-', a[i])
79            for i in pycompat.xrange(blo, bhi):
80                yield (b'+', b[i])
81
82
83class grepsearcher(object):
84    """Search files and revisions for lines matching the given pattern
85
86    Options:
87    - all_files to search unchanged files at that revision.
88    - diff to search files in the parent revision so diffs can be generated.
89    - follow to skip files across copies and renames.
90    """
91
92    def __init__(
93        self, ui, repo, regexp, all_files=False, diff=False, follow=False
94    ):
95        self._ui = ui
96        self._repo = repo
97        self._regexp = regexp
98        self._all_files = all_files
99        self._diff = diff
100        self._follow = follow
101
102        self._getfile = util.lrucachefunc(repo.file)
103        self._getrenamed = scmutil.getrenamedfn(repo)
104
105        self._matches = {}
106        self._copies = {}
107        self._skip = set()
108        self._revfiles = {}
109
110    def skipfile(self, fn, rev):
111        """Exclude the given file (and the copy at the specified revision)
112        from future search"""
113        copy = self._copies.get(rev, {}).get(fn)
114        self._skip.add(fn)
115        if copy:
116            self._skip.add(copy)
117
118    def searchfiles(self, revs, makefilematcher):
119        """Walk files and revisions to yield (fn, ctx, pstates, states)
120        matches
121
122        states is a list of linestate objects. pstates may be empty unless
123        diff is True.
124        """
125        for ctx in scmutil.walkchangerevs(
126            self._repo, revs, makefilematcher, self._prep
127        ):
128            rev = ctx.rev()
129            parent = ctx.p1().rev()
130            for fn in sorted(self._revfiles.get(rev, [])):
131                states = self._matches[rev][fn]
132                copy = self._copies.get(rev, {}).get(fn)
133                if fn in self._skip:
134                    if copy:
135                        self._skip.add(copy)
136                    continue
137                pstates = self._matches.get(parent, {}).get(copy or fn, [])
138                if pstates or states:
139                    yield fn, ctx, pstates, states
140            del self._revfiles[rev]
141            # We will keep the matches dict for the duration of the window
142            # clear the matches dict once the window is over
143            if not self._revfiles:
144                self._matches.clear()
145
146    def _grepbody(self, fn, rev, body):
147        self._matches[rev].setdefault(fn, [])
148        m = self._matches[rev][fn]
149        if body is None:
150            return
151
152        for lnum, cstart, cend, line in matchlines(body, self._regexp):
153            s = linestate(line, lnum, cstart, cend)
154            m.append(s)
155
156    def _readfile(self, ctx, fn):
157        rev = ctx.rev()
158        if rev is None:
159            fctx = ctx[fn]
160            try:
161                return fctx.data()
162            except IOError as e:
163                if e.errno != errno.ENOENT:
164                    raise
165        else:
166            flog = self._getfile(fn)
167            fnode = ctx.filenode(fn)
168            try:
169                return flog.read(fnode)
170            except error.CensoredNodeError:
171                self._ui.warn(
172                    _(
173                        b'cannot search in censored file: '
174                        b'%(filename)s:%(revnum)s\n'
175                    )
176                    % {b'filename': fn, b'revnum': pycompat.bytestr(rev)}
177                )
178
179    def _prep(self, ctx, fmatch):
180        rev = ctx.rev()
181        pctx = ctx.p1()
182        self._matches.setdefault(rev, {})
183        if self._diff:
184            parent = pctx.rev()
185            self._matches.setdefault(parent, {})
186        files = self._revfiles.setdefault(rev, [])
187        if rev is None:
188            # in `hg grep pattern`, 2/3 of the time is spent is spent in
189            # pathauditor checks without this in mozilla-central
190            contextmanager = self._repo.wvfs.audit.cached
191        else:
192            contextmanager = util.nullcontextmanager
193        with contextmanager():
194            # TODO: maybe better to warn missing files?
195            if self._all_files:
196                fmatch = matchmod.badmatch(fmatch, lambda f, msg: None)
197                filenames = ctx.matches(fmatch)
198            else:
199                filenames = (f for f in ctx.files() if fmatch(f))
200            for fn in filenames:
201                # fn might not exist in the revision (could be a file removed by
202                # the revision). We could check `fn not in ctx` even when rev is
203                # None, but it's less racy to protect againt that in readfile.
204                if rev is not None and fn not in ctx:
205                    continue
206
207                copy = None
208                if self._follow:
209                    copy = self._getrenamed(fn, rev)
210                    if copy:
211                        self._copies.setdefault(rev, {})[fn] = copy
212                        if fn in self._skip:
213                            self._skip.add(copy)
214                if fn in self._skip:
215                    continue
216                files.append(fn)
217
218                if fn not in self._matches[rev]:
219                    self._grepbody(fn, rev, self._readfile(ctx, fn))
220
221                if self._diff:
222                    pfn = copy or fn
223                    if pfn not in self._matches[parent] and pfn in pctx:
224                        self._grepbody(pfn, parent, self._readfile(pctx, pfn))
225