1# grep.py - logic for history walk and grep 2# 3# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com> 4# 5# This software may be used and distributed according to the terms of the 6# GNU General Public License version 2 or any later version. 7 8from __future__ import absolute_import 9 10import difflib 11import errno 12 13from .i18n import _ 14 15from . import ( 16 error, 17 match as matchmod, 18 pycompat, 19 scmutil, 20 util, 21) 22 23 24def matchlines(body, regexp): 25 begin = 0 26 linenum = 0 27 while begin < len(body): 28 match = regexp.search(body, begin) 29 if not match: 30 break 31 mstart, mend = match.span() 32 linenum += body.count(b'\n', begin, mstart) + 1 33 lstart = body.rfind(b'\n', begin, mstart) + 1 or begin 34 begin = body.find(b'\n', mend) + 1 or len(body) + 1 35 lend = begin - 1 36 yield linenum, mstart - lstart, mend - lstart, body[lstart:lend] 37 38 39class linestate(object): 40 def __init__(self, line, linenum, colstart, colend): 41 self.line = line 42 self.linenum = linenum 43 self.colstart = colstart 44 self.colend = colend 45 46 def __hash__(self): 47 return hash(self.line) 48 49 def __eq__(self, other): 50 return self.line == other.line 51 52 def findpos(self, regexp): 53 """Iterate all (start, end) indices of matches""" 54 yield self.colstart, self.colend 55 p = self.colend 56 while p < len(self.line): 57 m = regexp.search(self.line, p) 58 if not m: 59 break 60 if m.end() == p: 61 p += 1 62 else: 63 yield m.span() 64 p = m.end() 65 66 67def difflinestates(a, b): 68 sm = difflib.SequenceMatcher(None, a, b) 69 for tag, alo, ahi, blo, bhi in sm.get_opcodes(): 70 if tag == 'insert': 71 for i in pycompat.xrange(blo, bhi): 72 yield (b'+', b[i]) 73 elif tag == 'delete': 74 for i in pycompat.xrange(alo, ahi): 75 yield (b'-', a[i]) 76 elif tag == 'replace': 77 for i in pycompat.xrange(alo, ahi): 78 yield (b'-', a[i]) 79 for i in pycompat.xrange(blo, bhi): 80 yield (b'+', b[i]) 81 82 83class grepsearcher(object): 84 """Search files and revisions for lines matching the given pattern 85 86 Options: 87 - all_files to search unchanged files at that revision. 88 - diff to search files in the parent revision so diffs can be generated. 89 - follow to skip files across copies and renames. 90 """ 91 92 def __init__( 93 self, ui, repo, regexp, all_files=False, diff=False, follow=False 94 ): 95 self._ui = ui 96 self._repo = repo 97 self._regexp = regexp 98 self._all_files = all_files 99 self._diff = diff 100 self._follow = follow 101 102 self._getfile = util.lrucachefunc(repo.file) 103 self._getrenamed = scmutil.getrenamedfn(repo) 104 105 self._matches = {} 106 self._copies = {} 107 self._skip = set() 108 self._revfiles = {} 109 110 def skipfile(self, fn, rev): 111 """Exclude the given file (and the copy at the specified revision) 112 from future search""" 113 copy = self._copies.get(rev, {}).get(fn) 114 self._skip.add(fn) 115 if copy: 116 self._skip.add(copy) 117 118 def searchfiles(self, revs, makefilematcher): 119 """Walk files and revisions to yield (fn, ctx, pstates, states) 120 matches 121 122 states is a list of linestate objects. pstates may be empty unless 123 diff is True. 124 """ 125 for ctx in scmutil.walkchangerevs( 126 self._repo, revs, makefilematcher, self._prep 127 ): 128 rev = ctx.rev() 129 parent = ctx.p1().rev() 130 for fn in sorted(self._revfiles.get(rev, [])): 131 states = self._matches[rev][fn] 132 copy = self._copies.get(rev, {}).get(fn) 133 if fn in self._skip: 134 if copy: 135 self._skip.add(copy) 136 continue 137 pstates = self._matches.get(parent, {}).get(copy or fn, []) 138 if pstates or states: 139 yield fn, ctx, pstates, states 140 del self._revfiles[rev] 141 # We will keep the matches dict for the duration of the window 142 # clear the matches dict once the window is over 143 if not self._revfiles: 144 self._matches.clear() 145 146 def _grepbody(self, fn, rev, body): 147 self._matches[rev].setdefault(fn, []) 148 m = self._matches[rev][fn] 149 if body is None: 150 return 151 152 for lnum, cstart, cend, line in matchlines(body, self._regexp): 153 s = linestate(line, lnum, cstart, cend) 154 m.append(s) 155 156 def _readfile(self, ctx, fn): 157 rev = ctx.rev() 158 if rev is None: 159 fctx = ctx[fn] 160 try: 161 return fctx.data() 162 except IOError as e: 163 if e.errno != errno.ENOENT: 164 raise 165 else: 166 flog = self._getfile(fn) 167 fnode = ctx.filenode(fn) 168 try: 169 return flog.read(fnode) 170 except error.CensoredNodeError: 171 self._ui.warn( 172 _( 173 b'cannot search in censored file: ' 174 b'%(filename)s:%(revnum)s\n' 175 ) 176 % {b'filename': fn, b'revnum': pycompat.bytestr(rev)} 177 ) 178 179 def _prep(self, ctx, fmatch): 180 rev = ctx.rev() 181 pctx = ctx.p1() 182 self._matches.setdefault(rev, {}) 183 if self._diff: 184 parent = pctx.rev() 185 self._matches.setdefault(parent, {}) 186 files = self._revfiles.setdefault(rev, []) 187 if rev is None: 188 # in `hg grep pattern`, 2/3 of the time is spent is spent in 189 # pathauditor checks without this in mozilla-central 190 contextmanager = self._repo.wvfs.audit.cached 191 else: 192 contextmanager = util.nullcontextmanager 193 with contextmanager(): 194 # TODO: maybe better to warn missing files? 195 if self._all_files: 196 fmatch = matchmod.badmatch(fmatch, lambda f, msg: None) 197 filenames = ctx.matches(fmatch) 198 else: 199 filenames = (f for f in ctx.files() if fmatch(f)) 200 for fn in filenames: 201 # fn might not exist in the revision (could be a file removed by 202 # the revision). We could check `fn not in ctx` even when rev is 203 # None, but it's less racy to protect againt that in readfile. 204 if rev is not None and fn not in ctx: 205 continue 206 207 copy = None 208 if self._follow: 209 copy = self._getrenamed(fn, rev) 210 if copy: 211 self._copies.setdefault(rev, {})[fn] = copy 212 if fn in self._skip: 213 self._skip.add(copy) 214 if fn in self._skip: 215 continue 216 files.append(fn) 217 218 if fn not in self._matches[rev]: 219 self._grepbody(fn, rev, self._readfile(ctx, fn)) 220 221 if self._diff: 222 pfn = copy or fn 223 if pfn not in self._matches[parent] and pfn in pctx: 224 self._grepbody(pfn, parent, self._readfile(pctx, pfn)) 225