1# Copyright 2007 Bryan O'Sullivan <bos@serpentine.com> 2# Copyright 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br> 3# 4# This software may be used and distributed according to the terms of the 5# GNU General Public License version 2 or any later version. 6 7from __future__ import absolute_import, print_function 8 9import posixpath 10 11from mercurial.i18n import _ 12from mercurial import ( 13 error, 14 pycompat, 15) 16from . import common 17 18SKIPREV = common.SKIPREV 19 20 21def rpairs(path): 22 """Yield tuples with path split at '/', starting with the full path. 23 No leading, trailing or double '/', please. 24 >>> for x in rpairs(b'foo/bar/baz'): print(x) 25 ('foo/bar/baz', '') 26 ('foo/bar', 'baz') 27 ('foo', 'bar/baz') 28 ('.', 'foo/bar/baz') 29 """ 30 i = len(path) 31 while i != -1: 32 yield path[:i], path[i + 1 :] 33 i = path.rfind(b'/', 0, i) 34 yield b'.', path 35 36 37def normalize(path): 38 """We use posixpath.normpath to support cross-platform path format. 39 However, it doesn't handle None input. So we wrap it up.""" 40 if path is None: 41 return None 42 return posixpath.normpath(path) 43 44 45class filemapper(object): 46 """Map and filter filenames when importing. 47 A name can be mapped to itself, a new name, or None (omit from new 48 repository).""" 49 50 def __init__(self, ui, path=None): 51 self.ui = ui 52 self.include = {} 53 self.exclude = {} 54 self.rename = {} 55 self.targetprefixes = None 56 if path: 57 if self.parse(path): 58 raise error.Abort(_(b'errors in filemap')) 59 60 def parse(self, path): 61 errs = 0 62 63 def check(name, mapping, listname): 64 if not name: 65 self.ui.warn( 66 _(b'%s:%d: path to %s is missing\n') 67 % (lex.infile, lex.lineno, listname) 68 ) 69 return 1 70 if name in mapping: 71 self.ui.warn( 72 _(b'%s:%d: %r already in %s list\n') 73 % (lex.infile, lex.lineno, name, listname) 74 ) 75 return 1 76 if name.startswith(b'/') or name.endswith(b'/') or b'//' in name: 77 self.ui.warn( 78 _(b'%s:%d: superfluous / in %s %r\n') 79 % (lex.infile, lex.lineno, listname, pycompat.bytestr(name)) 80 ) 81 return 1 82 return 0 83 84 lex = common.shlexer( 85 filepath=path, wordchars=b'!@#$%^&*()-=+[]{}|;:,./<>?' 86 ) 87 cmd = lex.get_token() 88 while cmd: 89 if cmd == b'include': 90 name = normalize(lex.get_token()) 91 errs += check(name, self.exclude, b'exclude') 92 self.include[name] = name 93 elif cmd == b'exclude': 94 name = normalize(lex.get_token()) 95 errs += check(name, self.include, b'include') 96 errs += check(name, self.rename, b'rename') 97 self.exclude[name] = name 98 elif cmd == b'rename': 99 src = normalize(lex.get_token()) 100 dest = normalize(lex.get_token()) 101 errs += check(src, self.exclude, b'exclude') 102 self.rename[src] = dest 103 elif cmd == b'source': 104 errs += self.parse(normalize(lex.get_token())) 105 else: 106 self.ui.warn( 107 _(b'%s:%d: unknown directive %r\n') 108 % (lex.infile, lex.lineno, pycompat.bytestr(cmd)) 109 ) 110 errs += 1 111 cmd = lex.get_token() 112 return errs 113 114 def lookup(self, name, mapping): 115 name = normalize(name) 116 for pre, suf in rpairs(name): 117 try: 118 return mapping[pre], pre, suf 119 except KeyError: 120 pass 121 return b'', name, b'' 122 123 def istargetfile(self, filename): 124 """Return true if the given target filename is covered as a destination 125 of the filemap. This is useful for identifying what parts of the target 126 repo belong to the source repo and what parts don't.""" 127 if self.targetprefixes is None: 128 self.targetprefixes = set() 129 for before, after in pycompat.iteritems(self.rename): 130 self.targetprefixes.add(after) 131 132 # If "." is a target, then all target files are considered from the 133 # source. 134 if not self.targetprefixes or b'.' in self.targetprefixes: 135 return True 136 137 filename = normalize(filename) 138 for pre, suf in rpairs(filename): 139 # This check is imperfect since it doesn't account for the 140 # include/exclude list, but it should work in filemaps that don't 141 # apply include/exclude to the same source directories they are 142 # renaming. 143 if pre in self.targetprefixes: 144 return True 145 return False 146 147 def __call__(self, name): 148 if self.include: 149 inc = self.lookup(name, self.include)[0] 150 else: 151 inc = name 152 if self.exclude: 153 exc = self.lookup(name, self.exclude)[0] 154 else: 155 exc = b'' 156 if (not self.include and exc) or (len(inc) <= len(exc)): 157 return None 158 newpre, pre, suf = self.lookup(name, self.rename) 159 if newpre: 160 if newpre == b'.': 161 return suf 162 if suf: 163 if newpre.endswith(b'/'): 164 return newpre + suf 165 return newpre + b'/' + suf 166 return newpre 167 return name 168 169 def active(self): 170 return bool(self.include or self.exclude or self.rename) 171 172 173# This class does two additional things compared to a regular source: 174# 175# - Filter and rename files. This is mostly wrapped by the filemapper 176# class above. We hide the original filename in the revision that is 177# returned by getchanges to be able to find things later in getfile. 178# 179# - Return only revisions that matter for the files we're interested in. 180# This involves rewriting the parents of the original revision to 181# create a graph that is restricted to those revisions. 182# 183# This set of revisions includes not only revisions that directly 184# touch files we're interested in, but also merges that merge two 185# or more interesting revisions. 186 187 188class filemap_source(common.converter_source): 189 def __init__(self, ui, baseconverter, filemap): 190 super(filemap_source, self).__init__(ui, baseconverter.repotype) 191 self.base = baseconverter 192 self.filemapper = filemapper(ui, filemap) 193 self.commits = {} 194 # if a revision rev has parent p in the original revision graph, then 195 # rev will have parent self.parentmap[p] in the restricted graph. 196 self.parentmap = {} 197 # self.wantedancestors[rev] is the set of all ancestors of rev that 198 # are in the restricted graph. 199 self.wantedancestors = {} 200 self.convertedorder = None 201 self._rebuilt = False 202 self.origparents = {} 203 self.children = {} 204 self.seenchildren = {} 205 # experimental config: convert.ignoreancestorcheck 206 self.ignoreancestorcheck = self.ui.configbool( 207 b'convert', b'ignoreancestorcheck' 208 ) 209 210 def before(self): 211 self.base.before() 212 213 def after(self): 214 self.base.after() 215 216 def setrevmap(self, revmap): 217 # rebuild our state to make things restartable 218 # 219 # To avoid calling getcommit for every revision that has already 220 # been converted, we rebuild only the parentmap, delaying the 221 # rebuild of wantedancestors until we need it (i.e. until a 222 # merge). 223 # 224 # We assume the order argument lists the revisions in 225 # topological order, so that we can infer which revisions were 226 # wanted by previous runs. 227 self._rebuilt = not revmap 228 seen = {SKIPREV: SKIPREV} 229 dummyset = set() 230 converted = [] 231 for rev in revmap.order: 232 mapped = revmap[rev] 233 wanted = mapped not in seen 234 if wanted: 235 seen[mapped] = rev 236 self.parentmap[rev] = rev 237 else: 238 self.parentmap[rev] = seen[mapped] 239 self.wantedancestors[rev] = dummyset 240 arg = seen[mapped] 241 if arg == SKIPREV: 242 arg = None 243 converted.append((rev, wanted, arg)) 244 self.convertedorder = converted 245 return self.base.setrevmap(revmap) 246 247 def rebuild(self): 248 if self._rebuilt: 249 return True 250 self._rebuilt = True 251 self.parentmap.clear() 252 self.wantedancestors.clear() 253 self.seenchildren.clear() 254 for rev, wanted, arg in self.convertedorder: 255 if rev not in self.origparents: 256 try: 257 self.origparents[rev] = self.getcommit(rev).parents 258 except error.RepoLookupError: 259 self.ui.debug(b"unknown revmap source: %s\n" % rev) 260 continue 261 if arg is not None: 262 self.children[arg] = self.children.get(arg, 0) + 1 263 264 for rev, wanted, arg in self.convertedorder: 265 try: 266 parents = self.origparents[rev] 267 except KeyError: 268 continue # unknown revmap source 269 if wanted: 270 self.mark_wanted(rev, parents) 271 else: 272 self.mark_not_wanted(rev, arg) 273 self._discard(arg, *parents) 274 275 return True 276 277 def getheads(self): 278 return self.base.getheads() 279 280 def getcommit(self, rev): 281 # We want to save a reference to the commit objects to be able 282 # to rewrite their parents later on. 283 c = self.commits[rev] = self.base.getcommit(rev) 284 for p in c.parents: 285 self.children[p] = self.children.get(p, 0) + 1 286 return c 287 288 def numcommits(self): 289 return self.base.numcommits() 290 291 def _cachedcommit(self, rev): 292 if rev in self.commits: 293 return self.commits[rev] 294 return self.base.getcommit(rev) 295 296 def _discard(self, *revs): 297 for r in revs: 298 if r is None: 299 continue 300 self.seenchildren[r] = self.seenchildren.get(r, 0) + 1 301 if self.seenchildren[r] == self.children[r]: 302 self.wantedancestors.pop(r, None) 303 self.parentmap.pop(r, None) 304 del self.seenchildren[r] 305 if self._rebuilt: 306 del self.children[r] 307 308 def wanted(self, rev, i): 309 # Return True if we're directly interested in rev. 310 # 311 # i is an index selecting one of the parents of rev (if rev 312 # has no parents, i is None). getchangedfiles will give us 313 # the list of files that are different in rev and in the parent 314 # indicated by i. If we're interested in any of these files, 315 # we're interested in rev. 316 try: 317 files = self.base.getchangedfiles(rev, i) 318 except NotImplementedError: 319 raise error.Abort(_(b"source repository doesn't support --filemap")) 320 for f in files: 321 if self.filemapper(f): 322 return True 323 324 # The include directive is documented to include nothing else (though 325 # valid branch closes are included). 326 if self.filemapper.include: 327 return False 328 329 # Allow empty commits in the source revision through. The getchanges() 330 # method doesn't even bother calling this if it determines that the 331 # close marker is significant (i.e. all of the branch ancestors weren't 332 # eliminated). Therefore if there *is* a close marker, getchanges() 333 # doesn't consider it significant, and this revision should be dropped. 334 return not files and b'close' not in self.commits[rev].extra 335 336 def mark_not_wanted(self, rev, p): 337 # Mark rev as not interesting and update data structures. 338 339 if p is None: 340 # A root revision. Use SKIPREV to indicate that it doesn't 341 # map to any revision in the restricted graph. Put SKIPREV 342 # in the set of wanted ancestors to simplify code elsewhere 343 self.parentmap[rev] = SKIPREV 344 self.wantedancestors[rev] = {SKIPREV} 345 return 346 347 # Reuse the data from our parent. 348 self.parentmap[rev] = self.parentmap[p] 349 self.wantedancestors[rev] = self.wantedancestors[p] 350 351 def mark_wanted(self, rev, parents): 352 # Mark rev ss wanted and update data structures. 353 354 # rev will be in the restricted graph, so children of rev in 355 # the original graph should still have rev as a parent in the 356 # restricted graph. 357 self.parentmap[rev] = rev 358 359 # The set of wanted ancestors of rev is the union of the sets 360 # of wanted ancestors of its parents. Plus rev itself. 361 wrev = set() 362 for p in parents: 363 if p in self.wantedancestors: 364 wrev.update(self.wantedancestors[p]) 365 else: 366 self.ui.warn( 367 _(b'warning: %s parent %s is missing\n') % (rev, p) 368 ) 369 wrev.add(rev) 370 self.wantedancestors[rev] = wrev 371 372 def getchanges(self, rev, full): 373 parents = self.commits[rev].parents 374 if len(parents) > 1 and not self.ignoreancestorcheck: 375 self.rebuild() 376 377 # To decide whether we're interested in rev we: 378 # 379 # - calculate what parents rev will have if it turns out we're 380 # interested in it. If it's going to have more than 1 parent, 381 # we're interested in it. 382 # 383 # - otherwise, we'll compare it with the single parent we found. 384 # If any of the files we're interested in is different in the 385 # the two revisions, we're interested in rev. 386 387 # A parent p is interesting if its mapped version (self.parentmap[p]): 388 # - is not SKIPREV 389 # - is still not in the list of parents (we don't want duplicates) 390 # - is not an ancestor of the mapped versions of the other parents or 391 # there is no parent in the same branch than the current revision. 392 mparents = [] 393 knownparents = set() 394 branch = self.commits[rev].branch 395 hasbranchparent = False 396 for i, p1 in enumerate(parents): 397 mp1 = self.parentmap[p1] 398 if mp1 == SKIPREV or mp1 in knownparents: 399 continue 400 401 isancestor = not self.ignoreancestorcheck and any( 402 p2 403 for p2 in parents 404 if p1 != p2 405 and mp1 != self.parentmap[p2] 406 and mp1 in self.wantedancestors[p2] 407 ) 408 if not isancestor and not hasbranchparent and len(parents) > 1: 409 # This could be expensive, avoid unnecessary calls. 410 if self._cachedcommit(p1).branch == branch: 411 hasbranchparent = True 412 mparents.append((p1, mp1, i, isancestor)) 413 knownparents.add(mp1) 414 # Discard parents ancestors of other parents if there is a 415 # non-ancestor one on the same branch than current revision. 416 if hasbranchparent: 417 mparents = [p for p in mparents if not p[3]] 418 wp = None 419 if mparents: 420 wp = max(p[2] for p in mparents) 421 mparents = [p[1] for p in mparents] 422 elif parents: 423 wp = 0 424 425 self.origparents[rev] = parents 426 427 closed = False 428 if b'close' in self.commits[rev].extra: 429 # A branch closing revision is only useful if one of its 430 # parents belong to the branch being closed 431 pbranches = [self._cachedcommit(p).branch for p in mparents] 432 if branch in pbranches: 433 closed = True 434 435 if len(mparents) < 2 and not closed and not self.wanted(rev, wp): 436 # We don't want this revision. 437 # Update our state and tell the convert process to map this 438 # revision to the same revision its parent as mapped to. 439 p = None 440 if parents: 441 p = parents[wp] 442 self.mark_not_wanted(rev, p) 443 self.convertedorder.append((rev, False, p)) 444 self._discard(*parents) 445 return self.parentmap[rev] 446 447 # We want this revision. 448 # Rewrite the parents of the commit object 449 self.commits[rev].parents = mparents 450 self.mark_wanted(rev, parents) 451 self.convertedorder.append((rev, True, None)) 452 self._discard(*parents) 453 454 # Get the real changes and do the filtering/mapping. To be 455 # able to get the files later on in getfile, we hide the 456 # original filename in the rev part of the return value. 457 changes, copies, cleanp2 = self.base.getchanges(rev, full) 458 files = {} 459 ncleanp2 = set(cleanp2) 460 for f, r in changes: 461 newf = self.filemapper(f) 462 if newf and (newf != f or newf not in files): 463 files[newf] = (f, r) 464 if newf != f: 465 ncleanp2.discard(f) 466 files = sorted(files.items()) 467 468 ncopies = {} 469 for c in copies: 470 newc = self.filemapper(c) 471 if newc: 472 newsource = self.filemapper(copies[c]) 473 if newsource: 474 ncopies[newc] = newsource 475 476 return files, ncopies, ncleanp2 477 478 def targetfilebelongstosource(self, targetfilename): 479 return self.filemapper.istargetfile(targetfilename) 480 481 def getfile(self, name, rev): 482 realname, realrev = rev 483 return self.base.getfile(realname, realrev) 484 485 def gettags(self): 486 return self.base.gettags() 487 488 def hasnativeorder(self): 489 return self.base.hasnativeorder() 490 491 def lookuprev(self, rev): 492 return self.base.lookuprev(rev) 493 494 def getbookmarks(self): 495 return self.base.getbookmarks() 496 497 def converted(self, rev, sinkrev): 498 self.base.converted(rev, sinkrev) 499