1// copyright (C) 2004 graydon hoare <graydon@pobox.com> 2// all rights reserved. 3// licensed to the public under the terms of the GNU GPL (>= 2) 4// see the file COPYING for details 5 6#include <cctype> 7#include <cstdlib> 8#include <iostream> 9#include <map> 10#include <set> 11#include <sstream> 12#include <string> 13 14#include <boost/lexical_cast.hpp> 15#include <boost/dynamic_bitset.hpp> 16#include <boost/shared_ptr.hpp> 17 18#include "basic_io.hh" 19#include "change_set.hh" 20#include "constants.hh" 21#include "revision.hh" 22#include "sanity.hh" 23#include "transforms.hh" 24#include "vocab.hh" 25 26 27// calculating least common ancestors is a delicate thing. 28// 29// it turns out that we cannot choose the simple "least common ancestor" 30// for purposes of a merge, because it is possible that there are two 31// equally reachable common ancestors, and this produces ambiguity in the 32// merge. the result -- in a pathological case -- is silently accepting one 33// set of edits while discarding another; not exactly what you want a 34// version control tool to do. 35// 36// a conservative approximation is what we'll call a "subgraph recurring" 37// LCA algorithm. this is somewhat like locating the least common dominator 38// node, but not quite. it is actually just a vanilla LCA search, except 39// that any time there's a fork (a historical merge looks like a fork from 40// our perspective, working backwards from children to parents) it reduces 41// the fork to a common parent via a sequence of pairwise recursive calls 42// to itself before proceeding. this will always resolve to a common parent 43// with no ambiguity, unless it falls off the root of the graph. 44// 45// unfortunately the subgraph recurring algorithm sometimes goes too far 46// back in history -- for example if there is an unambiguous propagate from 47// one branch to another, the entire subgraph preceeding the propagate on 48// the recipient branch is elided, since it is a merge. 
49// 50// our current hypothesis is that the *exact* condition we're looking for, 51// when doing a merge, is the least node which dominates one side of the 52// merge and is an ancestor of the other. 53 54typedef unsigned long ctx; 55typedef boost::dynamic_bitset<> bitmap; 56typedef boost::shared_ptr<bitmap> shared_bitmap; 57 58static void 59ensure_parents_loaded(ctx child, 60 std::map<ctx, shared_bitmap> & parents, 61 interner<ctx> & intern, 62 app_state & app) 63{ 64 if (parents.find(child) != parents.end()) 65 return; 66 67 L(F("loading parents for node %d\n") % child); 68 69 std::set<revision_id> imm_parents; 70 app.db.get_revision_parents(revision_id(intern.lookup(child)), imm_parents); 71 72 // The null revision is not a parent for purposes of finding common 73 // ancestors. 74 for (std::set<revision_id>::iterator p = imm_parents.begin(); 75 p != imm_parents.end(); ++p) 76 { 77 if (null_id(*p)) 78 imm_parents.erase(p); 79 } 80 81 shared_bitmap bits = shared_bitmap(new bitmap(parents.size())); 82 83 for (std::set<revision_id>::const_iterator p = imm_parents.begin(); 84 p != imm_parents.end(); ++p) 85 { 86 ctx pn = intern.intern(p->inner()()); 87 L(F("parent %s -> node %d\n") % *p % pn); 88 if (pn >= bits->size()) 89 bits->resize(pn+1); 90 bits->set(pn); 91 } 92 93 parents.insert(std::make_pair(child, bits)); 94} 95 96static bool 97expand_dominators(std::map<ctx, shared_bitmap> & parents, 98 std::map<ctx, shared_bitmap> & dominators, 99 interner<ctx> & intern, 100 app_state & app) 101{ 102 bool something_changed = false; 103 std::vector<ctx> nodes; 104 105 nodes.reserve(dominators.size()); 106 107 // pass 1, pull out all the node numbers we're going to scan this time around 108 for (std::map<ctx, shared_bitmap>::const_iterator e = dominators.begin(); 109 e != dominators.end(); ++e) 110 nodes.push_back(e->first); 111 112 // pass 2, update any of the dominator entries we can 113 for (std::vector<ctx>::const_iterator n = nodes.begin(); 114 n != nodes.end(); ++n) 
115 { 116 shared_bitmap bits = dominators[*n]; 117 bitmap saved(*bits); 118 if (bits->size() <= *n) 119 bits->resize(*n + 1); 120 bits->set(*n); 121 122 ensure_parents_loaded(*n, parents, intern, app); 123 shared_bitmap n_parents = parents[*n]; 124 125 bitmap intersection(bits->size()); 126 127 bool first = true; 128 for (unsigned long parent = 0; 129 parent != n_parents->size(); ++parent) 130 { 131 if (! n_parents->test(parent)) 132 continue; 133 134 if (dominators.find(parent) == dominators.end()) 135 dominators.insert(std::make_pair(parent, 136 shared_bitmap(new bitmap()))); 137 shared_bitmap pbits = dominators[parent]; 138 139 if (bits->size() > pbits->size()) 140 pbits->resize(bits->size()); 141 142 if (pbits->size() > bits->size()) 143 bits->resize(pbits->size()); 144 145 if (first) 146 { 147 intersection = (*pbits); 148 first = false; 149 } 150 else 151 intersection &= (*pbits); 152 } 153 154 (*bits) |= intersection; 155 if (*bits != saved) 156 something_changed = true; 157 } 158 return something_changed; 159} 160 161 162static bool 163expand_ancestors(std::map<ctx, shared_bitmap> & parents, 164 std::map<ctx, shared_bitmap> & ancestors, 165 interner<ctx> & intern, 166 app_state & app) 167{ 168 bool something_changed = false; 169 std::vector<ctx> nodes; 170 171 nodes.reserve(ancestors.size()); 172 173 // pass 1, pull out all the node numbers we're going to scan this time around 174 for (std::map<ctx, shared_bitmap>::const_iterator e = ancestors.begin(); 175 e != ancestors.end(); ++e) 176 nodes.push_back(e->first); 177 178 // pass 2, update any of the ancestor entries we can 179 for (std::vector<ctx>::const_iterator n = nodes.begin(); n != nodes.end(); ++n) 180 { 181 shared_bitmap bits = ancestors[*n]; 182 bitmap saved(*bits); 183 if (bits->size() <= *n) 184 bits->resize(*n + 1); 185 bits->set(*n); 186 187 ensure_parents_loaded(*n, parents, intern, app); 188 shared_bitmap n_parents = parents[*n]; 189 for (ctx parent = 0; parent != n_parents->size(); ++parent) 
190 { 191 if (! n_parents->test(parent)) 192 continue; 193 194 if (bits->size() <= parent) 195 bits->resize(parent + 1); 196 bits->set(parent); 197 198 if (ancestors.find(parent) == ancestors.end()) 199 ancestors.insert(make_pair(parent, 200 shared_bitmap(new bitmap()))); 201 shared_bitmap pbits = ancestors[parent]; 202 203 if (bits->size() > pbits->size()) 204 pbits->resize(bits->size()); 205 206 if (pbits->size() > bits->size()) 207 bits->resize(pbits->size()); 208 209 (*bits) |= (*pbits); 210 } 211 if (*bits != saved) 212 something_changed = true; 213 } 214 return something_changed; 215} 216 217static bool 218find_intersecting_node(bitmap & fst, 219 bitmap & snd, 220 interner<ctx> const & intern, 221 revision_id & anc) 222{ 223 224 if (fst.size() > snd.size()) 225 snd.resize(fst.size()); 226 else if (snd.size() > fst.size()) 227 fst.resize(snd.size()); 228 229 bitmap intersection = fst & snd; 230 if (intersection.any()) 231 { 232 L(F("found %d intersecting nodes\n") % intersection.count()); 233 for (ctx i = 0; i < intersection.size(); ++i) 234 { 235 if (intersection.test(i)) 236 { 237 anc = revision_id(intern.lookup(i)); 238 return true; 239 } 240 } 241 } 242 return false; 243} 244 245// static void 246// dump_bitset_map(std::string const & hdr, 247// std::map< ctx, shared_bitmap > const & mm) 248// { 249// L(F("dumping [%s] (%d entries)\n") % hdr % mm.size()); 250// for (std::map< ctx, shared_bitmap >::const_iterator i = mm.begin(); 251// i != mm.end(); ++i) 252// { 253// L(F("dump [%s]: %d -> %s\n") % hdr % i->first % (*(i->second))); 254// } 255// } 256 257bool 258find_common_ancestor_for_merge(revision_id const & left, 259 revision_id const & right, 260 revision_id & anc, 261 app_state & app) 262{ 263 interner<ctx> intern; 264 std::map< ctx, shared_bitmap > 265 parents, ancestors, dominators; 266 267 ctx ln = intern.intern(left.inner()()); 268 ctx rn = intern.intern(right.inner()()); 269 270 shared_bitmap lanc = shared_bitmap(new bitmap()); 271 shared_bitmap 
ranc = shared_bitmap(new bitmap()); 272 shared_bitmap ldom = shared_bitmap(new bitmap()); 273 shared_bitmap rdom = shared_bitmap(new bitmap()); 274 275 ancestors.insert(make_pair(ln, lanc)); 276 ancestors.insert(make_pair(rn, ranc)); 277 dominators.insert(make_pair(ln, ldom)); 278 dominators.insert(make_pair(rn, rdom)); 279 280 L(F("searching for common ancestor, left=%s right=%s\n") % left % right); 281 282 while (expand_ancestors(parents, ancestors, intern, app) || 283 expand_dominators(parents, dominators, intern, app)) 284 { 285 L(F("common ancestor scan [par=%d,anc=%d,dom=%d]\n") % 286 parents.size() % ancestors.size() % dominators.size()); 287 288 if (find_intersecting_node(*lanc, *rdom, intern, anc)) 289 { 290 L(F("found node %d, ancestor of left %s and dominating right %s\n") 291 % anc % left % right); 292 return true; 293 } 294 295 else if (find_intersecting_node(*ranc, *ldom, intern, anc)) 296 { 297 L(F("found node %d, ancestor of right %s and dominating left %s\n") 298 % anc % right % left); 299 return true; 300 } 301 } 302// dump_bitset_map("ancestors", ancestors); 303// dump_bitset_map("dominators", dominators); 304// dump_bitset_map("parents", parents); 305 return false; 306} 307 308 309bool 310find_least_common_ancestor(revision_id const & left, 311 revision_id const & right, 312 revision_id & anc, 313 app_state & app) 314{ 315 interner<ctx> intern; 316 std::map< ctx, shared_bitmap > 317 parents, ancestors, dominators; 318 319 ctx ln = intern.intern(left.inner()()); 320 ctx rn = intern.intern(right.inner()()); 321 322 shared_bitmap lanc = shared_bitmap(new bitmap()); 323 shared_bitmap ranc = shared_bitmap(new bitmap()); 324 325 ancestors.insert(make_pair(ln, lanc)); 326 ancestors.insert(make_pair(rn, ranc)); 327 328 L(F("searching for least common ancestor, left=%s right=%s\n") % left % right); 329 330 while (expand_ancestors(parents, ancestors, intern, app)) 331 { 332 L(F("least common ancestor scan [par=%d,anc=%d]\n") % 333 parents.size() % 
ancestors.size()); 334 335 if (find_intersecting_node(*lanc, *ranc, intern, anc)) 336 { 337 L(F("found node %d, ancestor of left %s and right %s\n") 338 % anc % left % right); 339 return true; 340 } 341 } 342// dump_bitset_map("ancestors", ancestors); 343// dump_bitset_map("parents", parents); 344 return false; 345} 346 347 348// 349// The idea with this algorithm is to walk from child up to ancestor, 350// recursively, accumulating all the change_sets associated with 351// intermediate nodes into *one big change_set*. 352// 353// clever readers will realize this is an overlapping-subproblem type 354// situation and thus needs to keep a dynamic programming map to keep 355// itself in linear complexity. 356// 357// in fact, we keep two: one which maps to computed results (partial_csets) 358// and one which just keeps a set of all nodes we traversed 359// (visited_nodes). in theory it could be one map with an extra bool stuck 360// on each entry, but I think that would make it even less readable. it's 361// already quite ugly. 
//

// Walk from 'child' back towards 'ancestor', composing the per-edge
// change_sets along one relevant path into 'cumulative_cset'.  Returns
// true iff 'child' has a path back to 'ancestor' (so the composed cset
// is meaningful); returns false -- leaving 'cumulative_cset' untouched
// by this frame -- when the subgraph rooted at 'child' never reaches
// 'ancestor'.
//
// 'partial_csets' memoizes the composed cset for nodes already proven
// relevant; 'visited_nodes' records every node traversed, so irrelevant
// subgraphs are not re-explored (see the comment block above).
static bool
calculate_change_sets_recursive(revision_id const & ancestor,
                                revision_id const & child,
                                app_state & app,
                                change_set & cumulative_cset,
                                std::map<revision_id, boost::shared_ptr<change_set> > & partial_csets,
                                std::set<revision_id> & visited_nodes)
{

  // base case: walked all the way back to the ancestor itself
  if (ancestor == child)
    return true;

  visited_nodes.insert(child);

  bool relevant_child = false;

  revision_set rev;
  app.db.get_revision(child, rev);

  L(F("exploring changesets from parents of %s, seeking towards %s\n")
    % child % ancestor);

  for(edge_map::const_iterator i = rev.edges.begin(); i != rev.edges.end(); ++i)
    {
      bool relevant_parent = false;
      revision_id curr_parent = edge_old_revision(i);

      // an empty id marks a root edge; nothing further back to explore
      if (curr_parent.inner()().empty())
        continue;

      change_set cset_to_curr_parent;

      L(F("considering parent %s of %s\n") % curr_parent % child);

      std::map<revision_id, boost::shared_ptr<change_set> >::const_iterator j =
        partial_csets.find(curr_parent);
      if (j != partial_csets.end())
        {
          // a recursive call has traversed this parent before and built an
          // existing cset. just reuse that rather than re-traversing
          cset_to_curr_parent = *(j->second);
          relevant_parent = true;
        }
      else if (visited_nodes.find(curr_parent) != visited_nodes.end())
        {
          // a recursive call has traversed this parent, but there was no
          // path from it to the root, so the parent is irrelevant. skip.
          relevant_parent = false;
        }
      else
        relevant_parent = calculate_change_sets_recursive(ancestor, curr_parent, app,
                                                          cset_to_curr_parent,
                                                          partial_csets,
                                                          visited_nodes);

      if (relevant_parent)
        {
          L(F("revision %s is relevant, composing with edge to %s\n")
            % curr_parent % child);
          concatenate_change_sets(cset_to_curr_parent, edge_changes(i), cumulative_cset);
          relevant_child = true;
          // NOTE(review): only the *first* relevant parent is composed;
          // this presumably assumes every ancestor->child path composes to
          // the same overall cset -- confirm against merge invariants.
          break;
        }
      else
        L(F("parent %s of %s is not relevant\n") % curr_parent % child);
    }

  // store the partial edge from ancestor -> child, so that if anyone
  // re-traverses this edge they'll just fetch from the partial_edges
  // cache.
  if (relevant_child)
    partial_csets.insert(std::make_pair(child,
                                        boost::shared_ptr<change_set>
                                        (new change_set(cumulative_cset))));

  return relevant_child;
}

// Public entry point: compute the single composite change_set between
// 'ancestor' and 'child' by walking the revision graph (algorithm
// described in the comment block above).  'composed' accumulates the
// result; it is left unchanged if no path exists.
void
calculate_composite_change_set(revision_id const & ancestor,
                               revision_id const & child,
                               app_state & app,
                               change_set & composed)
{
  L(F("calculating composite changeset between %s and %s\n")
    % ancestor % child);
  std::set<revision_id> visited;
  std::map<revision_id, boost::shared_ptr<change_set> > partial;
  calculate_change_sets_recursive(ancestor, child, app, composed, partial, visited);
}

// migration stuff
//
// FIXME: these are temporary functions, once we've done the migration to
// revisions / changesets, we can remove them.
// Diff two manifests into a change_set: deletions and deltas are found by
// scanning the parent, additions by scanning the child.  Note this only
// sees adds/deletes/deltas -- renames cannot be reconstructed from bare
// manifests.
static void
analyze_manifest_changes(app_state & app,
                         manifest_id const & parent,
                         manifest_id const & child,
                         change_set & cs)
{
  manifest_map m_parent, m_child;
  app.db.get_manifest(parent, m_parent);
  app.db.get_manifest(child, m_child);

  // pass 1 over the parent: entries missing from the child are deletions,
  // entries with a different id are content deltas
  for (manifest_map::const_iterator i = m_parent.begin();
       i != m_parent.end(); ++i)
    {
      manifest_map::const_iterator j = m_child.find(manifest_entry_path(i));
      if (j == m_child.end())
        cs.delete_file(manifest_entry_path(i));
      else if (! (manifest_entry_id(i) == manifest_entry_id(j)))
        {
          cs.apply_delta(manifest_entry_path(i),
                         manifest_entry_id(i),
                         manifest_entry_id(j));
        }
    }
  // pass 2 over the child: entries missing from the parent are additions
  for (manifest_map::const_iterator i = m_child.begin();
       i != m_child.end(); ++i)
    {
      manifest_map::const_iterator j = m_parent.find(manifest_entry_path(i));
      if (j == m_parent.end())
        cs.add_file(manifest_entry_path(i),
                    manifest_entry_id(i));
    }
}

// Recursively synthesize (and write to the database) a revision for
// 'child' and all its manifest-ancestors, memoizing already-constructed
// revisions in 'mapped'.  Returns the revision id for 'child' (a default
// -- null -- id for parentless manifests, which are skipped).
//
// NOTE(review): recursion depth is proportional to the length of the
// manifest ancestry chain; very long histories could exhaust the stack.
static revision_id
construct_revisions(app_state & app,
                    manifest_id const & child,
                    std::multimap< manifest_id, manifest_id > const & ancestry,
                    std::map<manifest_id, revision_id> & mapped)
{
  revision_set rev;
  typedef std::multimap< manifest_id, manifest_id >::const_iterator ci;
  std::pair<ci,ci> range = ancestry.equal_range(child);
  // build one edge per (child -> parent) ancestry cert
  for (ci i = range.first; i != range.second; ++i)
    {
      manifest_id parent(i->second);
      revision_id parent_rid;
      std::map<manifest_id, revision_id>::const_iterator j = mapped.find(parent);

      if (j != mapped.end())
        parent_rid = j->second;
      else
        {
          // parent not constructed yet: recurse first, then memoize
          parent_rid = construct_revisions(app, parent, ancestry, mapped);
          P(F("inserting mapping %d : %s -> %s\n") % mapped.size() % parent % parent_rid);;
          mapped.insert(std::make_pair(parent, parent_rid));
        }

      change_set cs;
      analyze_manifest_changes(app, parent, child, cs);
      rev.edges.insert(std::make_pair(parent_rid,
                                      std::make_pair(parent, cs)));
    }

  revision_id rid;
  if (rev.edges.empty())
    {
      // parentless manifest: no revision to synthesize, return null id
      P(F("ignoring empty revision for manifest %s\n") % child);
      return rid;
    }

  rev.new_manifest = child;
  calculate_ident(rev, rid);

  if (!app.db.revision_exists (rid))
    {
      P(F("mapping manifest %s to revision %s\n") % child % rid);
      app.db.put_revision(rid, rev);
    }
  else
    {
      // identical revision reachable via multiple ancestry paths
      P(F("skipping additional path to revision %s\n") % rid);
    }

  // now hoist all the interesting certs up to the revision
  std::set<cert_name> cnames;
  cnames.insert(cert_name(branch_cert_name));
  cnames.insert(cert_name(date_cert_name));
  cnames.insert(cert_name(author_cert_name));
  cnames.insert(cert_name(tag_cert_name));
  cnames.insert(cert_name(changelog_cert_name));
  cnames.insert(cert_name(comment_cert_name));
  cnames.insert(cert_name(testresult_cert_name));

  std::vector< manifest<cert> > tmp;
  app.db.get_manifest_certs(child, tmp);
  erase_bogus_certs(tmp, app);
  for (std::vector< manifest<cert> >::const_iterator i = tmp.begin();
       i != tmp.end(); ++i)
    {
      if (cnames.find(i->inner().name) == cnames.end())
        continue;
      cert new_cert;
      cert_value tv;
      decode_base64(i->inner().value, tv);
      // re-sign the cert value against the new revision id
      make_simple_cert(rid.inner(), i->inner().name, tv, app, new_cert);
      if (! app.db.revision_cert_exists(revision<cert>(new_cert)))
        app.db.put_revision_cert(revision<cert>(new_cert));
    }
  return rid;
}


// Migration driver: read all "ancestor" manifest certs, reconstruct the
// manifest ancestry graph, and synthesize revisions for every chain,
// starting from the heads (manifests that are never anyone's parent).
// All database writes happen inside one transaction.
void
build_changesets(app_state & app)
{
  std::vector< manifest<cert> > tmp;
  app.db.get_manifest_certs(cert_name("ancestor"), tmp);
  erase_bogus_certs(tmp, app);

  std::multimap< manifest_id, manifest_id > ancestry;
  std::set<manifest_id> heads;
  std::set<manifest_id> total;

  for (std::vector< manifest<cert> >::const_iterator i = tmp.begin();
       i != tmp.end(); ++i)
    {
      cert_value tv;
      decode_base64(i->inner().value, tv);
      manifest_id child, parent;
      child = i->inner().ident;
      parent = hexenc<id>(tv());
      // a head is a child that never subsequently appears as a parent
      heads.insert(child);
      heads.erase(parent);
      total.insert(child);
      total.insert(parent);
      ancestry.insert(std::make_pair(child, parent));
    }

  P(F("found a total of %d manifests\n") % total.size());

  transaction_guard guard(app.db);
  std::map<manifest_id, revision_id> mapped;
  for (std::set<manifest_id>::const_iterator i = heads.begin();
       i != heads.end(); ++i)
    {
      construct_revisions(app, *i, ancestry, mapped);
    }
  guard.commit();
}


// i/o stuff

std::string revision_file_name("revision");

namespace
{
  // symbol names used in the basic_io revision format
  namespace syms
  {
    std::string const old_revision("old_revision");
    std::string const new_manifest("new_manifest");
    std::string const old_manifest("old_manifest");
  }
}


// Print one revision edge: old_revision / old_manifest stanza followed by
// the edge's change_set.
void
print_edge(basic_io::printer & printer,
           edge_entry const & e)
{
  basic_io::stanza st;
  st.push_hex_pair(syms::old_revision, edge_old_revision(e).inner()());
  st.push_hex_pair(syms::old_manifest, edge_old_manifest(e).inner()());
  printer.print_stanza(st);
  print_change_set(printer, edge_changes(e));
}


// Print a full revision: new_manifest stanza, then each edge in turn.
void
print_revision(basic_io::printer & printer,
               revision_set const & rev)
{
  basic_io::stanza st;
  st.push_hex_pair(syms::new_manifest, rev.new_manifest.inner()());
  printer.print_stanza(st);
  for (edge_map::const_iterator edge = rev.edges.begin();
       edge != rev.edges.end(); ++edge)
    print_edge(printer, *edge);
}


// Parse one edge (inverse of print_edge) and insert it into 'es'.
void
parse_edge(basic_io::parser & parser,
           edge_map & es)
{
  change_set cs;
  manifest_id old_man;
  revision_id old_rev;
  std::string tmp;

  parser.esym(syms::old_revision);
  parser.hex(tmp);
  old_rev = revision_id(tmp);

  parser.esym(syms::old_manifest);
  parser.hex(tmp);
  old_man = manifest_id(tmp);

  parse_change_set(parser, cs);

  es.insert(std::make_pair(old_rev, std::make_pair(old_man, cs)));
}


// Parse a full revision (inverse of print_revision), replacing any edges
// already present in 'rev'.
void
parse_revision(basic_io::parser & parser,
               revision_set & rev)
{
  rev.edges.clear();
  std::string tmp;
  parser.esym(syms::new_manifest);
  parser.hex(tmp);
  rev.new_manifest = manifest_id(tmp);
  // edges continue as long as another old_revision symbol follows
  while (parser.symp(syms::old_revision))
    parse_edge(parser, rev.edges);
}

// Deserialize a revision from its plain-text basic_io form.
void
read_revision_set(data const & dat,
                  revision_set & rev)
{
  std::istringstream iss(dat());
  basic_io::input_source src(iss, "revision");
  basic_io::tokenizer tok(src);
  basic_io::parser pars(tok);
  parse_revision(pars, rev);
}

// Deserialize a revision from its packed (base64/gzip) storage form.
void
read_revision_set(revision_data const & dat,
                  revision_set & rev)
{
  data unpacked;
  unpack(dat.inner(), unpacked);
  read_revision_set(unpacked, rev);
}

// Serialize a revision to its plain-text basic_io form.
void
write_revision_set(revision_set const & rev,
                   data & dat)
{
  std::ostringstream oss;
  basic_io::printer pr(oss);
  print_revision(pr, rev);
  dat = data(oss.str());
}

// Serialize a revision to its packed (base64/gzip) storage form.
void
write_revision_set(revision_set const & rev,
                   revision_data & dat)
{
  data d;
  write_revision_set(rev, d);
  base64< gzip<data> > packed;
  pack(d, packed);
  dat = revision_data(packed);
}

#ifdef BUILD_UNIT_TESTS
#include "unit_tests.hh"
#include "sanity.hh"

// placeholder: no revision unit tests written yet
static void
revision_test()
{
}

void
add_revision_tests(test_suite * suite)
{
  I(suite);
  suite->add(BOOST_TEST_CASE(&revision_test));
}


#endif // BUILD_UNIT_TESTS