1 // Copyright (C) 2005 Nathaniel Smith <njs@pobox.com>
2 //               2008 Stephen Leake <stephen_leake@stephe-leake.org>
3 //
4 // This program is made available under the GNU GPL version 2.0 or
5 // greater. See the accompanying file COPYING for details.
6 //
7 // This program is distributed WITHOUT ANY WARRANTY; without even the
8 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
9 // PURPOSE.
10 
11 #include "base.hh"
12 #include <algorithm>
13 #include <set>
14 #include "vector.hh"
15 #include <sstream>
16 
17 #include "basic_io.hh"
18 #include "cset.hh"
19 #include "database.hh"
20 #include "platform-wrapped.hh"
21 #include "roster.hh"
22 #include "revision.hh"
23 #include "vocab.hh"
24 #include "transforms.hh"
25 #include "simplestring_xform.hh"
26 #include "lexical_cast.hh"
27 #include "file_io.hh"
28 #include "parallel_iter.hh"
29 #include "restrictions.hh"
30 #include "safe_map.hh"
31 #include "ui.hh"
32 #include "vocab_cast.hh"
33 
34 using std::inserter;
35 using std::make_pair;
36 using std::map;
37 using std::ostringstream;
38 using std::pair;
39 using std::reverse;
40 using std::set;
41 using std::set_union;
42 using std::stack;
43 using std::string;
44 using std::vector;
45 
46 using boost::lexical_cast;
47 
48 ///////////////////////////////////////////////////////////////////
49 
50 namespace
51 {
52   namespace syms
53   {
54     symbol const birth("birth");
55     symbol const dormant_attr("dormant_attr");
56     symbol const ident("ident");
57 
58     symbol const path_mark("path_mark");
59     symbol const attr_mark("attr_mark");
60   }
61 }
62 
63 template <> void
dump(attr_map_t const & val,string & out)64 dump(attr_map_t const & val, string & out)
65 {
66   ostringstream oss;
67   for (attr_map_t::const_iterator i = val.begin(); i != val.end(); ++i)
68     oss << "attr key: '" << i->first << "'\n"
69         << "  status: " << (i->second.first ? "live" : "dead") << '\n'
70         << "   value: '" << i->second.second << "'\n";
71   out = oss.str();
72 }
73 
74 template <> void
dump(set<revision_id> const & revids,string & out)75 dump(set<revision_id> const & revids, string & out)
76 {
77   out.clear();
78   bool first = true;
79   for (set<revision_id>::const_iterator i = revids.begin();
80        i != revids.end(); ++i)
81     {
82       if (!first)
83         out += ", ";
84       first = false;
85       out += encode_hexenc(i->inner()(), i->inner().made_from);
86     }
87 }
88 
89 template <> void
dump(marking_t const & marking,string & out)90 dump(marking_t const & marking, string & out)
91 {
92   ostringstream oss;
93   string tmp;
94   oss << "birth_revision: " << marking->birth_revision << '\n';
95   dump(marking->parent_name, tmp);
96   oss << "parent_name: " << tmp << '\n';
97   dump(marking->file_content, tmp);
98   oss << "file_content: " << tmp << '\n';
99   oss << "attrs (number: " << marking->attrs.size() << "):\n";
100   for (map<attr_key, set<revision_id> >::const_iterator
101          i = marking->attrs.begin(); i != marking->attrs.end(); ++i)
102     {
103       dump(i->second, tmp);
104       oss << "  " << i->first << ": " << tmp << '\n';
105     }
106   out = oss.str();
107 }
108 
109 template <> void
dump(marking_map const & markings,string & out)110 dump(marking_map const & markings, string & out)
111 {
112   ostringstream oss;
113   for (marking_map::const_iterator i = markings.begin();
114        i != markings.end();
115        ++i)
116     {
117       oss << "Marking for " << i->first << ":\n";
118       string marking_str, indented_marking_str;
119       dump(i->second, marking_str);
120       prefix_lines_with("    ", marking_str, indented_marking_str);
121       oss << indented_marking_str << '\n';
122     }
123   out = oss.str();
124 }
125 
126 //
127 // We have a few concepts of "nullness" here:
128 //
129 // - the_null_node is a node_id. It does not correspond to a real node;
130 //   it's an id you use for the parent of the root, or of any node which
131 //   is detached.
132 //
133 // - the root node has a real node id, just like any other directory.
134 //
135 // - the path_component whose string representation is "", the empty
136 //   string, is the *name* of the root node.  write it as
137 //   path_component() and test for it with component.empty().
138 //
139 // - similarly, the file_path whose string representation is "" also
140 //   names the root node.  write it as file_path() and test for it
141 //   with path.empty().
142 //
143 // - there is no file_path or path_component corresponding to the_null_node.
144 //
145 // We do this in order to support the notion of moving the root directory
146 // around, or applying attributes to the root directory.  Note that the
147 // only supported way to move the root is with the 'pivot_root' operation,
148 // which atomically turns the root directory into a subdirectory and some
149 // existing subdirectory into the root directory.  This is an UI constraint,
150 // not a constraint at this level.
151 
152 u32 last_created_roster = 0;
153 
154 
marking()155 marking::marking()
156   : cow_version(0)
157 { }
158 
marking(marking const & other)159 marking::marking(marking const & other)
160   : cow_version(0),
161     birth_revision(other.birth_revision),
162     parent_name(other.parent_name),
163     file_content(other.file_content),
164     attrs(other.attrs)
165 {
166 }
167 
operator =(marking const & other)168 marking const & marking::operator=(marking const & other)
169 {
170   cow_version = 0;
171   birth_revision = other.birth_revision;
172   parent_name = other.parent_name;
173   file_content = other.file_content;
174   attrs = other.attrs;
175   return *this;
176 }
177 
marking_map()178 marking_map::marking_map()
179   : cow_version(++last_created_roster)
180 { }
181 
marking_map(marking_map const & other)182 marking_map::marking_map(marking_map const & other)
183   : cow_version(++last_created_roster),
184     _store(other._store)
185 {
186   other.cow_version = ++last_created_roster;
187 }
188 
operator =(marking_map const & other)189 marking_map const & marking_map::operator=(marking_map const & other)
190 {
191   cow_version = ++last_created_roster;
192   other.cow_version = ++last_created_roster;
193   _store = other._store;
194   return *this;
195 }
196 
clear()197 void marking_map::clear()
198 {
199   cow_version = ++last_created_roster;
200   _store.clear();
201 }
202 
get_marking(node_id nid) const203 const_marking_t marking_map::get_marking(node_id nid) const
204 {
205   marking_t const & m = _store.get_if_present(nid);
206   I(m);
207   return m;
208 }
209 
get_marking_for_update(node_id nid)210 marking_t const & marking_map::get_marking_for_update(node_id nid)
211 {
212   marking_t const & m = _store.get_unshared_if_present(nid);
213   I(m);
214   if (cow_version == m->cow_version)
215     return m;
216   if (m.unique())
217     {
218       m->cow_version = cow_version;
219       return m;
220     }
221   return _store.set(nid, marking_t(new marking(*m)));
222 }
223 
contains(node_id nid) const224 bool marking_map::contains(node_id nid) const
225 {
226   return static_cast<bool>(_store.get_if_present(nid));
227 }
228 
remove_marking(node_id nid)229 void marking_map::remove_marking(node_id nid)
230 {
231   unsigned pre_sz = _store.size();
232   _store.unset(nid);
233   I(_store.size() == pre_sz - 1);
234 }
235 
put_marking(node_id nid,marking_t const & m)236 void marking_map::put_marking(node_id nid, marking_t const & m)
237 {
238   I(_store.set_if_missing(nid, m));
239 }
240 
put_marking(node_id nid,const_marking_t const & m)241 void marking_map::put_marking(node_id nid, const_marking_t const & m)
242 {
243   I(_store.set_if_missing(nid, boost::const_pointer_cast<marking>(m)));
244 }
245 
put_or_replace_marking(node_id nid,const_marking_t const & m)246 void marking_map::put_or_replace_marking(node_id nid, const_marking_t const & m)
247 {
248   _store.set(nid, boost::const_pointer_cast<marking>(m));
249 }
250 
size() const251 size_t marking_map::size() const
252 {
253   return _store.size();
254 }
255 
begin() const256 marking_map::const_iterator marking_map::begin() const
257 {
258   return _store.begin();
259 }
260 
end() const261 marking_map::const_iterator marking_map::end() const
262 {
263   return _store.end();
264 }
265 
266 
node(node_id i)267 node::node(node_id i)
268   : self(i),
269     parent(the_null_node),
270     name(),
271     type(node_type_none),
272     cow_version(0)
273 {
274 }
275 
276 
node()277 node::node()
278   : self(the_null_node),
279     parent(the_null_node),
280     name(),
281     type(node_type_none),
282     cow_version(0)
283 {
284 }
285 
286 
dir_node(node_id i)287 dir_node::dir_node(node_id i)
288   : node(i)
289 {
290   type = node_type_dir;
291 }
292 
293 
dir_node()294 dir_node::dir_node()
295   : node()
296 {
297   type = node_type_dir;
298 }
299 
300 
301 bool
has_child(path_component const & pc) const302 dir_node::has_child(path_component const & pc) const
303 {
304   return children.find(pc) != children.end();
305 }
306 
307 node_t
get_child(path_component const & pc) const308 dir_node::get_child(path_component const & pc) const
309 {
310   return safe_get(children, pc);
311 }
312 
313 
314 void
attach_child(path_component const & pc,node_t child)315 dir_node::attach_child(path_component const & pc, node_t child)
316 {
317   I(null_node(child->parent));
318   I(child->name.empty());
319   I(cow_version == child->cow_version);
320   safe_insert(children, make_pair(pc, child));
321   child->parent = this->self;
322   child->name = pc;
323 }
324 
325 
326 node_t
detach_child(path_component const & pc)327 dir_node::detach_child(path_component const & pc)
328 {
329   node_t n = get_child(pc);
330   I(cow_version == n->cow_version);
331   n->parent = the_null_node;
332   n->name = path_component();
333   safe_erase(children, pc);
334   return n;
335 }
336 
337 
338 node_t
clone()339 dir_node::clone()
340 {
341   dir_t d = dir_t(new dir_node(self));
342   d->parent = parent;
343   d->name = name;
344   d->attrs = attrs;
345   d->children = children;
346   return d;
347 }
348 
349 
file_node(node_id i,file_id const & f)350 file_node::file_node(node_id i, file_id const & f)
351   : node(i),
352     content(f)
353 {
354   type = node_type_file;
355 }
356 
357 
file_node()358 file_node::file_node()
359   : node()
360 {
361   type = node_type_file;
362 }
363 
364 
365 node_t
clone()366 file_node::clone()
367 {
368   file_t f = file_t(new file_node(self, content));
369   f->parent = parent;
370   f->name = name;
371   f->attrs = attrs;
372   return f;
373 }
374 
375 template <> void
dump(node_t const & n,string & out)376 dump(node_t const & n, string & out)
377 {
378   ostringstream oss;
379   string name;
380   dump(n->name, name);
381   oss << "address: " << n << " (uses: " << n.use_count() << ")\n"
382       << "self: " << n->self << '\n'
383       << "parent: " << n->parent << '\n'
384       << "name: " << name << '\n';
385   string attr_map_s;
386   dump(n->attrs, attr_map_s);
387   oss << "attrs:\n" << attr_map_s;
388   oss << "type: ";
389   if (is_file_t(n))
390     {
391       oss << "file\ncontent: "
392           << downcast_to_file_t(n)->content
393           << '\n';
394     }
395   else
396     {
397       oss << "dir\n";
398       dir_map const & c = downcast_to_dir_t(n)->children;
399       oss << "children: " << c.size() << '\n';
400       for (dir_map::const_iterator i = c.begin(); i != c.end(); ++i)
401         {
402           dump(i->first, name);
403           oss << "  " << name << " -> " << i->second << '\n';
404         }
405     }
406   out = oss.str();
407 }
408 
409 
roster_t()410 roster_t::roster_t()
411   : cow_version(++last_created_roster)
412 {
413 }
414 
roster_t(roster_t const & other)415 roster_t::roster_t(roster_t const & other)
416   : cow_version(++last_created_roster)
417 {
418   root_dir = other.root_dir;
419   nodes = other.nodes;
420   other.cow_version = ++last_created_roster;
421 }
422 
423 roster_t &
operator =(roster_t const & other)424 roster_t::operator=(roster_t const & other)
425 {
426   root_dir = other.root_dir;
427   nodes = other.nodes;
428   cow_version = ++last_created_roster;
429   other.cow_version = ++last_created_roster;
430   return *this;
431 }
432 
433 
dfs_iter(const_dir_t r,bool t=false)434 dfs_iter::dfs_iter(const_dir_t r, bool t = false)
435   : root(r), return_root(root), track_path(t)
436 {
437   if (root && !root->children.empty())
438     stk.push(make_pair(root, root->children.begin()));
439 }
440 
441 bool
finished() const442 dfs_iter::finished() const
443 {
444   return (!return_root) && stk.empty();
445 }
446 
447 string const &
path() const448 dfs_iter::path() const
449 {
450   I(track_path);
451   return curr_path;
452 }
453 
454 const_node_t
operator *() const455 dfs_iter::operator*() const
456 {
457   I(!finished());
458   if (return_root)
459     return root;
460   else
461     {
462       I(!stk.empty());
463       return stk.top().second->second;
464     }
465 }
466 
467 void
advance_top()468 dfs_iter::advance_top()
469 {
470   int prevsize = 0;
471   int nextsize = 0;
472   pair<const_dir_t, dir_map::const_iterator> & stack_top(stk.top());
473   if (track_path)
474     {
475       prevsize = stack_top.second->first().size();
476     }
477 
478   ++stack_top.second;
479 
480   if (track_path)
481     {
482       if (stack_top.second != stack_top.first->children.end())
483         nextsize = stack_top.second->first().size();
484 
485       int tmpsize = curr_path.size()-prevsize;
486       I(tmpsize >= 0);
487       curr_path.resize(tmpsize);
488       if (nextsize != 0)
489         curr_path.insert(curr_path.end(),
490                          stack_top.second->first().begin(),
491                          stack_top.second->first().end());
492     }
493 }
494 
495 void
operator ++()496 dfs_iter::operator++()
497 {
498   I(!finished());
499 
500   if (return_root)
501     {
502       return_root = false;
503       if (!stk.empty())
504         curr_path = stk.top().second->first();
505       return;
506     }
507 
508   // we're not finished, so we need to set up so operator* will return the
509   // right thing.
510   node_t ntmp = stk.top().second->second;
511   if (is_dir_t(ntmp))
512     {
513       dir_t dtmp = downcast_to_dir_t(ntmp);
514       stk.push(make_pair(dtmp, dtmp->children.begin()));
515 
516       if (track_path)
517         {
518           if (!curr_path.empty())
519             curr_path += "/";
520           if (!dtmp->children.empty())
521             curr_path += dtmp->children.begin()->first();
522         }
523     }
524   else
525     {
526       advance_top();
527     }
528 
529   while (!stk.empty()
530          && stk.top().second == stk.top().first->children.end())
531     {
532       stk.pop();
533       if (!stk.empty())
534         {
535           if (track_path)
536             {
537               curr_path.resize(curr_path.size()-1);
538             }
539           advance_top();
540         }
541     }
542 }
543 
544 bool
has_root() const545 roster_t::has_root() const
546 {
547   return static_cast<bool>(root_dir);
548 }
549 
550 
551 inline bool
same_type(const_node_t a,const_node_t b)552 same_type(const_node_t a, const_node_t b)
553 {
554   return is_file_t(a) == is_file_t(b);
555 }
556 
557 
558 bool
shallow_equal(const_node_t a,const_node_t b,bool shallow_compare_dir_children,bool compare_file_contents)559 shallow_equal(const_node_t a, const_node_t b,
560               bool shallow_compare_dir_children,
561               bool compare_file_contents)
562 {
563   if (a->self != b->self)
564     return false;
565 
566   if (a->parent != b->parent)
567     return false;
568 
569   if (a->name != b->name)
570     return false;
571 
572   if (a->attrs != b->attrs)
573     return false;
574 
575   if (! same_type(a,b))
576     return false;
577 
578   if (is_file_t(a))
579     {
580       if (compare_file_contents)
581         {
582           const_file_t fa = downcast_to_file_t(a);
583           const_file_t fb = downcast_to_file_t(b);
584           if (!(fa->content == fb->content))
585             return false;
586         }
587     }
588   else
589     {
590       const_dir_t da = downcast_to_dir_t(a);
591       const_dir_t db = downcast_to_dir_t(b);
592 
593       if (shallow_compare_dir_children)
594         {
595           if (da->children.size() != db->children.size())
596             return false;
597 
598           dir_map::const_iterator
599             i = da->children.begin(),
600             j = db->children.begin();
601 
602           while (i != da->children.end() && j != db->children.end())
603             {
604               if (i->first != j->first)
605                 return false;
606               if (i->second->self != j->second->self)
607                 return false;
608               ++i;
609               ++j;
610             }
611           I(i == da->children.end() && j == db->children.end());
612         }
613     }
614   return true;
615 }
616 
617 
618 // FIXME_ROSTERS: why does this do two loops? why does it pass 'true' for
619 // shallow_compare_dir_children to shallow_equal? -- njs
620 bool
operator ==(roster_t const & other) const621 roster_t::operator==(roster_t const & other) const
622 {
623   node_map::const_iterator i = nodes.begin(), j = other.nodes.begin();
624   while (i != nodes.end() && j != other.nodes.end())
625     {
626       if (i->first != j->first)
627         return false;
628       if (!shallow_equal(i->second, j->second, true))
629         return false;
630       ++i;
631       ++j;
632     }
633 
634   if (i != nodes.end() || j != other.nodes.end())
635     return false;
636 
637   dfs_iter p(root_dir), q(other.root_dir);
638   while (! (p.finished() || q.finished()))
639     {
640       if (!shallow_equal(*p, *q, true))
641         return false;
642       ++p;
643       ++q;
644     }
645 
646   if (!(p.finished() && q.finished()))
647     return false;
648 
649   return true;
650 }
651 
652 // This is exactly the same as roster_t::operator== (and the same FIXME
653 // above applies) except that it does not compare file contents.
654 bool
equal_shapes(roster_t const & a,roster_t const & b)655 equal_shapes(roster_t const & a, roster_t const & b)
656 {
657   node_map::const_iterator i = a.nodes.begin(), j = b.nodes.begin();
658   while (i != a.nodes.end() && j != b.nodes.end())
659     {
660       if (i->first != j->first)
661         return false;
662       if (!shallow_equal(i->second, j->second, true, false))
663         return false;
664       ++i;
665       ++j;
666     }
667 
668   if (i != a.nodes.end() || j != b.nodes.end())
669     return false;
670 
671   dfs_iter p(a.root_dir), q(b.root_dir);
672   while (! (p.finished() || q.finished()))
673     {
674       if (!shallow_equal(*p, *q, true, false))
675         return false;
676       ++p;
677       ++q;
678     }
679 
680   if (!(p.finished() && q.finished()))
681     return false;
682 
683   return true;
684 }
685 
get_node_internal(file_path const & p) const686 node_t roster_t::get_node_internal(file_path const & p) const
687 {
688   MM(*this);
689   MM(p);
690   I(has_root());
691   if (p.empty())
692     return root_dir;
693 
694   dir_t nd = root_dir;
695   string const & pi = p.as_internal();
696   string::size_type start = 0, stop;
697   for (;;)
698     {
699       stop = pi.find('/', start);
700       path_component pc(pi, start, (stop == string::npos
701                                     ? stop : stop - start));
702       dir_map::const_iterator child = nd->children.find(pc);
703 
704       I(child != nd->children.end());
705       if (stop == string::npos)
706         return child->second;
707 
708       start = stop + 1;
709       nd = downcast_to_dir_t(child->second);
710     }
711 }
712 
713 const_node_t
get_node(file_path const & p) const714 roster_t::get_node(file_path const & p) const
715 {
716   return get_node_internal(p);
717 }
718 
719 node_t
get_node_for_update(file_path const & p)720 roster_t::get_node_for_update(file_path const & p)
721 {
722   node_t n = get_node_internal(p);
723   unshare(n);
724   return n;
725 }
726 
727 bool
has_node(node_id n) const728 roster_t::has_node(node_id n) const
729 {
730   return static_cast<bool>(nodes.get_if_present(n));
731 }
732 
733 bool
is_root(node_id n) const734 roster_t::is_root(node_id n) const
735 {
736   return has_root() && root_dir->self == n;
737 }
738 
739 bool
is_attached(node_id n) const740 roster_t::is_attached(node_id n) const
741 {
742   if (!has_root())
743     return false;
744   if (n == root_dir->self)
745     return true;
746   if (!has_node(n))
747     return false;
748 
749   const_node_t node = get_node(n);
750 
751   return !null_node(node->parent);
752 }
753 
754 bool
has_node(file_path const & p) const755 roster_t::has_node(file_path const & p) const
756 {
757   MM(*this);
758   MM(p);
759 
760   if (!has_root())
761     return false;
762   if (p.empty())
763     return true;
764 
765   dir_t nd = root_dir;
766   string const & pi = p.as_internal();
767   string::size_type start = 0, stop;
768   for (;;)
769     {
770       stop = pi.find('/', start);
771       path_component pc(pi, start, (stop == string::npos
772                                     ? stop : stop - start));
773       dir_map::const_iterator child = nd->children.find(pc);
774 
775       if (child == nd->children.end())
776         return false;
777       if (stop == string::npos)
778         return true;
779       if (!is_dir_t(child->second))
780         return false;
781 
782       start = stop + 1;
783       nd = downcast_to_dir_t(child->second);
784     }
785 }
786 
787 const_node_t
get_node(node_id nid) const788 roster_t::get_node(node_id nid) const
789 {
790   node_t const &n(nodes.get_if_present(nid));
791   I(n);
792   return n;
793 }
794 
795 node_t
get_node_for_update(node_id nid)796 roster_t::get_node_for_update(node_id nid)
797 {
798   node_t n(nodes.get_if_present(nid));
799   I(n);
800   unshare(n);
801   return n;
802 }
803 
804 
805 void
get_name(node_id nid,file_path & p) const806 roster_t::get_name(node_id nid, file_path & p) const
807 {
808   I(!null_node(nid));
809 
810   stack<const_node_t> sp;
811   size_t size = 0;
812 
813   while (nid != root_dir->self)
814     {
815       const_node_t n = get_node(nid);
816       sp.push(n);
817       size += n->name().length() + 1;
818       nid = n->parent;
819     }
820 
821   if (size == 0)
822     {
823       p.clear();
824       return;
825     }
826 
827   I(!bookkeeping_path::internal_string_is_bookkeeping_path(typecast_vocab<utf8>(sp.top()->name)));
828 
829   string tmp;
830   tmp.reserve(size);
831 
832   for (;;)
833     {
834       tmp += sp.top()->name();
835       sp.pop();
836       if (sp.empty())
837         break;
838       tmp += "/";
839     }
840 
841   p = file_path(tmp, 0, string::npos);  // short circuit constructor
842 }
843 
844 void
unshare(node_t & n,bool is_in_node_map)845 roster_t::unshare(node_t & n, bool is_in_node_map)
846 {
847   if (cow_version == n->cow_version)
848     return;
849   // we can't get at the (possibly shared) pointer in the node_map,
850   // so if we were given the only pointer then we know the node
851   // isn't in any other rosters
852   if (n.unique())
853     {
854       n->cow_version = cow_version;
855       return;
856     }
857   // here we could theoretically walk up the tree to see if
858   // the node or any of its parents have too many references,
859   // but I'm guessing that the avoided copies won't be worth
860   // the extra search time
861 
862   node_t old = n;
863   n = n->clone();
864   n->cow_version = cow_version;
865   if (is_in_node_map)
866     nodes.set(n->self, n);
867   if (!null_node(n->parent))
868     {
869       node_t p = nodes.get_if_present(n->parent);
870       I(p);
871       unshare(p);
872       downcast_to_dir_t(p)->children[n->name] = n;
873     }
874   if (root_dir && root_dir->self == n->self)
875     {
876       root_dir = downcast_to_dir_t(n);
877     }
878 }
879 
880 void
replace_node_id(node_id from,node_id to)881 roster_t::replace_node_id(node_id from, node_id to)
882 {
883   I(!null_node(from));
884   I(!null_node(to));
885   node_t n = nodes.get_if_present(from);
886   I(n);
887   nodes.unset(from);
888 
889   unshare(n, false);
890 
891   I(nodes.set_if_missing(to, n));
892   n->self = to;
893   if (is_dir_t(n))
894     {
895       dir_t d = downcast_to_dir_t(n);
896       for (dir_map::iterator i = d->children.begin(); i != d->children.end(); ++i)
897         {
898           I(i->second->parent == from);
899           i->second->parent = to;
900         }
901     }
902 }
903 
904 
905 // this records the old location into the old_locations member, to prevent the
906 // same node from being re-attached at the same place.
907 node_id
detach_node(file_path const & p)908 roster_t::detach_node(file_path const & p)
909 {
910   file_path dirname;
911   path_component basename;
912   p.dirname_basename(dirname, basename);
913 
914   I(has_root());
915   if (basename.empty())
916     {
917       // detaching the root dir
918       I(dirname.empty());
919       node_id root_id = root_dir->self;
920       safe_insert(old_locations, make_pair(root_id, make_pair(root_dir->parent,
921                                                               root_dir->name)));
922       // clear ("reset") the root_dir shared_pointer
923       root_dir.reset();
924       I(!has_root());
925       return root_id;
926     }
927 
928   node_t pp = get_node_for_update(dirname);
929   dir_t parent = downcast_to_dir_t(pp);
930   node_t c = parent->get_child(basename);
931   unshare(c);
932   node_id nid = parent->detach_child(basename)->self;
933   safe_insert(old_locations,
934               make_pair(nid, make_pair(parent->self, basename)));
935   I(!null_node(nid));
936   return nid;
937 }
938 
939 void
detach_node(node_id nid)940 roster_t::detach_node(node_id nid)
941 {
942   node_t n = get_node_for_update(nid);
943   if (null_node(n->parent))
944     {
945       // detaching the root dir
946       I(n->name.empty());
947       safe_insert(old_locations,
948                   make_pair(nid, make_pair(n->parent, n->name)));
949       root_dir.reset();
950       I(!has_root());
951     }
952   else
953     {
954       path_component name = n->name;
955       node_t p = get_node_for_update(n->parent);
956       dir_t parent = downcast_to_dir_t(p);
957       I(parent->detach_child(name) == n);
958       safe_insert(old_locations,
959                   make_pair(nid, make_pair(n->parent, name)));
960     }
961 }
962 
963 void
drop_detached_node(node_id nid)964 roster_t::drop_detached_node(node_id nid)
965 {
966   // ensure the node is already detached
967   const_node_t n = get_node(nid);
968   I(null_node(n->parent));
969   I(n->name.empty());
970   // if it's a dir, make sure it's empty
971   if (is_dir_t(n))
972     I(downcast_to_dir_t(n)->children.empty());
973   // all right, kill it
974   nodes.unset(nid);
975 
976   // Resolving a duplicate name conflict via drop one side requires dropping
977   // nodes that were never attached. So we erase the key without checking
978   // whether it was present.
979   old_locations.erase(nid);
980 }
981 
982 
983 // this creates a node in a detached state, but it does _not_ insert an entry
984 // for it into the old_locations member, because there is no old_location to
985 // forbid
986 node_id
create_dir_node(node_id_source & nis)987 roster_t::create_dir_node(node_id_source & nis)
988 {
989   node_id nid = nis.next();
990   create_dir_node(nid);
991   return nid;
992 }
993 void
create_dir_node(node_id nid)994 roster_t::create_dir_node(node_id nid)
995 {
996   dir_t d = dir_t(new dir_node());
997   d->self = nid;
998   d->cow_version = cow_version;
999   nodes.set(nid, d);
1000 }
1001 
1002 
1003 // this creates a node in a detached state, but it does _not_ insert an entry
1004 // for it into the old_locations member, because there is no old_location to
1005 // forbid
1006 node_id
create_file_node(file_id const & content,node_id_source & nis)1007 roster_t::create_file_node(file_id const & content, node_id_source & nis)
1008 {
1009   node_id nid = nis.next();
1010   create_file_node(content, nid);
1011   return nid;
1012 }
1013 
1014 void
create_file_node(file_id const & content,node_id nid)1015 roster_t::create_file_node(file_id const & content, node_id nid)
1016 {
1017   file_t f = file_t(new file_node());
1018   f->self = nid;
1019   f->content = content;
1020   f->cow_version = cow_version;
1021   nodes.set(nid, f);
1022 }
1023 
1024 void
attach_node(node_id nid,file_path const & p)1025 roster_t::attach_node(node_id nid, file_path const & p)
1026 {
1027   MM(p);
1028   if (p.empty())
1029     // attaching the root node
1030     attach_node(nid, the_null_node, path_component());
1031   else
1032     {
1033       file_path dir;
1034       path_component base;
1035       p.dirname_basename(dir, base);
1036       attach_node(nid, get_node(dir)->self, base);
1037     }
1038 }
1039 
1040 void
attach_node(node_id nid,node_id parent,path_component name)1041 roster_t::attach_node(node_id nid, node_id parent, path_component name)
1042 {
1043   node_t n = get_node_for_update(nid);
1044 
1045   I(!null_node(n->self));
1046   // ensure the node is already detached (as best one can)
1047   I(null_node(n->parent));
1048   I(n->name.empty());
1049 
1050   // this iterator might point to old_locations.end(), because old_locations
1051   // only includes entries for renames, not new nodes
1052   map<node_id, pair<node_id, path_component> >::iterator
1053     i = old_locations.find(nid);
1054 
1055   if (null_node(parent) || name.empty())
1056     {
1057       I(null_node(parent) && name.empty());
1058       I(null_node(n->parent));
1059       I(n->name.empty());
1060       I(!has_root());
1061       root_dir = downcast_to_dir_t(n);
1062       I(i == old_locations.end() || i->second != make_pair(root_dir->parent,
1063                                                            root_dir->name));
1064     }
1065   else
1066     {
1067       node_t p = get_node_for_update(parent);
1068       dir_t parent_n = downcast_to_dir_t(p);
1069       parent_n->attach_child(name, n);
1070       I(i == old_locations.end() || i->second != make_pair(n->parent, n->name));
1071     }
1072 
1073   if (i != old_locations.end())
1074     old_locations.erase(i);
1075 }
1076 
1077 void
apply_delta(file_path const & pth,file_id const & old_id,file_id const & new_id)1078 roster_t::apply_delta(file_path const & pth,
1079                       file_id const & old_id,
1080                       file_id const & new_id)
1081 {
1082   node_t n = get_node_for_update(pth);
1083   file_t f = downcast_to_file_t(n);
1084   I(f->content == old_id);
1085   I(!null_node(f->self));
1086   I(!(f->content == new_id));
1087   f->content = new_id;
1088 }
1089 
1090 void
set_content(node_id nid,file_id const & new_id)1091 roster_t::set_content(node_id nid, file_id const & new_id)
1092 {
1093   node_t n = get_node_for_update(nid);
1094   file_t f = downcast_to_file_t(n);
1095   I(!(f->content == new_id));
1096   f->content = new_id;
1097 }
1098 
1099 
1100 void
clear_attr(file_path const & path,attr_key const & key)1101 roster_t::clear_attr(file_path const & path,
1102                      attr_key const & key)
1103 {
1104   set_attr(path, key, make_pair(false, attr_value()));
1105 }
1106 
1107 void
erase_attr(node_id nid,attr_key const & name)1108 roster_t::erase_attr(node_id nid,
1109                      attr_key const & name)
1110 {
1111   node_t n = get_node_for_update(nid);
1112   safe_erase(n->attrs, name);
1113 }
1114 
1115 void
set_attr(file_path const & path,attr_key const & key,attr_value const & val)1116 roster_t::set_attr(file_path const & path,
1117                    attr_key const & key,
1118                    attr_value const & val)
1119 {
1120   set_attr(path, key, make_pair(true, val));
1121 }
1122 
1123 
1124 void
set_attr(file_path const & pth,attr_key const & name,pair<bool,attr_value> const & val)1125 roster_t::set_attr(file_path const & pth,
1126                    attr_key const & name,
1127                    pair<bool, attr_value> const & val)
1128 {
1129   node_t n = get_node_for_update(pth);
1130   I(val.first || val.second().empty());
1131   I(!null_node(n->self));
1132   attr_map_t::iterator i = n->attrs.find(name);
1133   if (i == n->attrs.end())
1134     i = safe_insert(n->attrs, make_pair(name,
1135                                         make_pair(false, attr_value())));
1136   I(i->second != val);
1137   i->second = val;
1138 }
1139 
1140 // same as above, but allowing <unknown> -> <dead> transition
1141 void
set_attr_unknown_to_dead_ok(node_id nid,attr_key const & name,pair<bool,attr_value> const & val)1142 roster_t::set_attr_unknown_to_dead_ok(node_id nid,
1143                                       attr_key const & name,
1144                                       pair<bool, attr_value> const & val)
1145 {
1146   node_t n = get_node_for_update(nid);
1147   I(val.first || val.second().empty());
1148   attr_map_t::iterator i = n->attrs.find(name);
1149   if (i != n->attrs.end())
1150     I(i->second != val);
1151   n->attrs[name] = val;
1152 }
1153 
1154 bool
get_attr(file_path const & pth,attr_key const & name,attr_value & val) const1155 roster_t::get_attr(file_path const & pth,
1156                    attr_key const & name,
1157                    attr_value & val) const
1158 {
1159   I(has_node(pth));
1160 
1161   const_node_t n = get_node(pth);
1162   attr_map_t::const_iterator i = n->attrs.find(name);
1163   if (i != n->attrs.end() && i->second.first)
1164     {
1165       val = i->second.second;
1166       return true;
1167     }
1168   return false;
1169 }
1170 
1171 
1172 template <> void
dump(roster_t const & val,string & out)1173 dump(roster_t const & val, string & out)
1174 {
1175   ostringstream oss;
1176   if (val.root_dir)
1177     oss << "Root node: " << val.root_dir->self << '\n'
1178         << "   at " << val.root_dir << ", uses: " << val.root_dir.use_count() << '\n';
1179   else
1180     oss << "root dir is NULL\n";
1181   for (node_map::const_iterator i = val.nodes.begin(); i != val.nodes.end(); ++i)
1182     {
1183       oss << "\nNode " << i->first << '\n';
1184       string node_s;
1185       dump(i->second, node_s);
1186       oss << node_s;
1187     }
1188   out = oss.str();
1189 }
1190 
1191 template <> void
dump(std::map<node_id,std::pair<node_id,path_component>> const & val,string & out)1192 dump(std::map<node_id, std::pair<node_id, path_component> > const & val, string & out)
1193 {
1194   ostringstream oss;
1195   for (std::map<node_id, std::pair<node_id, path_component> >::const_iterator i = val.begin();
1196        i != val.end();
1197        ++i)
1198     {
1199       oss << "Node " << i->first;
1200       oss << " node " << i->second.first;
1201       oss << " path " << i->second.second << "\n";
1202     }
1203   out = oss.str();
1204 }
1205 
1206 void
check_sane(bool temp_nodes_ok) const1207 roster_t::check_sane(bool temp_nodes_ok) const
1208 {
1209   MM(*this);
1210   MM(old_locations);
1211 
1212   node_id parent_id(the_null_node);
1213   const_dir_t parent_dir;
1214   I(old_locations.empty()); // if fail, some renamed node is still present and detached
1215   I(has_root());
1216   size_t maxdepth = nodes.size();
1217   bool is_first = true;
1218   for (dfs_iter i(root_dir); !i.finished(); ++i)
1219     {
1220       const_node_t const &n(*i);
1221       if (is_first)
1222         {
1223           I(n->name.empty() && null_node(n->parent));
1224           is_first = false;
1225         }
1226       else
1227         {
1228           I(!n->name.empty() && !null_node(n->parent));
1229 
1230           if (n->parent != parent_id)
1231             {
1232               parent_id = n->parent;
1233               parent_dir = downcast_to_dir_t(get_node(parent_id));
1234             }
1235           I(parent_dir->get_child(n->name) == n);
1236         }
1237       for (attr_map_t::const_iterator a = n->attrs.begin(); a != n->attrs.end(); ++a)
1238         {
1239           I(a->second.first || a->second.second().empty());
1240         }
1241       if (is_file_t(n))
1242         {
1243           I(!null_id(downcast_to_file_t(n)->content));
1244         }
1245       node_id nid(n->self);
1246       I(!null_node(nid));
1247       if (!temp_nodes_ok)
1248         {
1249           I(!temp_node(nid));
1250         }
1251       I(n == get_node(nid));
1252       I(maxdepth-- > 0);
1253     }
1254   I(maxdepth == 0); // if fails, some newly created node is not attached
1255 }
1256 
1257 void
check_sane_against(marking_map const & markings,bool temp_nodes_ok) const1258 roster_t::check_sane_against(marking_map const & markings, bool temp_nodes_ok) const
1259 {
1260   check_sane(temp_nodes_ok);
1261 
1262   node_map::const_iterator ri;
1263   marking_map::const_iterator mi;
1264 
1265   for (ri = nodes.begin(), mi = markings.begin();
1266        ri != nodes.end() && mi != markings.end();
1267        ++ri, ++mi)
1268     {
1269       I(!null_id(mi->second->birth_revision));
1270       I(!mi->second->parent_name.empty());
1271 
1272       if (is_file_t(ri->second))
1273         I(!mi->second->file_content.empty());
1274       else
1275         I(mi->second->file_content.empty());
1276 
1277       attr_map_t::const_iterator rai;
1278       map<attr_key, set<revision_id> >::const_iterator mai;
1279       for (rai = ri->second->attrs.begin(), mai = mi->second->attrs.begin();
1280            rai != ri->second->attrs.end() && mai != mi->second->attrs.end();
1281            ++rai, ++mai)
1282         {
1283           I(rai->first == mai->first);
1284           I(!mai->second.empty());
1285         }
1286       I(rai == ri->second->attrs.end() && mai == mi->second->attrs.end());
1287       // TODO: attrs
1288     }
1289 
1290   I(ri == nodes.end() && mi == markings.end());
1291 }
1292 
1293 
temp_node_id_source()1294 temp_node_id_source::temp_node_id_source()
1295   : curr(first_temp_node)
1296 {}
1297 
1298 node_id
next()1299 temp_node_id_source::next()
1300 {
1301     node_id n = curr++;
1302     I(temp_node(n));
1303     return n;
1304 }
1305 
editable_roster_base(roster_t & r,node_id_source & nis)1306 editable_roster_base::editable_roster_base(roster_t & r, node_id_source & nis)
1307   : r(r), nis(nis)
1308 {}
1309 
1310 node_id
detach_node(file_path const & src)1311 editable_roster_base::detach_node(file_path const & src)
1312 {
1313   // L(FL("detach_node('%s')") % src);
1314   return r.detach_node(src);
1315 }
1316 
1317 void
drop_detached_node(node_id nid)1318 editable_roster_base::drop_detached_node(node_id nid)
1319 {
1320   // L(FL("drop_detached_node(%d)") % nid);
1321   r.drop_detached_node(nid);
1322 }
1323 
1324 node_id
create_dir_node()1325 editable_roster_base::create_dir_node()
1326 {
1327   // L(FL("create_dir_node()"));
1328   node_id n = r.create_dir_node(nis);
1329   // L(FL("create_dir_node() -> %d") % n);
1330   return n;
1331 }
1332 
1333 node_id
create_file_node(file_id const & content)1334 editable_roster_base::create_file_node(file_id const & content)
1335 {
1336   // L(FL("create_file_node('%s')") % content);
1337   node_id n = r.create_file_node(content, nis);
1338   // L(FL("create_file_node('%s') -> %d") % content % n);
1339   return n;
1340 }
1341 
1342 void
attach_node(node_id nid,file_path const & dst)1343 editable_roster_base::attach_node(node_id nid, file_path const & dst)
1344 {
1345   // L(FL("attach_node(%d, '%s')") % nid % dst);
1346   MM(dst);
1347   MM(this->r);
1348   r.attach_node(nid, dst);
1349 }
1350 
1351 void
apply_delta(file_path const & pth,file_id const & old_id,file_id const & new_id)1352 editable_roster_base::apply_delta(file_path const & pth,
1353                                   file_id const & old_id,
1354                                   file_id const & new_id)
1355 {
1356   // L(FL("apply_delta('%s', '%s', '%s')") % pth % old_id % new_id);
1357   r.apply_delta(pth, old_id, new_id);
1358 }
1359 
1360 void
clear_attr(file_path const & path,attr_key const & key)1361 editable_roster_base::clear_attr(file_path const & path,
1362                                  attr_key const & key)
1363 {
1364   // L(FL("clear_attr('%s', '%s')") % path % key);
1365   r.clear_attr(path, key);
1366 }
1367 
1368 void
set_attr(file_path const & path,attr_key const & key,attr_value const & val)1369 editable_roster_base::set_attr(file_path const & path,
1370                                attr_key const & key,
1371                                attr_value const & val)
1372 {
1373   // L(FL("set_attr('%s', '%s', '%s')") % path % key % val);
1374   r.set_attr(path, key, val);
1375 }
1376 
1377 void
commit()1378 editable_roster_base::commit()
1379 {
1380 }
1381 
1382 namespace
1383 {
1384   class editable_roster_for_merge
1385     : public editable_roster_base
1386   {
1387   public:
1388     set<node_id> new_nodes;
editable_roster_for_merge(roster_t & r,node_id_source & nis)1389     editable_roster_for_merge(roster_t & r, node_id_source & nis)
1390       : editable_roster_base(r, nis)
1391     {}
create_dir_node()1392     virtual node_id create_dir_node()
1393     {
1394       node_id nid = this->editable_roster_base::create_dir_node();
1395       new_nodes.insert(nid);
1396       return nid;
1397     }
create_file_node(file_id const & content)1398     virtual node_id create_file_node(file_id const & content)
1399     {
1400       node_id nid = this->editable_roster_base::create_file_node(content);
1401       new_nodes.insert(nid);
1402       return nid;
1403     }
1404   };
1405 
1406 
union_new_nodes(roster_t & a,set<node_id> & a_new,roster_t & b,set<node_id> & b_new,node_id_source & nis)1407   void union_new_nodes(roster_t & a, set<node_id> & a_new,
1408                        roster_t & b, set<node_id> & b_new,
1409                        node_id_source & nis)
1410   {
1411     // We must not replace a node whose id is in both a_new and b_new
1412     // with a new temp id that is already in either set.  b_new is
1413     // destructively modified, so record the union of both sets now.
1414     set<node_id> all_new_nids;
1415     std::set_union(a_new.begin(), a_new.end(),
1416                    b_new.begin(), b_new.end(),
1417                    std::inserter(all_new_nids, all_new_nids.begin()));
1418 
1419     // First identify nodes that are new in A but not in B, or new in both.
1420     for (set<node_id>::const_iterator i = a_new.begin(); i != a_new.end(); ++i)
1421       {
1422         node_id const aid = *i;
1423         file_path p;
1424         // SPEEDUP?: climb out only so far as is necessary to find a shared
1425         // id?  possibly faster (since usually will get a hit immediately),
1426         // but may not be worth the effort (since it doesn't take that long to
1427         // get out in any case)
1428         a.get_name(aid, p);
1429         node_id bid = b.get_node(p)->self;
1430         if (b_new.find(bid) != b_new.end())
1431           {
1432             I(temp_node(bid));
1433             node_id new_nid;
1434             do
1435               new_nid = nis.next();
1436             while (all_new_nids.find(new_nid) != all_new_nids.end());
1437 
1438             a.replace_node_id(aid, new_nid);
1439             b.replace_node_id(bid, new_nid);
1440             b_new.erase(bid);
1441           }
1442         else
1443           {
1444             a.replace_node_id(aid, bid);
1445           }
1446       }
1447 
1448     // Now identify nodes that are new in B but not A.
1449     for (set<node_id>::const_iterator i = b_new.begin(); i != b_new.end(); i++)
1450       {
1451         node_id const bid = *i;
1452         file_path p;
1453         // SPEEDUP?: climb out only so far as is necessary to find a shared
1454         // id?  possibly faster (since usually will get a hit immediately),
1455         // but may not be worth the effort (since it doesn't take that long to
1456         // get out in any case)
1457         b.get_name(bid, p);
1458         node_id aid = a.get_node(p)->self;
1459         I(a_new.find(aid) == a_new.end());
1460         b.replace_node_id(bid, aid);
1461       }
1462   }
1463 
1464   void
union_corpses(roster_t & left,roster_t & right)1465   union_corpses(roster_t & left, roster_t & right)
1466   {
1467     // left and right should be equal, except that each may have some attr
1468     // corpses that the other does not
1469     node_map::const_iterator left_i = left.all_nodes().begin();
1470     node_map::const_iterator right_i = right.all_nodes().begin();
1471     while (left_i != left.all_nodes().end() || right_i != right.all_nodes().end())
1472       {
1473         I(left_i->second->self == right_i->second->self);
1474         parallel::iter<attr_map_t> j(left_i->second->attrs,
1475                                      right_i->second->attrs);
1476         // we batch up the modifications until the end, so as not to be
1477         // changing things around under the parallel::iter's feet
1478         set<attr_key> left_missing, right_missing;
1479         while (j.next())
1480           {
1481             MM(j);
1482             switch (j.state())
1483               {
1484               case parallel::invalid:
1485                 I(false);
1486 
1487               case parallel::in_left:
1488                 // this is a corpse
1489                 I(!j.left_data().first);
1490                 right_missing.insert(j.left_key());
1491                 break;
1492 
1493               case parallel::in_right:
1494                 // this is a corpse
1495                 I(!j.right_data().first);
1496                 left_missing.insert(j.right_key());
1497                 break;
1498 
1499               case parallel::in_both:
1500                 break;
1501               }
1502           }
1503         node_t left_n = left_i->second;
1504         for (set<attr_key>::const_iterator j = left_missing.begin();
1505              j != left_missing.end(); ++j)
1506           {
1507             left.unshare(left_n);
1508             safe_insert(left_n->attrs,
1509                         make_pair(*j, make_pair(false, attr_value())));
1510           }
1511         node_t right_n = right_i->second;
1512         for (set<attr_key>::const_iterator j = right_missing.begin();
1513              j != right_missing.end(); ++j)
1514           {
1515             right.unshare(right_n);
1516             safe_insert(right_n->attrs,
1517                         make_pair(*j, make_pair(false, attr_value())));
1518           }
1519         ++left_i;
1520         ++right_i;
1521       }
1522     I(left_i == left.all_nodes().end());
1523     I(right_i == right.all_nodes().end());
1524   }
1525 
1526   // After this, left should == right, and there should be no temporary ids.
1527   // Destroys sets, because that's handy (it has to scan over both, but it can
1528   // skip some double-scanning)
1529   void
unify_rosters(roster_t & left,set<node_id> & left_new,roster_t & right,set<node_id> & right_new,node_id_source & nis)1530   unify_rosters(roster_t & left, set<node_id> & left_new,
1531                 roster_t & right, set<node_id> & right_new,
1532                 node_id_source & nis)
1533   {
1534     // Our basic problem is this: when interpreting a revision with multiple
1535     // csets, those csets all give enough information for us to get the same
1536     // manifest, and even a bit more than that.  However, there is some
1537     // information that is not exposed at the manifest level, and csets alone
1538     // do not give us all we need.  This function is responsible taking the
1539     // two rosters that we get from pure cset application, and fixing them up
1540     // so that they are wholly identical.
1541 
1542     // The first thing that is missing is identification of files.  If one
1543     // cset says "add_file" and the other says nothing, then the add_file is
1544     // not really an add_file.  One of our rosters will have a temp id for
1545     // this file, and the other will not.  In this case, we replace the temp
1546     // id with the other side's permanent id.  However, if both csets say
1547     // "add_file", then this really is a new id; both rosters will have temp
1548     // ids, and we replace both of them with a newly allocated id.  After
1549     // this, the two rosters will have identical node_ids at every path.
1550     union_new_nodes(left, left_new, right, right_new, nis);
1551 
1552     // The other thing we need to fix up is attr corpses.  Live attrs are made
1553     // identical by the csets; but if, say, on one side of a fork an attr is
1554     // added and then deleted, then one of our incoming merge rosters will
1555     // have a corpse for that attr, and the other will not.  We need to make
1556     // sure at both of them end up with the corpse.  This function fixes up
1557     // that.
1558     union_corpses(left, right);
1559   }
1560 
1561   template <typename T> void
mark_unmerged_scalar(set<revision_id> const & parent_marks,T const & parent_val,revision_id const & new_rid,T const & new_val,set<revision_id> & new_marks)1562   mark_unmerged_scalar(set<revision_id> const & parent_marks,
1563                        T const & parent_val,
1564                        revision_id const & new_rid,
1565                        T const & new_val,
1566                        set<revision_id> & new_marks)
1567   {
1568     I(new_marks.empty());
1569     if (parent_val == new_val)
1570       new_marks = parent_marks;
1571     else
1572       new_marks.insert(new_rid);
1573   }
1574 
1575   // This function implements the case.
1576   //   a   b1
1577   //    \ /
1578   //     b2
1579   void
mark_won_merge(set<revision_id> const & a_marks,set<revision_id> const & a_uncommon_ancestors,set<revision_id> const & b1_marks,revision_id const & new_rid,set<revision_id> & new_marks)1580   mark_won_merge(set<revision_id> const & a_marks,
1581                  set<revision_id> const & a_uncommon_ancestors,
1582                  set<revision_id> const & b1_marks,
1583                  revision_id const & new_rid,
1584                  set<revision_id> & new_marks)
1585   {
1586     for (set<revision_id>::const_iterator i = a_marks.begin();
1587          i != a_marks.end(); ++i)
1588       {
1589         if (a_uncommon_ancestors.find(*i) != a_uncommon_ancestors.end())
1590           {
1591             // at least one element of *(a) is not an ancestor of b1
1592             new_marks.clear();
1593             new_marks.insert(new_rid);
1594             return;
1595           }
1596       }
1597     // all elements of *(a) are ancestors of b1; this was a clean merge to b,
1598     // so copy forward the marks.
1599     new_marks = b1_marks;
1600   }
1601 
1602   template <typename T> void
mark_merged_scalar(set<revision_id> const & left_marks,set<revision_id> const & left_uncommon_ancestors,T const & left_val,set<revision_id> const & right_marks,set<revision_id> const & right_uncommon_ancestors,T const & right_val,revision_id const & new_rid,T const & new_val,set<revision_id> & new_marks)1603   mark_merged_scalar(set<revision_id> const & left_marks,
1604                      set<revision_id> const & left_uncommon_ancestors,
1605                      T const & left_val,
1606                      set<revision_id> const & right_marks,
1607                      set<revision_id> const & right_uncommon_ancestors,
1608                      T const & right_val,
1609                      revision_id const & new_rid,
1610                      T const & new_val,
1611                      set<revision_id> & new_marks)
1612   {
1613     I(new_marks.empty());
1614 
1615     // let's not depend on T::operator!= being defined, only on T::operator==
1616     // being defined.
1617     bool diff_from_left = !(new_val == left_val);
1618     bool diff_from_right = !(new_val == right_val);
1619 
1620     // some quick sanity checks
1621     for (set<revision_id>::const_iterator i = left_marks.begin();
1622          i != left_marks.end(); ++i)
1623       I(right_uncommon_ancestors.find(*i) == right_uncommon_ancestors.end());
1624     for (set<revision_id>::const_iterator i = right_marks.begin();
1625          i != right_marks.end(); ++i)
1626       I(left_uncommon_ancestors.find(*i) == left_uncommon_ancestors.end());
1627 
1628     if (diff_from_left && diff_from_right)
1629       new_marks.insert(new_rid);
1630 
1631     else if (diff_from_left && !diff_from_right)
1632       mark_won_merge(left_marks, left_uncommon_ancestors, right_marks,
1633                      new_rid, new_marks);
1634 
1635     else if (!diff_from_left && diff_from_right)
1636       mark_won_merge(right_marks, right_uncommon_ancestors, left_marks,
1637                      new_rid, new_marks);
1638 
1639     else
1640       {
1641         // this is the case
1642         //   a   a
1643         //    \ /
1644         //     a
1645         // so we simply union the mark sets.  This is technically not
1646         // quite the canonical multi-*-merge thing to do; in the case
1647         //     a1*
1648         //    / \      (blah blah; avoid multi-line-comment warning)
1649         //   b   a2
1650         //   |   |
1651         //   a3* |
1652         //    \ /
1653         //     a4
1654         // we will set *(a4) = {a1, a3}, even though the minimal
1655         // common ancestor set is {a3}.  we could fix this by running
1656         // erase_ancestors.  However, there isn't really any point;
1657         // the only operation performed on *(a4) is to test *(a4) > R
1658         // for some revision R.  The truth-value of this test cannot
1659         // be affected by added new revisions to *(a4) that are
1660         // ancestors of revisions that are already in *(a4).
1661         set_union(left_marks.begin(), left_marks.end(),
1662                   right_marks.begin(), right_marks.end(),
1663                   inserter(new_marks, new_marks.begin()));
1664       }
1665   }
1666 
1667   void
mark_new_node(revision_id const & new_rid,const_node_t n,marking_map & mm)1668   mark_new_node(revision_id const & new_rid, const_node_t n, marking_map & mm)
1669   {
1670     marking_t new_marking(new marking());
1671     new_marking->birth_revision = new_rid;
1672     I(new_marking->parent_name.empty());
1673     new_marking->parent_name.insert(new_rid);
1674     I(new_marking->file_content.empty());
1675     if (is_file_t(n))
1676       new_marking->file_content.insert(new_rid);
1677     I(new_marking->attrs.empty());
1678     set<revision_id> singleton;
1679     singleton.insert(new_rid);
1680     for (attr_map_t::const_iterator i = n->attrs.begin();
1681          i != n->attrs.end(); ++i)
1682       new_marking->attrs.insert(make_pair(i->first, singleton));
1683     mm.put_marking(n->self, new_marking);
1684   }
1685 
1686   void
mark_unmerged_node(const_marking_t const & parent_marking,const_node_t parent_n,revision_id const & new_rid,const_node_t n,marking_map & mm)1687   mark_unmerged_node(const_marking_t const & parent_marking,
1688                      const_node_t parent_n,
1689                      revision_id const & new_rid, const_node_t n,
1690                      marking_map & mm)
1691   {
1692     // Our new marking map is initialized as a copy of the parent map.
1693     // So, if nothing's changed, there's nothing to do. Unless this
1694     // is a merge, and the parent marking that was copied happens to
1695     // be the other parent than the one this node came from.
1696     if (n == parent_n || shallow_equal(n, parent_n, true))
1697       {
1698         if (mm.contains(n->self))
1699           {
1700             return;
1701           }
1702         else
1703           {
1704             mm.put_marking(n->self, parent_marking);
1705             return;
1706           }
1707       }
1708 
1709     I(same_type(parent_n, n) && parent_n->self == n->self);
1710 
1711     marking_t new_marking(new marking());
1712 
1713     new_marking->birth_revision = parent_marking->birth_revision;
1714 
1715     mark_unmerged_scalar(parent_marking->parent_name,
1716                          make_pair(parent_n->parent, parent_n->name),
1717                          new_rid,
1718                          make_pair(n->parent, n->name),
1719                          new_marking->parent_name);
1720 
1721     if (is_file_t(n))
1722       mark_unmerged_scalar(parent_marking->file_content,
1723                            downcast_to_file_t(parent_n)->content,
1724                            new_rid,
1725                            downcast_to_file_t(n)->content,
1726                            new_marking->file_content);
1727 
1728     for (attr_map_t::const_iterator i = n->attrs.begin();
1729            i != n->attrs.end(); ++i)
1730       {
1731         set<revision_id> & new_marks = new_marking->attrs[i->first];
1732         I(new_marks.empty());
1733         attr_map_t::const_iterator j = parent_n->attrs.find(i->first);
1734         if (j == parent_n->attrs.end())
1735           new_marks.insert(new_rid);
1736         else
1737           mark_unmerged_scalar(safe_get(parent_marking->attrs, i->first),
1738                                j->second,
1739                                new_rid, i->second, new_marks);
1740       }
1741 
1742     mm.put_or_replace_marking(n->self, new_marking);
1743   }
1744 
1745   void
mark_merged_node(const_marking_t const & left_marking,set<revision_id> const & left_uncommon_ancestors,const_node_t ln,const_marking_t const & right_marking,set<revision_id> const & right_uncommon_ancestors,const_node_t rn,revision_id const & new_rid,const_node_t n,marking_map & mm)1746   mark_merged_node(const_marking_t const & left_marking,
1747                    set<revision_id> const & left_uncommon_ancestors,
1748                    const_node_t ln,
1749                    const_marking_t const & right_marking,
1750                    set<revision_id> const & right_uncommon_ancestors,
1751                    const_node_t rn,
1752                    revision_id const & new_rid,
1753                    const_node_t n,
1754                    marking_map & mm)
1755   {
1756     bool same_nodes = ((ln == rn || shallow_equal(ln, rn, true)) &&
1757                        (ln == n || shallow_equal(ln, n, true)));
1758     if (same_nodes)
1759       {
1760         bool same_markings = left_marking == right_marking
1761           || *left_marking == *right_marking;
1762         if (same_markings)
1763           {
1764             // The child marking will be the same as both parent markings,
1765             // so just leave it as whichever it was copied from.
1766             return;
1767           }
1768       }
1769 
1770     I(same_type(ln, n) && same_type(rn, n));
1771     I(left_marking->birth_revision == right_marking->birth_revision);
1772     marking_t new_marking = mm.get_marking_for_update(n->self);
1773     new_marking->birth_revision = left_marking->birth_revision;
1774     MM(n->self);
1775 
1776     // name
1777     new_marking->parent_name.clear();
1778     mark_merged_scalar(left_marking->parent_name, left_uncommon_ancestors,
1779                        make_pair(ln->parent, ln->name),
1780                        right_marking->parent_name, right_uncommon_ancestors,
1781                        make_pair(rn->parent, rn->name),
1782                        new_rid,
1783                        make_pair(n->parent, n->name),
1784                        new_marking->parent_name);
1785     // content
1786     if (is_file_t(n))
1787       {
1788         const_file_t f = downcast_to_file_t(n);
1789         const_file_t lf = downcast_to_file_t(ln);
1790         const_file_t rf = downcast_to_file_t(rn);
1791         new_marking->file_content.clear();
1792         mark_merged_scalar(left_marking->file_content, left_uncommon_ancestors,
1793                            lf->content,
1794                            right_marking->file_content, right_uncommon_ancestors,
1795                            rf->content,
1796                            new_rid, f->content, new_marking->file_content);
1797       }
1798     // attrs
1799     new_marking->attrs.clear();
1800     for (attr_map_t::const_iterator i = n->attrs.begin();
1801          i != n->attrs.end(); ++i)
1802       {
1803         attr_key const & key = i->first;
1804         MM(key);
1805         attr_map_t::const_iterator li = ln->attrs.find(key);
1806         attr_map_t::const_iterator ri = rn->attrs.find(key);
1807         I(new_marking->attrs.find(key) == new_marking->attrs.end());
1808         // [], when used to refer to a non-existent element, default
1809         // constructs that element and returns a reference to it.  We make use
1810         // of this here.
1811         set<revision_id> & new_marks = new_marking->attrs[key];
1812 
1813         if (li == ln->attrs.end() && ri == rn->attrs.end())
1814           // this is a brand new attribute, never before seen
1815           safe_insert(new_marks, new_rid);
1816 
1817         else if (li != ln->attrs.end() && ri == rn->attrs.end())
1818           // only the left side has seen this attr before
1819           mark_unmerged_scalar(safe_get(left_marking->attrs, key),
1820                                li->second,
1821                                new_rid, i->second, new_marks);
1822 
1823         else if (li == ln->attrs.end() && ri != rn->attrs.end())
1824           // only the right side has seen this attr before
1825           mark_unmerged_scalar(safe_get(right_marking->attrs, key),
1826                                ri->second,
1827                                new_rid, i->second, new_marks);
1828 
1829         else
1830           // both sides have seen this attr before
1831           mark_merged_scalar(safe_get(left_marking->attrs, key),
1832                              left_uncommon_ancestors,
1833                              li->second,
1834                              safe_get(right_marking->attrs, key),
1835                              right_uncommon_ancestors,
1836                              ri->second,
1837                              new_rid, i->second, new_marks);
1838       }
1839 
1840     // some extra sanity checking -- attributes are not allowed to be deleted,
1841     // so we double check that they haven't.
1842     // SPEEDUP?: this code could probably be made more efficient -- but very
1843     // rarely will any node have more than, say, one attribute, so it probably
1844     // doesn't matter.
1845     for (attr_map_t::const_iterator i = ln->attrs.begin();
1846          i != ln->attrs.end(); ++i)
1847       I(n->attrs.find(i->first) != n->attrs.end());
1848     for (attr_map_t::const_iterator i = rn->attrs.begin();
1849          i != rn->attrs.end(); ++i)
1850       I(n->attrs.find(i->first) != n->attrs.end());
1851   }
1852 
drop_extra_markings(roster_t const & ros,marking_map & mm)1853   void drop_extra_markings(roster_t const & ros, marking_map & mm)
1854   {
1855     if (mm.size() > ros.all_nodes().size())
1856       {
1857         std::set<node_id> to_drop;
1858 
1859         marking_map::const_iterator mi = mm.begin(), me = mm.end();
1860         node_map::const_iterator ri = ros.all_nodes().begin(), re = ros.all_nodes().end();
1861 
1862         for (; mi != me; ++mi)
1863           {
1864             if (ri == re)
1865               {
1866                 to_drop.insert(mi->first);
1867               }
1868             else
1869               {
1870                 if (ri->first < mi->first)
1871                   ++ri;
1872                 I(ri == re || ri->first >= mi->first);
1873                 if (ri == re || ri->first > mi->first)
1874                   to_drop.insert(mi->first);
1875               }
1876           }
1877         for (std::set<node_id>::const_iterator i = to_drop.begin();
1878              i != to_drop.end(); ++i)
1879           {
1880             mm.remove_marking(*i);
1881           }
1882       }
1883     I(mm.size() == ros.all_nodes().size());
1884   }
1885 
1886 } // anonymous namespace
1887 
1888 
1889 // This function is also responsible for verifying ancestry invariants --
1890 // those invariants on a roster that involve the structure of the roster's
1891 // parents, rather than just the structure of the roster itself.
1892 void
mark_merge_roster(roster_t const & left_roster,marking_map const & left_markings,set<revision_id> const & left_uncommon_ancestors,roster_t const & right_roster,marking_map const & right_markings,set<revision_id> const & right_uncommon_ancestors,revision_id const & new_rid,roster_t const & merge,marking_map & new_markings)1893 mark_merge_roster(roster_t const & left_roster,
1894                   marking_map const & left_markings,
1895                   set<revision_id> const & left_uncommon_ancestors,
1896                   roster_t const & right_roster,
1897                   marking_map const & right_markings,
1898                   set<revision_id> const & right_uncommon_ancestors,
1899                   revision_id const & new_rid,
1900                   roster_t const & merge,
1901                   marking_map & new_markings)
1902 {
1903   {
1904     int left_err = left_markings.size() - merge.all_nodes().size();
1905     int right_err = right_markings.size() - merge.all_nodes().size();
1906     if (left_err * left_err > right_err * right_err)
1907       new_markings = right_markings;
1908     else
1909       new_markings = left_markings;
1910   }
1911 
1912   for (node_map::const_iterator i = merge.all_nodes().begin();
1913        i != merge.all_nodes().end(); ++i)
1914     {
1915       node_t const & n = i->second;
1916       node_t const &left_node = left_roster.all_nodes().get_if_present(i->first);
1917       node_t const &right_node = right_roster.all_nodes().get_if_present(i->first);
1918 
1919       bool exists_in_left = static_cast<bool>(left_node);
1920       bool exists_in_right = static_cast<bool>(right_node);
1921 
1922       if (!exists_in_left && !exists_in_right)
1923         mark_new_node(new_rid, n, new_markings);
1924 
1925       else if (!exists_in_left && exists_in_right)
1926         {
1927           const_marking_t const & right_marking = right_markings.get_marking(n->self);
1928           // must be unborn on the left (as opposed to dead)
1929           I(right_uncommon_ancestors.find(right_marking->birth_revision)
1930             != right_uncommon_ancestors.end());
1931           mark_unmerged_node(right_marking, right_node,
1932                              new_rid, n, new_markings);
1933         }
1934       else if (exists_in_left && !exists_in_right)
1935         {
1936           const_marking_t const & left_marking = left_markings.get_marking(n->self);
1937           // must be unborn on the right (as opposed to dead)
1938           I(left_uncommon_ancestors.find(left_marking->birth_revision)
1939             != left_uncommon_ancestors.end());
1940           mark_unmerged_node(left_marking, left_node,
1941                              new_rid, n, new_markings);
1942         }
1943       else
1944         {
1945           mark_merged_node(left_markings.get_marking(n->self),
1946                            left_uncommon_ancestors, left_node,
1947                            right_markings.get_marking(n->self),
1948                            right_uncommon_ancestors, right_node,
1949                            new_rid, n, new_markings);
1950         }
1951     }
1952 
1953   drop_extra_markings(merge, new_markings);
1954 }
1955 
1956 namespace {
1957 
1958   class editable_roster_for_nonmerge
1959     : public editable_roster_base
1960   {
1961   public:
editable_roster_for_nonmerge(roster_t & r,node_id_source & nis,revision_id const & rid,marking_map & markings)1962     editable_roster_for_nonmerge(roster_t & r, node_id_source & nis,
1963                                  revision_id const & rid,
1964                                  marking_map & markings)
1965       : editable_roster_base(r, nis),
1966         rid(rid), markings(markings)
1967     {}
1968 
detach_node(file_path const & src)1969     virtual node_id detach_node(file_path const & src)
1970     {
1971       node_id nid = this->editable_roster_base::detach_node(src);
1972       marking_t marking = markings.get_marking_for_update(nid);
1973       marking->parent_name.clear();
1974       marking->parent_name.insert(rid);
1975       return nid;
1976     }
1977 
drop_detached_node(node_id nid)1978     virtual void drop_detached_node(node_id nid)
1979     {
1980       this->editable_roster_base::drop_detached_node(nid);
1981       markings.remove_marking(nid);
1982     }
1983 
create_dir_node()1984     virtual node_id create_dir_node()
1985     {
1986       return handle_new(this->editable_roster_base::create_dir_node());
1987     }
1988 
create_file_node(file_id const & content)1989     virtual node_id create_file_node(file_id const & content)
1990     {
1991       return handle_new(this->editable_roster_base::create_file_node(content));
1992     }
1993 
apply_delta(file_path const & pth,file_id const & old_id,file_id const & new_id)1994     virtual void apply_delta(file_path const & pth,
1995                              file_id const & old_id, file_id const & new_id)
1996     {
1997       this->editable_roster_base::apply_delta(pth, old_id, new_id);
1998       node_id nid = r.get_node(pth)->self;
1999       marking_t marking = markings.get_marking_for_update(nid);
2000       marking->file_content.clear();
2001       marking->file_content.insert(rid);
2002     }
2003 
clear_attr(file_path const & path,attr_key const & key)2004     virtual void clear_attr(file_path const & path, attr_key const & key)
2005     {
2006       this->editable_roster_base::clear_attr(path, key);
2007       handle_attr(path, key);
2008     }
2009 
set_attr(file_path const & path,attr_key const & key,attr_value const & val)2010     virtual void set_attr(file_path const & path, attr_key const & key,
2011                           attr_value const & val)
2012     {
2013       this->editable_roster_base::set_attr(path, key, val);
2014       handle_attr(path, key);
2015     }
2016 
handle_new(node_id nid)2017     node_id handle_new(node_id nid)
2018     {
2019       const_node_t n = r.get_node(nid);
2020       mark_new_node(rid, n, markings);
2021       return nid;
2022     }
2023 
handle_attr(file_path const & pth,attr_key const & name)2024     void handle_attr(file_path const & pth, attr_key const & name)
2025     {
2026       node_id nid = r.get_node(pth)->self;
2027       marking_t marking = markings.get_marking_for_update(nid);
2028       map<attr_key, set<revision_id> >::iterator am = marking->attrs.find(name);
2029       if (am == marking->attrs.end())
2030         {
2031           marking->attrs.insert(make_pair(name, set<revision_id>()));
2032           am = marking->attrs.find(name);
2033         }
2034 
2035       I(am != marking->attrs.end());
2036       am->second.clear();
2037       am->second.insert(rid);
2038     }
2039 
2040   private:
2041     revision_id const & rid;
2042     // markings starts out as the parent's markings
2043     marking_map & markings;
2044   };
2045 
2046 } // anonymous namespace
2047 
2048 // Interface note: make_roster_for_merge and make_roster_for_nonmerge
2049 // each exist in two variants:
2050 //
2051 // 1. A variant that does all of the actual work, taking every single
2052 //    relevant base-level data object as a separate argument.  This
2053 //    variant is called directly by the unit tests, and also by variant 2.
2054 //    It is defined in this file.
2055 //
2056 // 2. A variant that takes a revision object, a revision ID, a database,
2057 //    and a node_id_source.  This variant uses those four things to look
2058 //    up all of the low-level data required by variant 1, then calls
2059 //    variant 1 to get the real work done.  This is the one called by
2060 //    (one variant of) make_roster_for_revision.
2061 //    It, and make_roster_for_revision, is defined in ancestry.cc.
2062 
2063 // yes, this function takes 14 arguments.  I'm very sorry.
2064 void
make_roster_for_merge(revision_id const & left_rid,roster_t const & left_roster,marking_map const & left_markings,cset const & left_cs,set<revision_id> const & left_uncommon_ancestors,revision_id const & right_rid,roster_t const & right_roster,marking_map const & right_markings,cset const & right_cs,set<revision_id> const & right_uncommon_ancestors,revision_id const & new_rid,roster_t & new_roster,marking_map & new_markings,node_id_source & nis)2065 make_roster_for_merge(revision_id const & left_rid,
2066                       roster_t const & left_roster,
2067                       marking_map const & left_markings,
2068                       cset const & left_cs,
2069                       set<revision_id> const & left_uncommon_ancestors,
2070 
2071                       revision_id const & right_rid,
2072                       roster_t const & right_roster,
2073                       marking_map const & right_markings,
2074                       cset const & right_cs,
2075                       set<revision_id> const & right_uncommon_ancestors,
2076 
2077                       revision_id const & new_rid,
2078                       roster_t & new_roster,
2079                       marking_map & new_markings,
2080                       node_id_source & nis)
2081 {
2082   I(!null_id(left_rid) && !null_id(right_rid));
2083   I(left_uncommon_ancestors.find(left_rid) != left_uncommon_ancestors.end());
2084   I(left_uncommon_ancestors.find(right_rid) == left_uncommon_ancestors.end());
2085   I(right_uncommon_ancestors.find(right_rid) != right_uncommon_ancestors.end());
2086   I(right_uncommon_ancestors.find(left_rid) == right_uncommon_ancestors.end());
2087   MM(left_rid);
2088   MM(left_roster);
2089   MM(left_markings);
2090   MM(left_cs);
2091   MM(left_uncommon_ancestors);
2092   MM(right_rid);
2093   MM(right_roster);
2094   MM(right_markings);
2095   MM(right_cs);
2096   MM(right_uncommon_ancestors);
2097   MM(new_rid);
2098   MM(new_roster);
2099   MM(new_markings);
2100   {
2101     temp_node_id_source temp_nis;
2102     new_roster = left_roster;
2103     roster_t from_right_r(right_roster);
2104     MM(from_right_r);
2105 
2106     editable_roster_for_merge from_left_er(new_roster, temp_nis);
2107     editable_roster_for_merge from_right_er(from_right_r, temp_nis);
2108 
2109     left_cs.apply_to(from_left_er);
2110     right_cs.apply_to(from_right_er);
2111 
2112     unify_rosters(new_roster, from_left_er.new_nodes,
2113                   from_right_r, from_right_er.new_nodes,
2114                   nis);
2115 
2116     // Kluge: If both csets have no content changes, and the node_id_source
2117     // passed to this function is a temp_node_id_source, then we are being
2118     // called from get_current_roster_shape, and we should not attempt to
2119     // verify that these rosters match as far as content IDs.
2120     if (left_cs.deltas_applied.empty()
2121         && right_cs.deltas_applied.empty()
2122         && typeid(nis) == typeid(temp_node_id_source))
2123       I(equal_shapes(new_roster, from_right_r));
2124     else
2125       I(new_roster == from_right_r);
2126   }
2127 
2128   // SPEEDUP?: instead of constructing new marking from scratch, track which
2129   // nodes were modified, and scan only them
2130   // load one of the parent markings directly into the new marking map
2131   new_markings.clear();
2132   mark_merge_roster(left_roster, left_markings, left_uncommon_ancestors,
2133                     right_roster, right_markings, right_uncommon_ancestors,
2134                     new_rid, new_roster, new_markings);
2135 }
2136 
2137 
2138 
2139 // Warning: this function expects the parent's roster and markings in the
2140 // 'new_roster' and 'new_markings' parameters, and they are modified
2141 // destructively!
2142 // This function performs an almost identical task to
2143 // mark_roster_with_one_parent; however, for efficiency, it is implemented
2144 // in a different, destructive way.
2145 void
make_roster_for_nonmerge(cset const & cs,revision_id const & new_rid,roster_t & new_roster,marking_map & new_markings,node_id_source & nis)2146 make_roster_for_nonmerge(cset const & cs,
2147                          revision_id const & new_rid,
2148                          roster_t & new_roster, marking_map & new_markings,
2149                          node_id_source & nis)
2150 {
2151   editable_roster_for_nonmerge er(new_roster, nis, new_rid, new_markings);
2152   cs.apply_to(er);
2153 }
2154 
2155 
2156 void
mark_roster_with_no_parents(revision_id const & rid,roster_t const & roster,marking_map & markings)2157 mark_roster_with_no_parents(revision_id const & rid,
2158                             roster_t const & roster,
2159                             marking_map & markings)
2160 {
2161   roster_t mock_parent;
2162   marking_map mock_parent_markings;
2163   mark_roster_with_one_parent(mock_parent, mock_parent_markings,
2164                               rid, roster, markings);
2165 }
2166 
2167 void
mark_roster_with_one_parent(roster_t const & parent,marking_map const & parent_markings,revision_id const & child_rid,roster_t const & child,marking_map & child_markings)2168 mark_roster_with_one_parent(roster_t const & parent,
2169                             marking_map const & parent_markings,
2170                             revision_id const & child_rid,
2171                             roster_t const & child,
2172                             marking_map & child_markings)
2173 {
2174   MM(parent);
2175   MM(parent_markings);
2176   MM(child_rid);
2177   MM(child);
2178   MM(child_markings);
2179 
2180   I(!null_id(child_rid));
2181   child_markings = parent_markings;
2182 
2183   for (node_map::const_iterator i = child.all_nodes().begin();
2184        i != child.all_nodes().end(); ++i)
2185     {
2186       marking_t new_marking;
2187       if (parent.has_node(i->first))
2188         mark_unmerged_node(parent_markings.get_marking(i->first),
2189                            parent.get_node(i->first),
2190                            child_rid, i->second, child_markings);
2191       else
2192         mark_new_node(child_rid, i->second, child_markings);
2193     }
2194   drop_extra_markings(child, child_markings);
2195 
2196   child.check_sane_against(child_markings, true);
2197 }
2198 
2199 
2200 ////////////////////////////////////////////////////////////////////
2201 //   Calculation of a cset
2202 ////////////////////////////////////////////////////////////////////
2203 
2204 
2205 namespace
2206 {
2207 
delta_only_in_from(roster_t const & from,node_id nid,node_t n,cset & cs)2208   void delta_only_in_from(roster_t const & from,
2209                           node_id nid, node_t n,
2210                           cset & cs)
2211   {
2212     file_path pth;
2213     from.get_name(nid, pth);
2214     safe_insert(cs.nodes_deleted, pth);
2215   }
2216 
2217 
delta_only_in_to(roster_t const & to,node_id nid,node_t n,cset & cs)2218   void delta_only_in_to(roster_t const & to, node_id nid, node_t n,
2219                         cset & cs)
2220   {
2221     file_path pth;
2222     to.get_name(nid, pth);
2223     if (is_file_t(n))
2224       {
2225         safe_insert(cs.files_added,
2226                     make_pair(pth, downcast_to_file_t(n)->content));
2227       }
2228     else
2229       {
2230         safe_insert(cs.dirs_added, pth);
2231       }
2232     for (attr_map_t::const_iterator i = n->attrs.begin();
2233          i != n->attrs.end(); ++i)
2234       if (i->second.first)
2235         safe_insert(cs.attrs_set,
2236                     make_pair(make_pair(pth, i->first), i->second.second));
2237   }
2238 
delta_in_both(node_id nid,roster_t const & from,node_t from_n,roster_t const & to,node_t to_n,cset & cs)2239   void delta_in_both(node_id nid,
2240                      roster_t const & from, node_t from_n,
2241                      roster_t const & to, node_t to_n,
2242                      cset & cs)
2243   {
2244     I(same_type(from_n, to_n));
2245     I(from_n->self == to_n->self);
2246 
2247     if (shallow_equal(from_n, to_n, false))
2248       return;
2249 
2250     file_path from_p, to_p;
2251     from.get_name(nid, from_p);
2252     to.get_name(nid, to_p);
2253 
2254     // Compare name and path.
2255     if (from_n->name != to_n->name || from_n->parent != to_n->parent)
2256       safe_insert(cs.nodes_renamed, make_pair(from_p, to_p));
2257 
2258     // Compare file content.
2259     if (is_file_t(from_n))
2260       {
2261         file_t from_f = downcast_to_file_t(from_n);
2262         file_t to_f = downcast_to_file_t(to_n);
2263         if (!(from_f->content == to_f->content))
2264           {
2265             safe_insert(cs.deltas_applied,
2266                         make_pair(to_p, make_pair(from_f->content,
2267                                                    to_f->content)));
2268           }
2269       }
2270 
2271     // Compare attrs.
2272     {
2273       parallel::iter<attr_map_t> i(from_n->attrs, to_n->attrs);
2274       while (i.next())
2275         {
2276           MM(i);
2277           if ((i.state() == parallel::in_left
2278                || (i.state() == parallel::in_both && !i.right_data().first))
2279               && i.left_data().first)
2280             {
2281               safe_insert(cs.attrs_cleared,
2282                           make_pair(to_p, i.left_key()));
2283             }
2284           else if ((i.state() == parallel::in_right
2285                     || (i.state() == parallel::in_both && !i.left_data().first))
2286                    && i.right_data().first)
2287             {
2288               safe_insert(cs.attrs_set,
2289                           make_pair(make_pair(to_p, i.right_key()),
2290                                     i.right_data().second));
2291             }
2292           else if (i.state() == parallel::in_both
2293                    && i.right_data().first
2294                    && i.left_data().first
2295                    && i.right_data().second != i.left_data().second)
2296             {
2297               safe_insert(cs.attrs_set,
2298                           make_pair(make_pair(to_p, i.right_key()),
2299                                     i.right_data().second));
2300             }
2301         }
2302     }
2303   }
2304 }
2305 
2306 void
make_cset(roster_t const & from,roster_t const & to,cset & cs)2307 make_cset(roster_t const & from, roster_t const & to, cset & cs)
2308 {
2309   cs.clear();
2310   parallel::iter<node_map> i(from.all_nodes(), to.all_nodes());
2311   while (i.next())
2312     {
2313       MM(i);
2314       switch (i.state())
2315         {
2316         case parallel::invalid:
2317           I(false);
2318 
2319         case parallel::in_left:
2320           // deleted
2321           delta_only_in_from(from, i.left_key(), i.left_data(), cs);
2322           break;
2323 
2324         case parallel::in_right:
2325           // added
2326           delta_only_in_to(to, i.right_key(), i.right_data(), cs);
2327           break;
2328 
2329         case parallel::in_both:
2330           // moved/renamed/patched/attribute changes
2331           delta_in_both(i.left_key(), from, i.left_data(), to, i.right_data(), cs);
2332           break;
2333         }
2334     }
2335 }
2336 
2337 
2338 // we assume our input is sane
2339 bool
equal_up_to_renumbering(roster_t const & a,marking_map const & a_markings,roster_t const & b,marking_map const & b_markings)2340 equal_up_to_renumbering(roster_t const & a, marking_map const & a_markings,
2341                         roster_t const & b, marking_map const & b_markings)
2342 {
2343   if (a.all_nodes().size() != b.all_nodes().size())
2344     return false;
2345 
2346   for (node_map::const_iterator i = a.all_nodes().begin();
2347        i != a.all_nodes().end(); ++i)
2348     {
2349       file_path p;
2350       a.get_name(i->first, p);
2351       if (!b.has_node(p))
2352         return false;
2353       const_node_t b_n = b.get_node(p);
2354       // we already know names are the same
2355       if (!same_type(i->second, b_n))
2356         return false;
2357       if (i->second->attrs != b_n->attrs)
2358         return false;
2359       if (is_file_t(i->second))
2360         {
2361           if (!(downcast_to_file_t(i->second)->content
2362                 == downcast_to_file_t(b_n)->content))
2363             return false;
2364         }
2365       // nodes match, check the markings too
2366       const_marking_t am = a_markings.get_marking(i->first);
2367       const_marking_t bm = b_markings.get_marking(b_n->self);
2368       if (!(am == bm) && !(*am == *bm))
2369         {
2370           return false;
2371         }
2372     }
2373   return true;
2374 }
2375 
2376 static void
select_restricted_nodes(roster_t const & from,roster_t const & to,node_restriction const & mask,map<node_id,node_t> & selected)2377 select_restricted_nodes(roster_t const & from, roster_t const & to,
2378                         node_restriction const & mask,
2379                         map<node_id, node_t> & selected)
2380 {
2381   selected.clear();
2382   parallel::iter<node_map> i(from.all_nodes(), to.all_nodes());
2383   while (i.next())
2384     {
2385       MM(i);
2386 
2387       switch (i.state())
2388         {
2389         case parallel::invalid:
2390           I(false);
2391 
2392         case parallel::in_left:
2393           // deleted
2394           if (!mask.includes(from, i.left_key()))
2395             selected.insert(make_pair(i.left_key(), i.left_data()));
2396           break;
2397 
2398         case parallel::in_right:
2399           // added
2400           if (mask.includes(to, i.right_key()))
2401             selected.insert(make_pair(i.right_key(), i.right_data()));
2402           break;
2403 
2404         case parallel::in_both:
2405           // moved/renamed/patched/attribute changes
2406           if (mask.includes(from, i.left_key()) ||
2407               mask.includes(to, i.right_key()))
2408             selected.insert(make_pair(i.right_key(), i.right_data()));
2409           else
2410             selected.insert(make_pair(i.left_key(), i.left_data()));
2411           break;
2412         }
2413     }
2414 }
2415 
2416 void
make_restricted_roster(roster_t const & from,roster_t const & to,roster_t & restricted,node_restriction const & mask)2417 make_restricted_roster(roster_t const & from, roster_t const & to,
2418                        roster_t & restricted,
2419                        node_restriction const & mask)
2420 {
2421   MM(from);
2422   MM(to);
2423   MM(restricted);
2424 
2425   I(restricted.all_nodes().empty());
2426 
2427   map<node_id, node_t> selected;
2428 
2429   select_restricted_nodes(from, to, mask, selected);
2430 
2431   int problems = 0;
2432 
2433   while (!selected.empty())
2434     {
2435       map<node_id, node_t>::const_iterator n = selected.begin();
2436 
2437       L(FL("selected node %d %s parent %d")
2438             % n->second->self
2439             % n->second->name
2440             % n->second->parent);
2441 
2442       bool missing_parent = false;
2443 
2444       while (!null_node(n->second->parent) &&
2445              !restricted.has_node(n->second->parent))
2446         {
2447           // we can't add this node until its parent has been added
2448 
2449           L(FL("deferred node %d %s parent %d")
2450             % n->second->self
2451             % n->second->name
2452             % n->second->parent);
2453 
2454           map<node_id, node_t>::const_iterator
2455             p = selected.find(n->second->parent);
2456 
2457           if (p != selected.end())
2458             {
2459               n = p; // see if we can add the parent
2460               I(is_dir_t(n->second));
2461             }
2462           else
2463             {
2464               missing_parent = true;
2465               break;
2466             }
2467         }
2468 
2469       if (!missing_parent)
2470         {
2471           L(FL("adding node %d %s parent %d")
2472             % n->second->self
2473             % n->second->name
2474             % n->second->parent);
2475 
2476           if (is_file_t(n->second))
2477             {
2478               file_t const f = downcast_to_file_t(n->second);
2479               restricted.create_file_node(f->content, f->self);
2480             }
2481           else
2482             restricted.create_dir_node(n->second->self);
2483 
2484           node_t added = restricted.get_node_for_update(n->second->self);
2485           added->attrs = n->second->attrs;
2486 
2487           restricted.attach_node(n->second->self, n->second->parent, n->second->name);
2488         }
2489       else if (from.has_node(n->second->parent) && !to.has_node(n->second->parent))
2490         {
2491           // included a delete that must be excluded
2492           file_path self, parent;
2493           from.get_name(n->second->self, self);
2494           from.get_name(n->second->parent, parent);
2495           W(F("restriction includes deletion of '%s' "
2496               "but excludes deletion of '%s'")
2497             % parent % self);
2498           problems++;
2499         }
2500       else if (!from.has_node(n->second->parent) && to.has_node(n->second->parent))
2501         {
2502           // excluded an add that must be included
2503           file_path self, parent;
2504           to.get_name(n->second->self, self);
2505           to.get_name(n->second->parent, parent);
2506           W(F("restriction excludes addition of '%s' "
2507               "but includes addition of '%s'")
2508             % parent % self);
2509           problems++;
2510         }
2511       else
2512         I(false); // something we missed?!?
2513 
2514       selected.erase(n->first);
2515     }
2516 
2517 
2518   // we cannot call restricted.check_sane(true) unconditionally because the
2519   // restricted roster is very possibly *not* sane. for example, if we run
2520   // the following in a new unversioned directory the from, to and
2521   // restricted rosters will all be empty and thus not sane.
2522   //
2523   // mtn setup .
2524   // mtn status
2525   //
2526   // several tests do this and it seems entirely reasonable. we first check
2527   // that the restricted roster is not empty and only then require it to be
2528   // sane.
2529 
2530   if (!restricted.all_nodes().empty() && !restricted.has_root())
2531    {
2532      W(F("restriction excludes addition of root directory"));
2533      problems++;
2534    }
2535 
2536   E(problems == 0, origin::user, F("invalid restriction"));
2537 
2538   if (!restricted.all_nodes().empty())
2539     restricted.check_sane(true);
2540 
2541 }
2542 
2543 void
select_nodes_modified_by_cset(cset const & cs,roster_t const & old_roster,roster_t const & new_roster,set<node_id> & nodes_modified)2544 select_nodes_modified_by_cset(cset const & cs,
2545                               roster_t const & old_roster,
2546                               roster_t const & new_roster,
2547                               set<node_id> & nodes_modified)
2548 {
2549   nodes_modified.clear();
2550 
2551   set<file_path> modified_prestate_nodes;
2552   set<file_path> modified_poststate_nodes;
2553 
2554   // Pre-state damage
2555 
2556   copy(cs.nodes_deleted.begin(), cs.nodes_deleted.end(),
2557        inserter(modified_prestate_nodes, modified_prestate_nodes.begin()));
2558 
2559   for (map<file_path, file_path>::const_iterator i = cs.nodes_renamed.begin();
2560        i != cs.nodes_renamed.end(); ++i)
2561     modified_prestate_nodes.insert(i->first);
2562 
2563   // Post-state damage
2564 
2565   copy(cs.dirs_added.begin(), cs.dirs_added.end(),
2566        inserter(modified_poststate_nodes, modified_poststate_nodes.begin()));
2567 
2568   for (map<file_path, file_id>::const_iterator i = cs.files_added.begin();
2569        i != cs.files_added.end(); ++i)
2570     modified_poststate_nodes.insert(i->first);
2571 
2572   for (map<file_path, file_path>::const_iterator i = cs.nodes_renamed.begin();
2573        i != cs.nodes_renamed.end(); ++i)
2574     modified_poststate_nodes.insert(i->second);
2575 
2576   for (map<file_path, pair<file_id, file_id> >::const_iterator i = cs.deltas_applied.begin();
2577        i != cs.deltas_applied.end(); ++i)
2578     modified_poststate_nodes.insert(i->first);
2579 
2580   for (set<pair<file_path, attr_key> >::const_iterator i = cs.attrs_cleared.begin();
2581        i != cs.attrs_cleared.end(); ++i)
2582     modified_poststate_nodes.insert(i->first);
2583 
2584   for (map<pair<file_path, attr_key>, attr_value>::const_iterator i = cs.attrs_set.begin();
2585        i != cs.attrs_set.end(); ++i)
2586     modified_poststate_nodes.insert(i->first.first);
2587 
2588   // Finale
2589 
2590   for (set<file_path>::const_iterator i = modified_prestate_nodes.begin();
2591        i != modified_prestate_nodes.end(); ++i)
2592     {
2593       I(old_roster.has_node(*i));
2594       nodes_modified.insert(old_roster.get_node(*i)->self);
2595     }
2596 
2597   for (set<file_path>::const_iterator i = modified_poststate_nodes.begin();
2598        i != modified_poststate_nodes.end(); ++i)
2599     {
2600       I(new_roster.has_node(*i));
2601       nodes_modified.insert(new_roster.get_node(*i)->self);
2602     }
2603 
2604 }
2605 
2606 void
get_file_details(node_id nid,file_id & fid,file_path & pth) const2607 roster_t::get_file_details(node_id nid,
2608                            file_id & fid,
2609                            file_path & pth) const
2610 {
2611   I(has_node(nid));
2612   const_file_t f = downcast_to_file_t(get_node(nid));
2613   fid = f->content;
2614   get_name(nid, pth);
2615 }
2616 
2617 void
extract_path_set(set<file_path> & paths) const2618 roster_t::extract_path_set(set<file_path> & paths) const
2619 {
2620   paths.clear();
2621   if (has_root())
2622     {
2623       for (dfs_iter i(root_dir, true); !i.finished(); ++i)
2624         {
2625           file_path pth = file_path_internal(i.path());
2626           if (!pth.empty())
2627             paths.insert(pth);
2628         }
2629     }
2630 }
2631 
2632 // ??? make more similar to the above (member function, use dfs_iter)
2633 void
get_content_paths(roster_t const & roster,map<file_id,file_path> & paths)2634 get_content_paths(roster_t const & roster, map<file_id, file_path> & paths)
2635 {
2636   node_map const & nodes = roster.all_nodes();
2637   for (node_map::const_iterator i = nodes.begin(); i != nodes.end(); ++i)
2638     {
2639       const_node_t node = roster.get_node(i->first);
2640       if (is_file_t(node))
2641         {
2642           file_path p;
2643           roster.get_name(i->first, p);
2644           const_file_t file = downcast_to_file_t(node);
2645           paths.insert(make_pair(file->content, p));
2646         }
2647     }
2648 }
2649 
2650 ////////////////////////////////////////////////////////////////////
2651 //   I/O routines
2652 ////////////////////////////////////////////////////////////////////
2653 
2654 void
append_with_escaped_quotes(string & collection,string const & item)2655 append_with_escaped_quotes(string & collection, string const & item)
2656 {
2657   size_t mark = 0;
2658   size_t cursor = item.find('"', mark);
2659   while (cursor != string::npos)
2660     {
2661       collection.append(item, mark, cursor - mark);
2662       collection.append(1, '\\');
2663       mark = cursor;
2664       if (mark == item.size())
2665         {
2666           cursor = string::npos;
2667         }
2668       else
2669         {
2670           cursor = item.find('"', mark + 1);
2671         }
2672     }
2673   collection.append(item, mark, item.size() - mark + 1);
2674 }
2675 
2676 void
push_marking(string & contents,bool is_file,const_marking_t const & mark,int symbol_length)2677 push_marking(string & contents,
2678              bool is_file,
2679              const_marking_t const & mark,
2680              int symbol_length)
2681 {
2682 
2683   I(!null_id(mark->birth_revision));
2684 
2685   contents.append(symbol_length - 5, ' ');
2686   contents.append("birth [");
2687   contents.append(encode_hexenc(mark->birth_revision.inner()(), origin::internal));
2688   contents.append("]\n");
2689 
2690   for (set<revision_id>::const_iterator i = mark->parent_name.begin();
2691        i != mark->parent_name.end(); ++i)
2692     {
2693       contents.append(symbol_length - 9, ' ');
2694       contents.append("path_mark [");
2695       contents.append(encode_hexenc(i->inner()(), origin::internal));
2696       contents.append("]\n");
2697     }
2698 
2699   if (is_file)
2700     {
2701       for (set<revision_id>::const_iterator i = mark->file_content.begin();
2702            i != mark->file_content.end(); ++i)
2703         {
2704           contents.append("content_mark [");// always the longest symbol
2705           contents.append(encode_hexenc(i->inner()(), origin::internal));
2706           contents.append("]\n");
2707         }
2708     }
2709   else
2710     I(mark->file_content.empty());
2711 
2712   for (map<attr_key, set<revision_id> >::const_iterator i = mark->attrs.begin();
2713        i != mark->attrs.end(); ++i)
2714     {
2715       for (set<revision_id>::const_iterator j = i->second.begin();
2716            j != i->second.end(); ++j)
2717         {
2718           contents.append(symbol_length - 9, ' ');
2719           contents.append("attr_mark \"");
2720           append_with_escaped_quotes(contents, i->first());
2721           contents.append("\" [");
2722           contents.append(encode_hexenc(j->inner()(), origin::internal));
2723           contents.append("]\n");
2724         }
2725     }
2726 }
2727 
2728 
2729 void
parse_marking(basic_io::parser & pa,marking_t & marking)2730 parse_marking(basic_io::parser & pa,
2731               marking_t & marking)
2732 {
2733   while (pa.symp())
2734     {
2735       string rev;
2736       if (pa.symp(syms::birth))
2737         {
2738           pa.sym();
2739           pa.hex(rev);
2740           marking->birth_revision =
2741             decode_hexenc_as<revision_id>(rev, pa.tok.in.made_from);
2742         }
2743       else if (pa.symp(syms::path_mark))
2744         {
2745           pa.sym();
2746           pa.hex(rev);
2747           safe_insert(marking->parent_name,
2748                       decode_hexenc_as<revision_id>(rev, pa.tok.in.made_from));
2749         }
2750       else if (pa.symp(basic_io::syms::content_mark))
2751         {
2752           pa.sym();
2753           pa.hex(rev);
2754           safe_insert(marking->file_content,
2755                       decode_hexenc_as<revision_id>(rev, pa.tok.in.made_from));
2756         }
2757       else if (pa.symp(syms::attr_mark))
2758         {
2759           string k;
2760           pa.sym();
2761           pa.str(k);
2762           pa.hex(rev);
2763           attr_key key = attr_key(k, pa.tok.in.made_from);
2764           safe_insert(marking->attrs[key],
2765                       decode_hexenc_as<revision_id>(rev, pa.tok.in.made_from));
2766         }
2767       else break;
2768     }
2769 }
2770 
2771 // SPEEDUP?: hand-writing a parser for manifests was a measurable speed win,
2772 // and the original parser was much simpler than basic_io.  After benchmarking
2773 // consider replacing the roster disk format with something that can be
2774 // processed more efficiently.
2775 
2776 void
print_to(data & dat,marking_map const & mm,bool print_local_parts) const2777 roster_t::print_to(data & dat,
2778                    marking_map const & mm,
2779                    bool print_local_parts) const
2780 {
2781   string contents;
2782   I(has_root());
2783 
2784   // approximate byte counts
2785   // a file is name + content (+ birth + path-mark + content-mark + ident)
2786   //   2 sym + name + hash (+ 4 sym + 3 hash + 1 num)
2787   //   24 + name + 43 (+48 + 129 + 10) = 67 + name (+ 187) = ~100 (+ 187)
2788   // a dir is name (+ birth + path-mark + ident)
2789   //   1 sym + name (+ 3 sym + 2 hash + 1 num)
2790   //   12 + name (+ 36 + 86 + 10) = 12 + name (+ 132) = ~52 (+ 132)
2791   // an attr is name/value (+ name/mark)
2792   //   1 sym + 2 name (+ 1 sym + 1 name + 1 hash)
2793   //   12 + 2 name (+ 12 + 43 + name) = ~40 (+ ~70)
2794   // in the monotone tree, there are about 2% as many attrs as nodes
2795 
2796   if (print_local_parts)
2797     {
2798       contents.reserve(nodes.size() * (290 + 0.02 * 110) * 1.1);
2799     }
2800   else
2801     {
2802       contents.reserve(nodes.size() * (100 + 0.02 * 40) * 1.1);
2803     }
2804 
2805   // symbols are:
2806   //   birth        (all local)
2807   //   dormant_attr (any local)
2808   //   ident        (all local)
2809   //   path_mark    (all local)
2810   //   attr_mark    (any local)
2811   //   dir          (all dir)
2812   //   file         (all file)
2813   //   content      (all file)
2814   //   attr         (any)
2815   //   content_mark (all local file)
2816 
2817   // local  file : symbol length 12
2818   // local  dir  : symbol length 9 or 12 (if dormant_attr)
2819   // public file : symbol length 7
2820   // public dir  : symbol length 3 or 4 (if attr)
2821 
2822   contents += "format_version \"1\"\n";
2823 
2824   for (dfs_iter i(root_dir, true); !i.finished(); ++i)
2825     {
2826       contents += "\n";
2827 
2828       const_node_t curr = *i;
2829 
2830       int symbol_length = 0;
2831 
2832       if (is_dir_t(curr))
2833         {
2834           if (print_local_parts)
2835             {
2836               symbol_length = 9;
2837               // unless we have a dormant attr
2838               for (attr_map_t::const_iterator j = curr->attrs.begin();
2839                    j != curr->attrs.end(); ++j)
2840                 {
2841                   if (!j->second.first)
2842                     {
2843                       symbol_length = 12;
2844                       break;
2845                     }
2846                 }
2847             }
2848           else
2849             {
2850               symbol_length = 3;
2851               // unless we have a live attr
2852               for (attr_map_t::const_iterator j = curr->attrs.begin();
2853                    j != curr->attrs.end(); ++j)
2854                 {
2855                   if (j->second.first)
2856                     {
2857                       symbol_length = 4;
2858                       break;
2859                     }
2860                 }
2861             }
2862           contents.append(symbol_length - 3, ' ');
2863           contents.append("dir \"");
2864           append_with_escaped_quotes(contents, i.path());
2865           contents.append("\"\n");
2866         }
2867       else
2868         {
2869           if (print_local_parts)
2870             {
2871               symbol_length = 12;
2872             }
2873           else
2874             {
2875               symbol_length = 7;
2876             }
2877           const_file_t ftmp = downcast_to_file_t(curr);
2878 
2879           contents.append(symbol_length - 4, ' ');
2880           contents.append("file \"");
2881           append_with_escaped_quotes(contents, i.path());
2882           contents.append("\"\n");
2883 
2884           contents.append(symbol_length - 7, ' ');
2885           contents.append("content [");
2886           contents.append(encode_hexenc(ftmp->content.inner()(), origin::internal));
2887           contents.append("]\n");
2888         }
2889 
2890       if (print_local_parts)
2891         {
2892           I(curr->self != the_null_node);
2893           contents.append(symbol_length - 5, ' ');
2894           contents.append("ident \"");
2895           contents.append(lexical_cast<string>(curr->self));
2896           contents.append("\"\n");
2897         }
2898 
2899       // Push the non-dormant part of the attr map
2900       for (attr_map_t::const_iterator j = curr->attrs.begin();
2901            j != curr->attrs.end(); ++j)
2902         {
2903           if (j->second.first)
2904             {
2905               // L(FL("printing attr %s : %s = %s") % fp % j->first % j->second);
2906 
2907               contents.append(symbol_length - 4, ' ');
2908               contents.append("attr \"");
2909               append_with_escaped_quotes(contents, j->first());
2910               contents.append("\" \"");
2911               append_with_escaped_quotes(contents, j->second.second());
2912               contents.append("\"\n");
2913             }
2914         }
2915 
2916       if (print_local_parts)
2917         {
2918           // Push the dormant part of the attr map
2919           for (attr_map_t::const_iterator j = curr->attrs.begin();
2920                j != curr->attrs.end(); ++j)
2921             {
2922               if (!j->second.first)
2923                 {
2924                   I(j->second.second().empty());
2925 
2926                   contents.append("dormant_attr \""); // always the longest sym
2927                   append_with_escaped_quotes(contents, j->first());
2928                   contents.append("\"\n");
2929                 }
2930             }
2931 
2932           const_marking_t m = mm.get_marking(curr->self);
2933           push_marking(contents, is_file_t(curr), m, symbol_length);
2934         }
2935     }
2936   dat = data(contents, origin::internal);
2937 }
2938 
2939 inline size_t
read_num(string const & s)2940 read_num(string const & s)
2941 {
2942   size_t n = 0;
2943 
2944   for (string::const_iterator i = s.begin(); i != s.end(); i++)
2945     {
2946       I(*i >= '0' && *i <= '9');
2947       n *= 10;
2948       n += static_cast<size_t>(*i - '0');
2949     }
2950   return n;
2951 }
2952 
2953 void
parse_from(basic_io::parser & pa,marking_map & mm)2954 roster_t::parse_from(basic_io::parser & pa,
2955                      marking_map & mm)
2956 {
2957   // Instantiate some lookaside caches to ensure this roster reuses
2958   // string storage across ATOMIC elements.
2959   id::symtab id_syms;
2960   utf8::symtab path_syms;
2961   attr_key::symtab attr_key_syms;
2962   attr_value::symtab attr_value_syms;
2963 
2964 
2965   // We *always* parse the local part of a roster, because we do not
2966   // actually send the non-local part over the network; the only times
2967   // we serialize a manifest (non-local roster) is when we're printing
2968   // it out for a user, or when we're hashing it for a manifest ID.
2969   nodes.clear();
2970   root_dir.reset();
2971   mm.clear();
2972 
2973   {
2974     pa.esym(basic_io::syms::format_version);
2975     string vers;
2976     pa.str(vers);
2977     I(vers == "1");
2978   }
2979 
2980   while(pa.symp())
2981     {
2982       string pth, ident, rev;
2983       node_t n;
2984 
2985       if (pa.symp(basic_io::syms::file))
2986         {
2987           string content;
2988           pa.sym();
2989           pa.str(pth);
2990           pa.esym(basic_io::syms::content);
2991           pa.hex(content);
2992           pa.esym(syms::ident);
2993           pa.str(ident);
2994           n = file_t(new file_node(read_num(ident),
2995                                    decode_hexenc_as<file_id>(content,
2996                                                              pa.tok.in.made_from)));
2997         }
2998       else if (pa.symp(basic_io::syms::dir))
2999         {
3000           pa.sym();
3001           pa.str(pth);
3002           pa.esym(syms::ident);
3003           pa.str(ident);
3004           n = dir_t(new dir_node(read_num(ident)));
3005         }
3006       else
3007         break;
3008 
3009       I(static_cast<bool>(n));
3010 
3011       n->cow_version = cow_version;
3012       I(nodes.set_if_missing(n->self, n));
3013 
3014       if (is_dir_t(n) && pth.empty())
3015         {
3016           I(! has_root());
3017           root_dir = downcast_to_dir_t(n);
3018         }
3019       else
3020         {
3021           I(!pth.empty());
3022           attach_node(n->self, file_path_internal(pth));
3023         }
3024 
3025       // Non-dormant attrs
3026       while(pa.symp(basic_io::syms::attr))
3027         {
3028           pa.sym();
3029           string k, v;
3030           pa.str(k);
3031           pa.str(v);
3032           safe_insert(n->attrs, make_pair(attr_key(k, pa.tok.in.made_from),
3033                                           make_pair(true,
3034                                                     attr_value(v, pa.tok.in.made_from))));
3035         }
3036 
3037       // Dormant attrs
3038       while(pa.symp(syms::dormant_attr))
3039         {
3040           pa.sym();
3041           string k;
3042           pa.str(k);
3043           safe_insert(n->attrs, make_pair(attr_key(k, pa.tok.in.made_from),
3044                                           make_pair(false, attr_value())));
3045         }
3046 
3047       {
3048         marking_t m(new marking());
3049         parse_marking(pa, m);
3050         mm.put_marking(n->self, m);
3051       }
3052     }
3053 }
3054 
3055 
3056 void
read_roster_and_marking(roster_data const & dat,roster_t & ros,marking_map & mm)3057 read_roster_and_marking(roster_data const & dat,
3058                         roster_t & ros,
3059                         marking_map & mm)
3060 {
3061   basic_io::input_source src(dat.inner()(), "roster");
3062   basic_io::tokenizer tok(src);
3063   basic_io::parser pars(tok);
3064   ros.parse_from(pars, mm);
3065   I(src.lookahead == EOF);
3066   ros.check_sane_against(mm);
3067 }
3068 
3069 
3070 static void
write_roster_and_marking(roster_t const & ros,marking_map const & mm,data & dat,bool print_local_parts,bool do_sanity_check)3071 write_roster_and_marking(roster_t const & ros,
3072                          marking_map const & mm,
3073                          data & dat,
3074                          bool print_local_parts,
3075                          bool do_sanity_check)
3076 {
3077   if (do_sanity_check)
3078     {
3079       if (print_local_parts)
3080         ros.check_sane_against(mm);
3081       else
3082         ros.check_sane(true);
3083     }
3084   ros.print_to(dat, mm, print_local_parts);
3085 }
3086 
3087 
3088 void
write_roster_and_marking(roster_t const & ros,marking_map const & mm,roster_data & dat)3089 write_roster_and_marking(roster_t const & ros,
3090                          marking_map const & mm,
3091                          roster_data & dat)
3092 {
3093   data tmp;
3094   write_roster_and_marking(ros, mm, tmp, true, true);
3095   dat = roster_data(tmp);
3096 }
3097 
3098 
3099 void
write_manifest_of_roster(roster_t const & ros,manifest_data & dat,bool do_sanity_check)3100 write_manifest_of_roster(roster_t const & ros,
3101                          manifest_data & dat,
3102                          bool do_sanity_check)
3103 {
3104   data tmp;
3105   marking_map mm;
3106   write_roster_and_marking(ros, mm, tmp, false, do_sanity_check);
3107   dat = manifest_data(tmp);
3108 }
3109 
calculate_ident(roster_t const & ros,manifest_id & ident,bool do_sanity_check)3110 void calculate_ident(roster_t const & ros,
3111                      manifest_id & ident,
3112                      bool do_sanity_check)
3113 {
3114   manifest_data tmp;
3115   if (!ros.all_nodes().empty())
3116     {
3117       write_manifest_of_roster(ros, tmp, do_sanity_check);
3118     }
3119   calculate_ident(tmp, ident);
3120 }
3121 
3122 ////////////////////////////////////////////////////////////////////
3123 //   testing
3124 ////////////////////////////////////////////////////////////////////
3125 
3126 
3127 // Local Variables:
3128 // mode: C++
3129 // fill-column: 76
3130 // c-file-style: "gnu"
3131 // indent-tabs-mode: nil
3132 // End:
3133 // vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:
3134