1 // Copyright (C) 2005 Nathaniel Smith <njs@pobox.com>
2 //               2008, 2010 - 2011 Stephen Leake <stephen_leake@stephe-leake.org>
3 //
4 // This program is made available under the GNU GPL version 2.0 or
5 // greater. See the accompanying file COPYING for details.
6 //
7 // This program is distributed WITHOUT ANY WARRANTY; without even the
8 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
9 // PURPOSE.
10 
11 #include "base.hh"
12 #include <sstream>
13 
14 #include "paths.hh"
15 #include "file_io.hh"
16 #include "charset.hh"
17 #include "safe_map.hh"
18 
19 using std::exception;
20 using std::ostream;
21 using std::ostringstream;
22 using std::string;
23 using std::vector;
24 using std::map;
25 using std::make_pair;
26 
27 // some structure to ensure we aren't doing anything broken when resolving
28 // filenames.  the idea is to make sure
29 //   -- we don't depend on the existence of something before it has been set
30 //   -- we don't re-set something that has already been used
31 //   -- sometimes, we use the _non_-existence of something, so we shouldn't
32 //      set anything whose un-setted-ness has already been used
33 template <typename T>
34 struct access_tracker
35 {
setaccess_tracker36   void set(T const & val, bool may_be_initialized)
37   {
38     I(may_be_initialized || !initialized);
39     I(!very_uninitialized);
40     I(!used);
41     initialized = true;
42     value = val;
43   }
getaccess_tracker44   T const & get()
45   {
46     I(initialized);
47     used = true;
48     return value;
49   }
get_but_unusedaccess_tracker50   T const & get_but_unused()
51   {
52     I(initialized);
53     return value;
54   }
may_not_initializeaccess_tracker55   void may_not_initialize()
56   {
57     I(!initialized);
58     very_uninitialized = true;
59   }
60   // for unit tests
unsetaccess_tracker61   void unset()
62   {
63     used = initialized = very_uninitialized = false;
64   }
65   T value;
66   bool initialized, used, very_uninitialized;
access_trackeraccess_tracker67   access_tracker() : initialized(false), used(false), very_uninitialized(false) {};
68 };
69 
70 // paths to use in interpreting paths from various sources,
71 // conceptually:
72 //    working_root / initial_rel_path == initial_abs_path
73 
74 // initial_abs_path is for interpreting relative system_path's
75 static access_tracker<system_path> initial_abs_path;
76 // initial_rel_path is for interpreting external file_path's
77 // we used to make it a file_path, but then you can't run monotone from
78 // inside the _MTN/ dir (even when referring to files outside the _MTN/
79 // dir).  use of a bare string requires some caution but does work.
80 static access_tracker<string> initial_rel_path;
81 // working_root is for converting file_path's and bookkeeping_path's to
82 // system_path's.
83 static access_tracker<system_path> working_root;
84 
85 void
save_initial_path()86 save_initial_path()
87 {
88   // FIXME: BUG: this only works if the current working dir is in utf8
89   initial_abs_path.set(system_path(get_current_working_dir(),
90                                    origin::system), false);
91   L(FL("initial abs path is: %s") % initial_abs_path.get_but_unused());
92 }
93 
94 ///////////////////////////////////////////////////////////////////////////
95 // verifying that internal paths are indeed normalized.
96 // this code must be superfast
97 ///////////////////////////////////////////////////////////////////////////
98 
99 // normalized means:
100 //  -- / as path separator
101 //  -- not an absolute path (on either posix or win32)
102 //     operationally, this means: first character != '/', first character != '\',
103 //     second character != ':'
104 //  -- no illegal characters
105 //     -- 0x00 -- 0x1f, 0x7f, \ are the illegal characters.  \ is illegal
106 //        unconditionally to prevent people checking in files on posix that
107 //        have a different interpretation on win32
108 //     -- (may want to allow 0x0a and 0x0d (LF and CR) in the future, but this
109 //        is blocked on manifest format changing)
110 //        (also requires changes to 'automate inventory', possibly others, to
111 //        handle quoting)
112 //  -- no doubled /'s
113 //  -- no trailing /
114 //  -- no "." or ".." path components
115 
// A single path component is bad when it is empty, "." or "..".
static inline bool
bad_component(string const & component)
{
  return component.empty() || component == "." || component == "..";
}
127 
128 static inline bool
has_bad_chars(string const & path)129 has_bad_chars(string const & path)
130 {
131   for (string::const_iterator c = path.begin(); LIKELY(c != path.end()); c++)
132     {
133       // char is often a signed type; convert to unsigned to ensure that
134       // bytes 0x80-0xff are considered > 0x1f.
135       u8 x = (u8)*c;
136       // 0x5c is '\\'; we use the hex constant to make the dependency on
137       // ASCII encoding explicit.
138       if (UNLIKELY(x <= 0x1f || x == 0x5c || x == 0x7f))
139         return true;
140     }
141   return false;
142 }
143 
144 // as above, but disallows / as well.
145 static inline bool
has_bad_component_chars(string const & pc)146 has_bad_component_chars(string const & pc)
147 {
148   for (string::const_iterator c = pc.begin(); LIKELY(c != pc.end()); c++)
149     {
150       // char is often a signed type; convert to unsigned to ensure that
151       // bytes 0x80-0xff are considered > 0x1f.
152       u8 x = (u8)*c;
153       // 0x2f is '/' and 0x5c is '\\'; we use hex constants to make the
154       // dependency on ASCII encoding explicit.
155       if (UNLIKELY(x <= 0x1f || x == 0x2f || x == 0x5c || x == 0x7f))
156         return true;
157     }
158   return false;
159 
160 }
161 
// Returns true if PATH is absolute on the platform being compiled for:
// a leading '/' everywhere, and additionally a leading '\\' or a ':' in
// the second position when WIN32 is defined.  The empty path is not
// absolute.
static bool
is_absolute_here(string const & path)
{
  if (path.empty())
    return false;
#ifdef WIN32
  if (path[0] == '\\' || (path.size() > 1 && path[1] == ':'))
    return true;
#endif
  return path[0] == '/';
}
177 
// Returns true if PATH would be absolute on any supported platform: it
// starts with '/' or '\\', or has ':' as its second character.  The empty
// path is not absolute.
static inline bool
is_absolute_somewhere(string const & path)
{
  if (path.empty())
    return false;
  char const first = path[0];
  return first == '/'
    || first == '\\'
    || (path.size() > 1 && path[1] == ':');
}
191 
192 // fully_normalized_path verifies a complete pathname for validity and
193 // having been properly normalized (as if by normalize_path, below).
194 static inline bool
fully_normalized_path(string const & path)195 fully_normalized_path(string const & path)
196 {
197   // empty path is fine
198   if (path.empty())
199     return true;
200   // could use is_absolute_somewhere, but this is the only part of it that
201   // wouldn't be redundant
202   if (path.size() > 1 && path[1] == ':')
203     return false;
204   // first scan for completely illegal bytes
205   if (has_bad_chars(path))
206     return false;
207   // now check each component
208   string::size_type start = 0, stop;
209   while (1)
210     {
211       stop = path.find('/', start);
212       if (stop == string::npos)
213         break;
214       string const & s(path.substr(start, stop - start));
215       if (bad_component(s))
216         return false;
217       start = stop + 1;
218     }
219 
220   string const & s(path.substr(start));
221   return !bad_component(s);
222 }
223 
224 // This function considers _MTN, _MTn, _MtN, _mtn etc. to all be bookkeeping
225 // paths, because on case insensitive filesystems, files put in any of them
226 // may end up in _MTN instead.  This allows arbitrary code execution.  A
227 // better solution would be to fix this in the working directory writing
228 // code -- this prevents all-unix projects from naming things "_mtn", which
229 // is less rude than when the bookkeeping root was "MT", but still rude --
230 // but as a temporary security kluge it works.
// Returns true if PATH is "_MTN" (in any mix of upper and lower case), or
// starts with that name immediately followed by '/'.
static inline bool
in_bookkeeping_dir(string const & path)
{
  // anything shorter than the four characters of "_MTN" cannot match
  if (path.size() < 4)
    return false;

  // the first four characters must be _, M, T and N, the letters in either
  // upper or lower case
  bool const has_prefix = path[0] == '_'
    && (path[1] == 'M' || path[1] == 'm')
    && (path[2] == 'T' || path[2] == 't')
    && (path[3] == 'N' || path[3] == 'n');
  if (!has_prefix)
    return false;

  // it is a bookkeeping path if that is the whole path, or if the next
  // character is the separator
  return path.size() == 4 || path[4] == '/';
}
249 
250 static inline bool
is_valid_internal(string const & path)251 is_valid_internal(string const & path)
252 {
253   return (fully_normalized_path(path)
254           && !in_bookkeeping_dir(path));
255 }
256 
257 static string
normalize_path(string const & in)258 normalize_path(string const & in)
259 {
260   string inT = in;
261   string leader;
262   MM(inT);
263 
264 #ifdef WIN32
265   // the first thing we do is kill all the backslashes
266   for (string::iterator i = inT.begin(); i != inT.end(); i++)
267     if (*i == '\\')
268       *i = '/';
269 #endif
270 
271   if (is_absolute_here (inT))
272     {
273       if (inT[0] == '/')
274         {
275           leader = "/";
276           inT = inT.substr(1);
277 
278           if (!inT.empty() && inT[0] == '/')
279             {
280               // if there are exactly two slashes at the beginning they
281               // are both preserved.  three or more are the same as one.
282               string::size_type f = inT.find_first_not_of("/");
283               if (f == string::npos)
284                 f = inT.size();
285               if (f == 1)
286                 leader = "//";
287               inT = inT.substr(f);
288             }
289         }
290 #ifdef WIN32
291       else
292         {
293           I(inT.size() > 1 && inT[1] == ':');
294           if (inT.size() > 2 && inT[2] == '/')
295             {
296               leader = inT.substr(0, 3);
297               inT = inT.substr(3);
298             }
299           else
300             {
301               leader = inT.substr(0, 2);
302               inT = inT.substr(2);
303             }
304         }
305 #endif
306 
307       I(!is_absolute_here(inT));
308       if (inT.empty())
309         return leader;
310     }
311 
312   vector<string> stack;
313   string::const_iterator head, tail;
314   string::size_type size_estimate = leader.size();
315   for (head = inT.begin(); head != inT.end(); head = tail)
316     {
317       tail = head;
318       while (tail != inT.end() && *tail != '/')
319         tail++;
320 
321       string elt(head, tail);
322       while (tail != inT.end() && *tail == '/')
323         tail++;
324 
325       if (elt == ".")
326         continue;
327       // remove foo/.. element pairs; leave leading .. components alone
328       if (elt == ".." && !stack.empty() && stack.back() != "..")
329         {
330           stack.pop_back();
331           continue;
332         }
333 
334       size_estimate += elt.size() + 1;
335       stack.push_back(elt);
336     }
337 
338   leader.reserve(size_estimate);
339   for (vector<string>::const_iterator i = stack.begin(); i != stack.end(); i++)
340     {
341       if (i != stack.begin())
342         leader += "/";
343       leader += *i;
344     }
345   return leader;
346 }
347 
348 void
normalize_external_path(string const & path,string & normalized,bool to_workspace_root)349 normalize_external_path(string const & path, string & normalized, bool to_workspace_root)
350 {
351   if (!initial_rel_path.initialized)
352     {
353       // we are not in a workspace; treat this as an internal
354       // path, and set the access_tracker() into a very uninitialised
355       // state so that we will hit an exception if we do eventually
356       // enter a workspace
357       initial_rel_path.may_not_initialize();
358       normalized = path;
359       E(is_valid_internal(path), origin::user,
360         F("path '%s' is invalid") % path);
361     }
362   else
363     {
364       E(!is_absolute_here(path), origin::user,
365         F("absolute path '%s' is invalid") % path);
366       string base;
367       try
368         {
369           if (to_workspace_root)
370             base = "";
371           else
372             base = initial_rel_path.get();
373 
374           if (base == "")
375             normalized = normalize_path(path);
376           else
377             normalized = normalize_path(base + "/" + path);
378         }
379       catch (exception &)
380         {
381           E(false, origin::user, F("path '%s' is invalid") % path);
382         }
383       if (normalized == ".")
384         normalized = string("");
385       E(fully_normalized_path(normalized), origin::user,
386         F("path '%s' is invalid") % normalized);
387     }
388 }
389 
390 ///////////////////////////////////////////////////////////////////////////
391 // single path component handling.
392 ///////////////////////////////////////////////////////////////////////////
393 
394 // these constructors confirm that what they are passed is a legitimate
395 // component.  note that the empty string is a legitimate component,
396 // but is not acceptable to bad_component (above) and therefore we have
397 // to open-code most of those checks.
path_component(utf8 const & d)398 path_component::path_component(utf8 const & d)
399   : origin_aware(d.made_from), data(d())
400 {
401   MM(data);
402   I(!has_bad_component_chars(data) && data != "." && data != "..");
403 }
404 
path_component(string const & d,origin::type whence)405 path_component::path_component(string const & d, origin::type whence)
406   : origin_aware(whence), data(d)
407 {
408   MM(data);
409   I(utf8_validate(utf8(data, origin::internal))
410     && !has_bad_component_chars(data)
411     && data != "." && data != "..");
412 }
413 
path_component(char const * d)414 path_component::path_component(char const * d)
415   : data(d)
416 {
417   MM(data);
418   I(utf8_validate(utf8(data, origin::internal))
419     && !has_bad_component_chars(data)
420     && data != "." && data != "..");
421 }
422 
operator <<(std::ostream & s,path_component const & pc)423 std::ostream & operator<<(std::ostream & s, path_component const & pc)
424 {
425   return s << pc();
426 }
427 
dump(path_component const & pc,std::string & to)428 template <> void dump(path_component const & pc, std::string & to)
429 {
430   to = pc();
431 }
432 
433 ///////////////////////////////////////////////////////////////////////////
434 // complete paths to files within a working directory
435 ///////////////////////////////////////////////////////////////////////////
436 
file_path(file_path::source_type type,string const & path,bool to_workspace_root)437 file_path::file_path(file_path::source_type type, string const & path, bool to_workspace_root)
438 {
439   MM(path);
440   I(utf8_validate(utf8(path, origin::internal)));
441   if (type == external)
442     {
443       string normalized;
444       normalize_external_path(path, normalized, to_workspace_root);
445       E(!in_bookkeeping_dir(normalized), origin::user,
446         F("path '%s' is in bookkeeping dir") % normalized);
447       data = normalized;
448     }
449   else
450     data = path;
451   MM(data);
452   I(is_valid_internal(data));
453 }
454 
file_path(file_path::source_type type,utf8 const & path,bool to_workspace_root)455 file_path::file_path(file_path::source_type type, utf8 const & path,
456                      bool to_workspace_root)
457   : any_path(path.made_from)
458 {
459   MM(path);
460   E(utf8_validate(path), made_from, F("invalid utf8"));
461   if (type == external)
462     {
463       string normalized;
464       normalize_external_path(path(), normalized, to_workspace_root);
465       E(!in_bookkeeping_dir(normalized), origin::user,
466         F("path '%s' is in bookkeeping dir") % normalized);
467       data = normalized;
468     }
469   else
470     data = path();
471   MM(data);
472   I(is_valid_internal(data));
473 }
474 
bookkeeping_path(char const * const path)475 bookkeeping_path::bookkeeping_path(char const * const path)
476 {
477   I(fully_normalized_path(path));
478   I(in_bookkeeping_dir(path));
479   data = path;
480 }
481 
bookkeeping_path(string const & path,origin::type made_from)482 bookkeeping_path::bookkeeping_path(string const & path, origin::type made_from)
483 {
484   E(fully_normalized_path(path), made_from, F("path is not normalized"));
485   E(in_bookkeeping_dir(path), made_from,
486     F("bookkeeping path is not in bookkeeping directory"));
487   data = path;
488 }
489 
490 bool
external_string_is_bookkeeping_path(utf8 const & path)491 bookkeeping_path::external_string_is_bookkeeping_path(utf8 const & path)
492 {
493   // FIXME: this charset casting everywhere is ridiculous
494   string normalized;
495   try
496     {
497       normalize_external_path(path(), normalized, false);
498     }
499   catch (recoverable_failure &)
500     {
501       return false;
502     }
503   return internal_string_is_bookkeeping_path(utf8(normalized, path.made_from));
504 }
internal_string_is_bookkeeping_path(utf8 const & path)505 bool bookkeeping_path::internal_string_is_bookkeeping_path(utf8 const & path)
506 {
507   return in_bookkeeping_dir(path());
508 }
509 
510 ///////////////////////////////////////////////////////////////////////////
511 // splitting/joining
512 // this code must be superfast
513 // it depends very much on knowing that it can only be applied to fully
514 // normalized, relative, paths.
515 ///////////////////////////////////////////////////////////////////////////
516 
517 // this peels off the last component of any path and returns it.
518 // the last component of a path with no slashes in it is the complete path.
519 // the last component of a path referring to the root directory is an
520 // empty string.
521 path_component
basename() const522 any_path::basename() const
523 {
524   string const & s = data;
525   string::size_type sep = s.rfind('/');
526 #ifdef WIN32
527   if (sep == string::npos && s.size()>= 2 && s[1] == ':')
528     sep = 1;
529 #endif
530   if (sep == string::npos)
531     return path_component(s, 0);  // force use of short circuit
532   if (sep == s.size())
533     return path_component();
534   return path_component(s, sep + 1);
535 }
536 
537 // this returns all but the last component of any path.  It has to take
538 // care at the root.
539 any_path
dirname() const540 any_path::dirname() const
541 {
542   string const & s = data;
543   string::size_type sep = s.rfind('/');
544 #ifdef WIN32
545   if (sep == string::npos && s.size()>= 2 && s[1] == ':')
546     sep = 1;
547 #endif
548   if (sep == string::npos)
549     return any_path();
550 
551   // dirname() of the root directory is itself
552   if (sep == s.size() - 1)
553     return *this;
554 
555   // dirname() of a direct child of the root is the root
556   if (sep == 0 || (sep == 1 && s[1] == '/')
557 #ifdef WIN32
558       || (sep == 1 || (sep == 2 && s[1] == ':'))
559 #endif
560       )
561     return any_path(s, 0, sep+1);
562 
563   return any_path(s, 0, sep);
564 }
565 
566 // these variations exist to get the return type right.  also,
567 // file_path dirname() can be a little simpler.
568 file_path
dirname() const569 file_path::dirname() const
570 {
571   string const & s = data;
572   string::size_type sep = s.rfind('/');
573   if (sep == string::npos)
574     return file_path();
575   return file_path(s, 0, sep);
576 }
577 
578 system_path
dirname() const579 system_path::dirname() const
580 {
581   string const & s = data;
582   string::size_type sep = s.rfind('/');
583 #ifdef WIN32
584   if (sep == string::npos && s.size()>= 2 && s[1] == ':')
585     sep = 1;
586 #endif
587   I(sep != string::npos);
588 
589   // dirname() of the root directory is itself
590   if (sep == s.size() - 1)
591     return *this;
592 
593   // dirname() of a direct child of the root is the root
594   if (sep == 0 || (sep == 1 && s[1] == '/')
595 #ifdef WIN32
596       || (sep == 1 || (sep == 2 && s[1] == ':'))
597 #endif
598       )
599     return system_path(s, 0, sep+1);
600 
601   return system_path(s, 0, sep);
602 }
603 
604 
605 // produce dirname and basename at the same time
606 void
dirname_basename(file_path & dir,path_component & base) const607 file_path::dirname_basename(file_path & dir, path_component & base) const
608 {
609   string const & s = data;
610   string::size_type sep = s.rfind('/');
611   if (sep == string::npos)
612     {
613       dir = file_path();
614       base = path_component(s, 0);
615     }
616   else
617     {
618       I(sep < s.size() - 1); // last component must have at least one char
619       dir = file_path(s, 0, sep);
620       base = path_component(s, sep + 1);
621     }
622 }
623 
624 // returns true if this path is beneath other
625 bool
is_beneath_of(const file_path & other) const626 file_path::is_beneath_of(const file_path & other) const
627 {
628   if (other.empty())
629     return true;
630 
631   file_path basedir = dirname();
632   while (!basedir.empty())
633     {
634       L(FL("base: %s, other: %s") % basedir % other);
635       if (basedir == other)
636         return true;
637       basedir = basedir.dirname();
638     }
639   return false;
640 }
641 
642 // count the number of /-separated components of the path.
643 unsigned int
depth() const644 file_path::depth() const
645 {
646   if (data.empty())
647     return 0;
648 
649   unsigned int components = 1;
650   for (string::const_iterator p = data.begin(); p != data.end(); p++)
651     if (*p == '/')
652       components++;
653 
654   return components;
655 }
656 
657 ///////////////////////////////////////////////////////////////////////////
658 // localizing file names (externalizing them)
659 // this code must be superfast when there is no conversion needed
660 ///////////////////////////////////////////////////////////////////////////
661 
662 string
as_external() const663 any_path::as_external() const
664 {
665 #ifdef __APPLE__
666   // on OS X paths for the filesystem/kernel are UTF-8 encoded, regardless of
667   // locale.
668   return data;
669 #else
670   // on normal systems we actually have some work to do, alas.
671   // not much, though, because utf8_to_system_string does all the hard work.
672   // it is carefully optimized.  do not screw it up.
673   external out;
674   utf8_to_system_strict(utf8(data, made_from), out);
675   return out();
676 #endif
677 }
678 
679 ///////////////////////////////////////////////////////////////////////////
680 // writing out paths
681 ///////////////////////////////////////////////////////////////////////////
682 
683 ostream &
operator <<(ostream & o,any_path const & a)684 operator <<(ostream & o, any_path const & a)
685 {
686   o << a.as_internal();
687   return o;
688 }
689 
690 template <>
dump(file_path const & p,string & out)691 void dump(file_path const & p, string & out)
692 {
693   ostringstream oss;
694   oss << p << '\n';
695   out = oss.str();
696 }
697 
698 template <>
dump(system_path const & p,string & out)699 void dump(system_path const & p, string & out)
700 {
701   ostringstream oss;
702   oss << p << '\n';
703   out = oss.str();
704 }
705 
706 template <>
dump(bookkeeping_path const & p,string & out)707 void dump(bookkeeping_path const & p, string & out)
708 {
709   ostringstream oss;
710   oss << p << '\n';
711   out = oss.str();
712 }
713 
714 ///////////////////////////////////////////////////////////////////////////
715 // path manipulation
716 // this code's speed does not matter much
717 ///////////////////////////////////////////////////////////////////////////
718 
719 // relies on its arguments already being validated, except that you may not
720 // append the empty path component, and if you are appending to the empty
721 // path, you may not create an absolute path or a path into the bookkeeping
722 // directory.
723 file_path
operator /(path_component const & to_append) const724 file_path::operator /(path_component const & to_append) const
725 {
726   I(!to_append.empty());
727   if (empty())
728     {
729       string const & s = to_append();
730       I(!is_absolute_somewhere(s) && !in_bookkeeping_dir(s));
731       return file_path(s, 0, string::npos);
732     }
733   else
734     return file_path(((*(data.end() - 1) == '/') ? data : data + "/")
735                      + to_append(), 0, string::npos);
736 }
737 
738 // similarly, but even less checking is needed.
739 file_path
operator /(file_path const & to_append) const740 file_path::operator /(file_path const & to_append) const
741 {
742   I(!to_append.empty());
743   if (empty())
744     return to_append;
745   return file_path(((*(data.end() - 1) == '/') ? data : data + "/")
746                    + to_append.as_internal(), 0, string::npos);
747 }
748 
749 bookkeeping_path
operator /(path_component const & to_append) const750 bookkeeping_path::operator /(path_component const & to_append) const
751 {
752   I(!to_append.empty());
753   I(!empty());
754   return bookkeeping_path(((*(data.end() - 1) == '/') ? data : data + "/")
755                           + to_append(), 0, string::npos);
756 }
757 
758 bookkeeping_path
operator /(file_path const & to_append) const759 bookkeeping_path::operator /(file_path const & to_append) const
760 {
761   I(!to_append.empty());
762   I(!empty());
763   return bookkeeping_path(((*(data.end() - 1) == '/') ? data : data + "/")
764                           + to_append.as_internal(), 0, string::npos);
765 }
766 
767 system_path
operator /(path_component const & to_append) const768 system_path::operator /(path_component const & to_append) const
769 {
770   I(!to_append.empty());
771   I(!empty());
772   return system_path(((*(data.end() - 1) == '/') ? data : data + "/")
773                      + to_append(), 0, string::npos);
774 }
775 
776 any_path
operator /(path_component const & to_append) const777 any_path::operator /(path_component const & to_append) const
778 {
779   I(!to_append.empty());
780   I(!empty());
781   return any_path(((*(data.end() - 1) == '/') ? data : data + "/")
782                   + to_append(), 0, string::npos);
783 }
784 
785 // these take strings and validate
786 bookkeeping_path
operator /(char const * to_append) const787 bookkeeping_path::operator /(char const * to_append) const
788 {
789   I(!is_absolute_somewhere(to_append));
790   I(!empty());
791   return bookkeeping_path(((*(data.end() - 1) == '/') ? data : data + "/")
792                           + to_append, origin::internal);
793 }
794 
795 system_path
operator /(char const * to_append) const796 system_path::operator /(char const * to_append) const
797 {
798   I(!empty());
799   I(!is_absolute_here(to_append));
800   return system_path(((*(data.end() - 1) == '/') ? data : data + "/")
801                      + to_append, origin::internal);
802 }
803 
804 ///////////////////////////////////////////////////////////////////////////
805 // system_path
806 ///////////////////////////////////////////////////////////////////////////
807 
system_path(any_path const & other,bool in_true_workspace)808 system_path::system_path(any_path const & other, bool in_true_workspace)
809 {
810   if (is_absolute_here(other.as_internal()))
811     // another system_path.  the normalizing isn't really necessary, but it
812     // makes me feel warm and fuzzy.
813     data = normalize_path(other.as_internal());
814   else
815     {
816       system_path wr;
817       if (in_true_workspace)
818         wr = working_root.get();
819       else
820         wr = working_root.get_but_unused();
821       data = normalize_path(wr.as_internal() + "/" + other.as_internal());
822     }
823 }
824 
const_system_path(utf8 const & path)825 static inline string const_system_path(utf8 const & path)
826 {
827   E(!path().empty(), path.made_from, F("invalid path ''"));
828   string expanded = tilde_expand(path());
829   if (is_absolute_here(expanded))
830     return normalize_path(expanded);
831   else
832     return normalize_path(initial_abs_path.get().as_internal()
833                           + "/" + path());
834 }
835 
system_path(string const & path,origin::type from)836 system_path::system_path(string const & path, origin::type from)
837 {
838   data = const_system_path(utf8(path, from));
839 }
840 
system_path(char const * const path)841 system_path::system_path(char const * const path)
842 {
843   data = const_system_path(utf8(path, origin::internal));
844 }
845 
system_path(utf8 const & path)846 system_path::system_path(utf8 const & path)
847 {
848   data = const_system_path(utf8(path));
849 }
850 
851 // Constant path predicates.
852 #define IMPLEMENT_CONST_PRED(cls, ret)                  \
853   template <> bool                                      \
854   path_always_##ret<cls>::operator()(cls const &) const \
855   { return ret; }
856 
IMPLEMENT_CONST_PRED(any_path,false)857 IMPLEMENT_CONST_PRED(any_path, false)
858 IMPLEMENT_CONST_PRED(system_path, false)
859 IMPLEMENT_CONST_PRED(file_path, false)
860 IMPLEMENT_CONST_PRED(bookkeeping_path, false)
861 
862 IMPLEMENT_CONST_PRED(any_path, true)
863 IMPLEMENT_CONST_PRED(system_path, true)
864 IMPLEMENT_CONST_PRED(file_path, true)
865 IMPLEMENT_CONST_PRED(bookkeeping_path, true)
866 
867 #undef IMPLEMENT_CONST_PRED
868 
869 // If this wasn't a user-supplied path, we should know
870 // which kind it is.
871 boost::shared_ptr<any_path>
872 new_optimal_path(std::string path, bool to_workspace_root)
873 {
874   utf8 const utf8_path = utf8(path, origin::user);
875   string normalized;
876   try
877     {
878       normalize_external_path(utf8_path(), normalized, to_workspace_root);
879     }
880   catch (recoverable_failure &)
881     {
882       // not in workspace
883       return boost::shared_ptr<any_path>(new system_path(path, origin::user));
884     }
885 
886   if (in_bookkeeping_dir(normalized))
887     return boost::shared_ptr<any_path>(new bookkeeping_path(normalized, origin::user));
888   else
889     return boost::shared_ptr<any_path>(new file_path(file_path_internal(normalized)));
890 };
891 
892 // Either conversion of S to a path_component, or composition of P / S, has
893 // failed; figure out what went wrong and issue an appropriate diagnostic.
894 
895 void
report_failed_path_composition(any_path const & p,char const * s,bool isdir)896 report_failed_path_composition(any_path const & p, char const * s,
897                                bool isdir)
898 {
899   utf8 badpth;
900   if (p.empty())
901     badpth = utf8(s);
902   else
903     badpth = utf8(p.as_internal() + "/" + s, p.made_from);
904   if (bookkeeping_path::internal_string_is_bookkeeping_path(badpth))
905     L(FL("ignoring bookkeeping directory '%s'") % badpth);
906   else
907     {
908       // We rely on caller to tell us whether this is a directory.
909       if (isdir)
910         W(F("skipping directory '%s' with unsupported name") % badpth);
911       else
912         W(F("skipping file '%s' with unsupported name") % badpth);
913     }
914 }
915 
916 ///////////////////////////////////////////////////////////////////////////
917 // workspace (and path root) handling
918 ///////////////////////////////////////////////////////////////////////////
919 
920 static bool
find_bookdir(system_path const & root,path_component const & bookdir,system_path & current,string & removed)921 find_bookdir(system_path const & root, path_component const & bookdir,
922              system_path & current, string & removed)
923 {
924   current = initial_abs_path.get();
925   removed.clear();
926 
927   // check that the current directory is below the specified search root
928   if (current.as_internal().find(root.as_internal()) != 0)
929     {
930       W(F("current directory '%s' is not below root '%s'") % current % root);
931       return false;
932     }
933 
934   L(FL("searching for '%s' directory with root '%s'") % bookdir % root);
935 
936   system_path check;
937   while (!(current == root))
938     {
939       check = current / bookdir;
940       switch (get_path_status(check))
941         {
942         case path::nonexistent:
943           L(FL("'%s' not found in '%s' with '%s' removed")
944             % bookdir % current % removed);
945           if (removed.empty())
946             removed = current.basename()();
947           else
948             removed = current.basename()() + "/" + removed;
949           current = current.dirname();
950           continue;
951 
952         case path::file:
953           L(FL("'%s' is not a directory") % check);
954           return false;
955 
956         case path::directory:
957           goto found;
958         }
959     }
960 
961   // if we get here, we have hit the root; try once more
962   check = current / bookdir;
963   switch (get_path_status(check))
964     {
965     case path::nonexistent:
966       L(FL("'%s' not found in '%s' with '%s' removed")
967         % bookdir % current % removed);
968       return false;
969 
970     case path::file:
971       L(FL("'%s' is not a directory") % check);
972       return false;
973 
974     case path::directory:
975       goto found;
976     }
977   return false;
978 
979  found:
980   // check for _MTN/. and _MTN/.. to see if mt dir is readable
981   try
982     {
983       if (!path_exists(check / ".") || !path_exists(check / ".."))
984         {
985           L(FL("problems with '%s' (missing '.' or '..')") % check);
986           return false;
987         }
988     }
989   catch(exception &)
990     {
991       L(FL("problems with '%s' (cannot check for '.' or '..')") % check);
992       return false;
993     }
994   return true;
995 }
996 
997 
998 bool
find_and_go_to_workspace(string const & search_root)999 find_and_go_to_workspace(string const & search_root)
1000 {
1001   system_path root, current;
1002   string removed;
1003 
1004   if (search_root.empty())
1005     {
1006 #ifdef WIN32
1007       std::string cur_str = get_current_working_dir();
1008       current = system_path(cur_str, origin::system);
1009       if (cur_str[0] == '/' || cur_str[0] == '\\')
1010         {
1011           if (cur_str.size() > 1 && (cur_str[1] == '/' || cur_str[1] == '\\'))
1012             {
1013               // UNC name
1014               string::size_type uncend = cur_str.find_first_of("\\/", 2);
1015               if (uncend == string::npos)
1016                 root = system_path(cur_str + "/", origin::system);
1017               else
1018                 root = system_path(cur_str.substr(0, uncend), origin::system);
1019             }
1020           else
1021             root = system_path("/");
1022         }
1023       else if (cur_str.size() > 1 && cur_str[1] == ':')
1024         {
1025           root = system_path(cur_str.substr(0,2) + "/", origin::system);
1026         }
1027       else I(false);
1028 #else
1029       root = system_path("/", origin::internal);
1030 #endif
1031     }
1032   else
1033     {
1034       root = system_path(search_root, origin::user);
1035       L(FL("limiting search for workspace to %s") % root);
1036 
1037       require_path_is_directory(root,
1038                                F("search root '%s' does not exist") % root,
1039                                F("search root '%s' is not a directory") % root);
1040     }
1041 
1042   // first look for the current name of the bookkeeping directory.
1043   // if we don't find it, look for it under the old name, so that
1044   // migration has a chance to work.
1045   if (!find_bookdir(root, bookkeeping_root_component, current, removed))
1046     if (!find_bookdir(root, old_bookkeeping_root_component, current, removed))
1047       return false;
1048 
1049   working_root.set(current, true);
1050   initial_rel_path.set(removed, true);
1051 
1052   L(FL("working root is '%s'") % working_root.get_but_unused());
1053   L(FL("initial relative path is '%s'") % initial_rel_path.get_but_unused());
1054 
1055   change_current_working_dir(working_root.get_but_unused());
1056 
1057   return true;
1058 }
1059 
1060 void
go_to_workspace(system_path const & new_workspace)1061 go_to_workspace(system_path const & new_workspace)
1062 {
1063   working_root.set(new_workspace, true);
1064   initial_rel_path.set(string(), true);
1065   change_current_working_dir(new_workspace);
1066 }
1067 
1068 void
get_current_workspace(system_path & workspace)1069 get_current_workspace(system_path & workspace)
1070 {
1071   workspace = working_root.get_but_unused();
1072 }
1073 
1074 void
mark_std_paths_used(void)1075 mark_std_paths_used(void)
1076 {
1077   working_root.get();
1078   initial_rel_path.get();
1079 }
1080 
1081 void
reset_std_paths(void)1082 reset_std_paths(void)
1083 {
1084   // we don't reset initial_abs_path here, because it is only set in
1085   // monotone.cc:cpp_main. initial_rel_path, working_root are reset for each
1086   // command.
1087   initial_rel_path.unset();
1088   working_root.unset();
1089 }
1090 
1091 
1092 ///////////////////////////////////////////////////////////////////////////
1093 // utility used by migrate_ancestry
1094 ///////////////////////////////////////////////////////////////////////////
1095 
1096 
1097 static file_path
find_old_path_for(map<file_path,file_path> const & renames,file_path const & new_path)1098 find_old_path_for(map<file_path, file_path> const & renames,
1099                   file_path const & new_path)
1100 {
1101   map<file_path, file_path>::const_iterator i = renames.find(new_path);
1102   if (i != renames.end())
1103     return i->second;
1104 
1105   // ??? root directory rename possible in the old schema?
1106   // if not, do this first.
1107   if (new_path.empty())
1108     return new_path;
1109 
1110   file_path dir;
1111   path_component base;
1112   new_path.dirname_basename(dir, base);
1113   return find_old_path_for(renames, dir) / base;
1114 }
1115 
1116 file_path
find_new_path_for(map<file_path,file_path> const & renames,file_path const & old_path)1117 find_new_path_for(map<file_path, file_path> const & renames,
1118                   file_path const & old_path)
1119 {
1120   map<file_path, file_path> reversed;
1121   for (map<file_path, file_path>::const_iterator i = renames.begin();
1122        i != renames.end(); ++i)
1123     reversed.insert(make_pair(i->second, i->first));
1124   // this is a hackish kluge.  seems to work, though.
1125   return find_old_path_for(reversed, old_path);
1126 }
1127 
1128 // Local Variables:
1129 // mode: C++
1130 // fill-column: 76
1131 // c-file-style: "gnu"
1132 // indent-tabs-mode: nil
1133 // End:
1134 // vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:
1135