1 // Copyright (C) 2005 Nathaniel Smith <njs@pobox.com>
2 // 2008, 2010 - 2011 Stephen Leake <stephen_leake@stephe-leake.org>
3 //
4 // This program is made available under the GNU GPL version 2.0 or
5 // greater. See the accompanying file COPYING for details.
6 //
7 // This program is distributed WITHOUT ANY WARRANTY; without even the
8 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
9 // PURPOSE.
10
11 #include "base.hh"
12 #include <sstream>
13
14 #include "paths.hh"
15 #include "file_io.hh"
16 #include "charset.hh"
17 #include "safe_map.hh"
18
19 using std::exception;
20 using std::ostream;
21 using std::ostringstream;
22 using std::string;
23 using std::vector;
24 using std::map;
25 using std::make_pair;
26
27 // some structure to ensure we aren't doing anything broken when resolving
28 // filenames. the idea is to make sure
29 // -- we don't depend on the existence of something before it has been set
30 // -- we don't re-set something that has already been used
31 // -- sometimes, we use the _non_-existence of something, so we shouldn't
32 // set anything whose un-setted-ness has already been used
33 template <typename T>
34 struct access_tracker
35 {
setaccess_tracker36 void set(T const & val, bool may_be_initialized)
37 {
38 I(may_be_initialized || !initialized);
39 I(!very_uninitialized);
40 I(!used);
41 initialized = true;
42 value = val;
43 }
getaccess_tracker44 T const & get()
45 {
46 I(initialized);
47 used = true;
48 return value;
49 }
get_but_unusedaccess_tracker50 T const & get_but_unused()
51 {
52 I(initialized);
53 return value;
54 }
may_not_initializeaccess_tracker55 void may_not_initialize()
56 {
57 I(!initialized);
58 very_uninitialized = true;
59 }
60 // for unit tests
unsetaccess_tracker61 void unset()
62 {
63 used = initialized = very_uninitialized = false;
64 }
65 T value;
66 bool initialized, used, very_uninitialized;
access_trackeraccess_tracker67 access_tracker() : initialized(false), used(false), very_uninitialized(false) {};
68 };
69
// paths to use in interpreting paths from various sources,
// conceptually:
//    working_root / initial_rel_path == initial_abs_path
// each one lives in an access_tracker so that out-of-order reads and writes
// trip an invariant.

// initial_abs_path is for interpreting relative system_path's
static access_tracker<system_path> initial_abs_path;
// initial_rel_path is for interpreting external file_path's
// we used to make it a file_path, but then you can't run monotone from
// inside the _MTN/ dir (even when referring to files outside the _MTN/
// dir).  use of a bare string requires some caution but does work.
static access_tracker<string> initial_rel_path;
// working_root is for converting file_path's and bookkeeping_path's to
// system_path's.
static access_tracker<system_path> working_root;
84
85 void
save_initial_path()86 save_initial_path()
87 {
88 // FIXME: BUG: this only works if the current working dir is in utf8
89 initial_abs_path.set(system_path(get_current_working_dir(),
90 origin::system), false);
91 L(FL("initial abs path is: %s") % initial_abs_path.get_but_unused());
92 }
93
94 ///////////////////////////////////////////////////////////////////////////
95 // verifying that internal paths are indeed normalized.
96 // this code must be superfast
97 ///////////////////////////////////////////////////////////////////////////
98
99 // normalized means:
100 // -- / as path separator
101 // -- not an absolute path (on either posix or win32)
102 // operationally, this means: first character != '/', first character != '\',
103 // second character != ':'
104 // -- no illegal characters
105 // -- 0x00 -- 0x1f, 0x7f, \ are the illegal characters. \ is illegal
106 // unconditionally to prevent people checking in files on posix that
107 // have a different interpretation on win32
108 // -- (may want to allow 0x0a and 0x0d (LF and CR) in the future, but this
109 // is blocked on manifest format changing)
110 // (also requires changes to 'automate inventory', possibly others, to
111 // handle quoting)
112 // -- no doubled /'s
113 // -- no trailing /
114 // -- no "." or ".." path components
115
// a component is unacceptable inside a normalized path when it is empty
// (which is what a doubled, leading or trailing '/' produces), "." or "..".
static inline bool
bad_component(string const & component)
{
  return component.empty() || component == "." || component == "..";
}
127
128 static inline bool
has_bad_chars(string const & path)129 has_bad_chars(string const & path)
130 {
131 for (string::const_iterator c = path.begin(); LIKELY(c != path.end()); c++)
132 {
133 // char is often a signed type; convert to unsigned to ensure that
134 // bytes 0x80-0xff are considered > 0x1f.
135 u8 x = (u8)*c;
136 // 0x5c is '\\'; we use the hex constant to make the dependency on
137 // ASCII encoding explicit.
138 if (UNLIKELY(x <= 0x1f || x == 0x5c || x == 0x7f))
139 return true;
140 }
141 return false;
142 }
143
144 // as above, but disallows / as well.
145 static inline bool
has_bad_component_chars(string const & pc)146 has_bad_component_chars(string const & pc)
147 {
148 for (string::const_iterator c = pc.begin(); LIKELY(c != pc.end()); c++)
149 {
150 // char is often a signed type; convert to unsigned to ensure that
151 // bytes 0x80-0xff are considered > 0x1f.
152 u8 x = (u8)*c;
153 // 0x2f is '/' and 0x5c is '\\'; we use hex constants to make the
154 // dependency on ASCII encoding explicit.
155 if (UNLIKELY(x <= 0x1f || x == 0x2f || x == 0x5c || x == 0x7f))
156 return true;
157 }
158 return false;
159
160 }
161
// report whether PATH is absolute on the platform this binary was built
// for: a leading '/' everywhere, plus a leading '\' or a ':' in second
// position when WIN32 is defined.
static bool
is_absolute_here(string const & path)
{
  if (path.empty())
    return false;
#ifdef WIN32
  return path[0] == '/'
    || path[0] == '\\'
    || (path.size() > 1 && path[1] == ':');
#else
  return path[0] == '/';
#endif
}
177
// report whether PATH would be absolute on any supported platform: it
// starts with '/' or '\', or has ':' as its second character.
static inline bool
is_absolute_somewhere(string const & path)
{
  if (path.empty())
    return false;
  char const first = path[0];
  return first == '/'
    || first == '\\'
    || (path.size() > 1 && path[1] == ':');
}
191
192 // fully_normalized_path verifies a complete pathname for validity and
193 // having been properly normalized (as if by normalize_path, below).
194 static inline bool
fully_normalized_path(string const & path)195 fully_normalized_path(string const & path)
196 {
197 // empty path is fine
198 if (path.empty())
199 return true;
200 // could use is_absolute_somewhere, but this is the only part of it that
201 // wouldn't be redundant
202 if (path.size() > 1 && path[1] == ':')
203 return false;
204 // first scan for completely illegal bytes
205 if (has_bad_chars(path))
206 return false;
207 // now check each component
208 string::size_type start = 0, stop;
209 while (1)
210 {
211 stop = path.find('/', start);
212 if (stop == string::npos)
213 break;
214 string const & s(path.substr(start, stop - start));
215 if (bad_component(s))
216 return false;
217 start = stop + 1;
218 }
219
220 string const & s(path.substr(start));
221 return !bad_component(s);
222 }
223
// This function considers _MTN, _MTn, _MtN, _mtn etc. to all be bookkeeping
// paths, because on case insensitive filesystems, files put in any of them
// may end up in _MTN instead.  This allows arbitrary code execution.  A
// better solution would be to fix this in the working directory writing
// code -- this prevents all-unix projects from naming things "_mtn", which
// is less rude than when the bookkeeping root was "MT", but still rude --
// but as a temporary security kluge it works.
static inline bool
in_bookkeeping_dir(string const & path)
{
  // anything shorter than "_MTN" cannot name the bookkeeping directory
  if (path.size() < 4)
    return false;

  // the first four characters must be _, M, T, N, letters in either case
  bool const prefix_matches =
    path[0] == '_'
    && (path[1] == 'M' || path[1] == 'm')
    && (path[2] == 'T' || path[2] == 't')
    && (path[3] == 'N' || path[3] == 'n');
  if (!prefix_matches)
    return false;

  // either that is the whole path, or the directory name ends right there
  return path.size() == 4 || path[4] == '/';
}
249
250 static inline bool
is_valid_internal(string const & path)251 is_valid_internal(string const & path)
252 {
253 return (fully_normalized_path(path)
254 && !in_bookkeeping_dir(path));
255 }
256
257 static string
normalize_path(string const & in)258 normalize_path(string const & in)
259 {
260 string inT = in;
261 string leader;
262 MM(inT);
263
264 #ifdef WIN32
265 // the first thing we do is kill all the backslashes
266 for (string::iterator i = inT.begin(); i != inT.end(); i++)
267 if (*i == '\\')
268 *i = '/';
269 #endif
270
271 if (is_absolute_here (inT))
272 {
273 if (inT[0] == '/')
274 {
275 leader = "/";
276 inT = inT.substr(1);
277
278 if (!inT.empty() && inT[0] == '/')
279 {
280 // if there are exactly two slashes at the beginning they
281 // are both preserved. three or more are the same as one.
282 string::size_type f = inT.find_first_not_of("/");
283 if (f == string::npos)
284 f = inT.size();
285 if (f == 1)
286 leader = "//";
287 inT = inT.substr(f);
288 }
289 }
290 #ifdef WIN32
291 else
292 {
293 I(inT.size() > 1 && inT[1] == ':');
294 if (inT.size() > 2 && inT[2] == '/')
295 {
296 leader = inT.substr(0, 3);
297 inT = inT.substr(3);
298 }
299 else
300 {
301 leader = inT.substr(0, 2);
302 inT = inT.substr(2);
303 }
304 }
305 #endif
306
307 I(!is_absolute_here(inT));
308 if (inT.empty())
309 return leader;
310 }
311
312 vector<string> stack;
313 string::const_iterator head, tail;
314 string::size_type size_estimate = leader.size();
315 for (head = inT.begin(); head != inT.end(); head = tail)
316 {
317 tail = head;
318 while (tail != inT.end() && *tail != '/')
319 tail++;
320
321 string elt(head, tail);
322 while (tail != inT.end() && *tail == '/')
323 tail++;
324
325 if (elt == ".")
326 continue;
327 // remove foo/.. element pairs; leave leading .. components alone
328 if (elt == ".." && !stack.empty() && stack.back() != "..")
329 {
330 stack.pop_back();
331 continue;
332 }
333
334 size_estimate += elt.size() + 1;
335 stack.push_back(elt);
336 }
337
338 leader.reserve(size_estimate);
339 for (vector<string>::const_iterator i = stack.begin(); i != stack.end(); i++)
340 {
341 if (i != stack.begin())
342 leader += "/";
343 leader += *i;
344 }
345 return leader;
346 }
347
348 void
normalize_external_path(string const & path,string & normalized,bool to_workspace_root)349 normalize_external_path(string const & path, string & normalized, bool to_workspace_root)
350 {
351 if (!initial_rel_path.initialized)
352 {
353 // we are not in a workspace; treat this as an internal
354 // path, and set the access_tracker() into a very uninitialised
355 // state so that we will hit an exception if we do eventually
356 // enter a workspace
357 initial_rel_path.may_not_initialize();
358 normalized = path;
359 E(is_valid_internal(path), origin::user,
360 F("path '%s' is invalid") % path);
361 }
362 else
363 {
364 E(!is_absolute_here(path), origin::user,
365 F("absolute path '%s' is invalid") % path);
366 string base;
367 try
368 {
369 if (to_workspace_root)
370 base = "";
371 else
372 base = initial_rel_path.get();
373
374 if (base == "")
375 normalized = normalize_path(path);
376 else
377 normalized = normalize_path(base + "/" + path);
378 }
379 catch (exception &)
380 {
381 E(false, origin::user, F("path '%s' is invalid") % path);
382 }
383 if (normalized == ".")
384 normalized = string("");
385 E(fully_normalized_path(normalized), origin::user,
386 F("path '%s' is invalid") % normalized);
387 }
388 }
389
390 ///////////////////////////////////////////////////////////////////////////
391 // single path component handling.
392 ///////////////////////////////////////////////////////////////////////////
393
// these constructors confirm that what they are passed is a legitimate
// component.  note that the empty string is a legitimate component,
// but is not acceptable to bad_component (above) and therefore we have
// to open-code most of those checks.

// build a component from a utf8 value, inheriting its origin.  the I()
// check requires that the text has no illegal component characters and is
// neither "." nor "..".
path_component::path_component(utf8 const & d)
  : origin_aware(d.made_from), data(d())
{
  MM(data);
  I(!has_bad_component_chars(data) && data != "." && data != "..");
}
404
// build a component from a plain string with origin WHENCE.  in addition
// to the character and "."/".." checks, the I() check requires that the
// text passes utf8_validate.
path_component::path_component(string const & d, origin::type whence)
  : origin_aware(whence), data(d)
{
  MM(data);
  I(utf8_validate(utf8(data, origin::internal))
    && !has_bad_component_chars(data)
    && data != "." && data != "..");
}
413
// build a component from a C string; the same I() check as the string
// constructor applies.  no origin is passed to the origin_aware base here.
path_component::path_component(char const * d)
  : data(d)
{
  MM(data);
  I(utf8_validate(utf8(data, origin::internal))
    && !has_bad_component_chars(data)
    && data != "." && data != "..");
}
422
operator <<(std::ostream & s,path_component const & pc)423 std::ostream & operator<<(std::ostream & s, path_component const & pc)
424 {
425 return s << pc();
426 }
427
// debugging dump of a path component: TO receives the component's text.
template <> void dump(path_component const & pc, std::string & to)
{
  to = pc();
}
432
433 ///////////////////////////////////////////////////////////////////////////
434 // complete paths to files within a working directory
435 ///////////////////////////////////////////////////////////////////////////
436
file_path(file_path::source_type type,string const & path,bool to_workspace_root)437 file_path::file_path(file_path::source_type type, string const & path, bool to_workspace_root)
438 {
439 MM(path);
440 I(utf8_validate(utf8(path, origin::internal)));
441 if (type == external)
442 {
443 string normalized;
444 normalize_external_path(path, normalized, to_workspace_root);
445 E(!in_bookkeeping_dir(normalized), origin::user,
446 F("path '%s' is in bookkeeping dir") % normalized);
447 data = normalized;
448 }
449 else
450 data = path;
451 MM(data);
452 I(is_valid_internal(data));
453 }
454
file_path(file_path::source_type type,utf8 const & path,bool to_workspace_root)455 file_path::file_path(file_path::source_type type, utf8 const & path,
456 bool to_workspace_root)
457 : any_path(path.made_from)
458 {
459 MM(path);
460 E(utf8_validate(path), made_from, F("invalid utf8"));
461 if (type == external)
462 {
463 string normalized;
464 normalize_external_path(path(), normalized, to_workspace_root);
465 E(!in_bookkeeping_dir(normalized), origin::user,
466 F("path '%s' is in bookkeeping dir") % normalized);
467 data = normalized;
468 }
469 else
470 data = path();
471 MM(data);
472 I(is_valid_internal(data));
473 }
474
// build a bookkeeping path from a C string.  both requirements -- PATH is
// fully normalized and lies in the bookkeeping directory -- are checked
// with I().
bookkeeping_path::bookkeeping_path(char const * const path)
{
  I(fully_normalized_path(path));
  I(in_bookkeeping_dir(path));
  data = path;
}
481
// build a bookkeeping path from a string.  the same two requirements as
// for the C-string constructor apply, but violations are reported through
// E() against MADE_FROM.
// NOTE(review): MADE_FROM is only used for the E() checks here; it is not
// passed on to a base class in this constructor -- confirm that is intended.
bookkeeping_path::bookkeeping_path(string const & path, origin::type made_from)
{
  E(fully_normalized_path(path), made_from, F("path is not normalized"));
  E(in_bookkeeping_dir(path), made_from,
    F("bookkeeping path is not in bookkeeping directory"));
  data = path;
}
489
// report whether the user-supplied PATH, once normalized relative to the
// initial relative path, lies in the bookkeeping directory.  a PATH that
// normalize_external_path rejects with recoverable_failure counts as "no".
bool
bookkeeping_path::external_string_is_bookkeeping_path(utf8 const & path)
{
  // FIXME: this charset casting everywhere is ridiculous
  string normalized;
  try
    {
      normalize_external_path(path(), normalized, false);
    }
  catch (recoverable_failure &)
    {
      return false;
    }
  return internal_string_is_bookkeeping_path(utf8(normalized, path.made_from));
}
// report whether PATH, taken as is (no normalization), lies in the
// bookkeeping directory.
bool bookkeeping_path::internal_string_is_bookkeeping_path(utf8 const & path)
{
  return in_bookkeeping_dir(path());
}
509
510 ///////////////////////////////////////////////////////////////////////////
511 // splitting/joining
512 // this code must be superfast
513 // it depends very much on knowing that it can only be applied to fully
514 // normalized, relative, paths.
515 ///////////////////////////////////////////////////////////////////////////
516
517 // this peels off the last component of any path and returns it.
518 // the last component of a path with no slashes in it is the complete path.
519 // the last component of a path referring to the root directory is an
520 // empty string.
521 path_component
basename() const522 any_path::basename() const
523 {
524 string const & s = data;
525 string::size_type sep = s.rfind('/');
526 #ifdef WIN32
527 if (sep == string::npos && s.size()>= 2 && s[1] == ':')
528 sep = 1;
529 #endif
530 if (sep == string::npos)
531 return path_component(s, 0); // force use of short circuit
532 if (sep == s.size())
533 return path_component();
534 return path_component(s, sep + 1);
535 }
536
537 // this returns all but the last component of any path. It has to take
538 // care at the root.
539 any_path
dirname() const540 any_path::dirname() const
541 {
542 string const & s = data;
543 string::size_type sep = s.rfind('/');
544 #ifdef WIN32
545 if (sep == string::npos && s.size()>= 2 && s[1] == ':')
546 sep = 1;
547 #endif
548 if (sep == string::npos)
549 return any_path();
550
551 // dirname() of the root directory is itself
552 if (sep == s.size() - 1)
553 return *this;
554
555 // dirname() of a direct child of the root is the root
556 if (sep == 0 || (sep == 1 && s[1] == '/')
557 #ifdef WIN32
558 || (sep == 1 || (sep == 2 && s[1] == ':'))
559 #endif
560 )
561 return any_path(s, 0, sep+1);
562
563 return any_path(s, 0, sep);
564 }
565
566 // these variations exist to get the return type right. also,
567 // file_path dirname() can be a little simpler.
568 file_path
dirname() const569 file_path::dirname() const
570 {
571 string const & s = data;
572 string::size_type sep = s.rfind('/');
573 if (sep == string::npos)
574 return file_path();
575 return file_path(s, 0, sep);
576 }
577
578 system_path
dirname() const579 system_path::dirname() const
580 {
581 string const & s = data;
582 string::size_type sep = s.rfind('/');
583 #ifdef WIN32
584 if (sep == string::npos && s.size()>= 2 && s[1] == ':')
585 sep = 1;
586 #endif
587 I(sep != string::npos);
588
589 // dirname() of the root directory is itself
590 if (sep == s.size() - 1)
591 return *this;
592
593 // dirname() of a direct child of the root is the root
594 if (sep == 0 || (sep == 1 && s[1] == '/')
595 #ifdef WIN32
596 || (sep == 1 || (sep == 2 && s[1] == ':'))
597 #endif
598 )
599 return system_path(s, 0, sep+1);
600
601 return system_path(s, 0, sep);
602 }
603
604
605 // produce dirname and basename at the same time
606 void
dirname_basename(file_path & dir,path_component & base) const607 file_path::dirname_basename(file_path & dir, path_component & base) const
608 {
609 string const & s = data;
610 string::size_type sep = s.rfind('/');
611 if (sep == string::npos)
612 {
613 dir = file_path();
614 base = path_component(s, 0);
615 }
616 else
617 {
618 I(sep < s.size() - 1); // last component must have at least one char
619 dir = file_path(s, 0, sep);
620 base = path_component(s, sep + 1);
621 }
622 }
623
624 // returns true if this path is beneath other
625 bool
is_beneath_of(const file_path & other) const626 file_path::is_beneath_of(const file_path & other) const
627 {
628 if (other.empty())
629 return true;
630
631 file_path basedir = dirname();
632 while (!basedir.empty())
633 {
634 L(FL("base: %s, other: %s") % basedir % other);
635 if (basedir == other)
636 return true;
637 basedir = basedir.dirname();
638 }
639 return false;
640 }
641
642 // count the number of /-separated components of the path.
643 unsigned int
depth() const644 file_path::depth() const
645 {
646 if (data.empty())
647 return 0;
648
649 unsigned int components = 1;
650 for (string::const_iterator p = data.begin(); p != data.end(); p++)
651 if (*p == '/')
652 components++;
653
654 return components;
655 }
656
657 ///////////////////////////////////////////////////////////////////////////
658 // localizing file names (externalizing them)
659 // this code must be superfast when there is no conversion needed
660 ///////////////////////////////////////////////////////////////////////////
661
// return this path in the form used outside the program: the raw internal
// data when __APPLE__ is defined, otherwise the result of passing it
// through utf8_to_system_strict.
string
any_path::as_external() const
{
#ifdef __APPLE__
  // on OS X paths for the filesystem/kernel are UTF-8 encoded, regardless of
  // locale.
  return data;
#else
  // on normal systems we actually have some work to do, alas.
  // not much, though, because utf8_to_system_strict does all the hard work.
  // it is carefully optimized.  do not screw it up.
  external out;
  utf8_to_system_strict(utf8(data, made_from), out);
  return out();
#endif
}
678
679 ///////////////////////////////////////////////////////////////////////////
680 // writing out paths
681 ///////////////////////////////////////////////////////////////////////////
682
683 ostream &
operator <<(ostream & o,any_path const & a)684 operator <<(ostream & o, any_path const & a)
685 {
686 o << a.as_internal();
687 return o;
688 }
689
690 template <>
dump(file_path const & p,string & out)691 void dump(file_path const & p, string & out)
692 {
693 ostringstream oss;
694 oss << p << '\n';
695 out = oss.str();
696 }
697
698 template <>
dump(system_path const & p,string & out)699 void dump(system_path const & p, string & out)
700 {
701 ostringstream oss;
702 oss << p << '\n';
703 out = oss.str();
704 }
705
706 template <>
dump(bookkeeping_path const & p,string & out)707 void dump(bookkeeping_path const & p, string & out)
708 {
709 ostringstream oss;
710 oss << p << '\n';
711 out = oss.str();
712 }
713
714 ///////////////////////////////////////////////////////////////////////////
715 // path manipulation
716 // this code's speed does not matter much
717 ///////////////////////////////////////////////////////////////////////////
718
719 // relies on its arguments already being validated, except that you may not
720 // append the empty path component, and if you are appending to the empty
721 // path, you may not create an absolute path or a path into the bookkeeping
722 // directory.
723 file_path
operator /(path_component const & to_append) const724 file_path::operator /(path_component const & to_append) const
725 {
726 I(!to_append.empty());
727 if (empty())
728 {
729 string const & s = to_append();
730 I(!is_absolute_somewhere(s) && !in_bookkeeping_dir(s));
731 return file_path(s, 0, string::npos);
732 }
733 else
734 return file_path(((*(data.end() - 1) == '/') ? data : data + "/")
735 + to_append(), 0, string::npos);
736 }
737
738 // similarly, but even less checking is needed.
739 file_path
operator /(file_path const & to_append) const740 file_path::operator /(file_path const & to_append) const
741 {
742 I(!to_append.empty());
743 if (empty())
744 return to_append;
745 return file_path(((*(data.end() - 1) == '/') ? data : data + "/")
746 + to_append.as_internal(), 0, string::npos);
747 }
748
749 bookkeeping_path
operator /(path_component const & to_append) const750 bookkeeping_path::operator /(path_component const & to_append) const
751 {
752 I(!to_append.empty());
753 I(!empty());
754 return bookkeeping_path(((*(data.end() - 1) == '/') ? data : data + "/")
755 + to_append(), 0, string::npos);
756 }
757
758 bookkeeping_path
operator /(file_path const & to_append) const759 bookkeeping_path::operator /(file_path const & to_append) const
760 {
761 I(!to_append.empty());
762 I(!empty());
763 return bookkeeping_path(((*(data.end() - 1) == '/') ? data : data + "/")
764 + to_append.as_internal(), 0, string::npos);
765 }
766
767 system_path
operator /(path_component const & to_append) const768 system_path::operator /(path_component const & to_append) const
769 {
770 I(!to_append.empty());
771 I(!empty());
772 return system_path(((*(data.end() - 1) == '/') ? data : data + "/")
773 + to_append(), 0, string::npos);
774 }
775
776 any_path
operator /(path_component const & to_append) const777 any_path::operator /(path_component const & to_append) const
778 {
779 I(!to_append.empty());
780 I(!empty());
781 return any_path(((*(data.end() - 1) == '/') ? data : data + "/")
782 + to_append(), 0, string::npos);
783 }
784
785 // these take strings and validate
786 bookkeeping_path
operator /(char const * to_append) const787 bookkeeping_path::operator /(char const * to_append) const
788 {
789 I(!is_absolute_somewhere(to_append));
790 I(!empty());
791 return bookkeeping_path(((*(data.end() - 1) == '/') ? data : data + "/")
792 + to_append, origin::internal);
793 }
794
795 system_path
operator /(char const * to_append) const796 system_path::operator /(char const * to_append) const
797 {
798 I(!empty());
799 I(!is_absolute_here(to_append));
800 return system_path(((*(data.end() - 1) == '/') ? data : data + "/")
801 + to_append, origin::internal);
802 }
803
804 ///////////////////////////////////////////////////////////////////////////
805 // system_path
806 ///////////////////////////////////////////////////////////////////////////
807
system_path(any_path const & other,bool in_true_workspace)808 system_path::system_path(any_path const & other, bool in_true_workspace)
809 {
810 if (is_absolute_here(other.as_internal()))
811 // another system_path. the normalizing isn't really necessary, but it
812 // makes me feel warm and fuzzy.
813 data = normalize_path(other.as_internal());
814 else
815 {
816 system_path wr;
817 if (in_true_workspace)
818 wr = working_root.get();
819 else
820 wr = working_root.get_but_unused();
821 data = normalize_path(wr.as_internal() + "/" + other.as_internal());
822 }
823 }
824
const_system_path(utf8 const & path)825 static inline string const_system_path(utf8 const & path)
826 {
827 E(!path().empty(), path.made_from, F("invalid path ''"));
828 string expanded = tilde_expand(path());
829 if (is_absolute_here(expanded))
830 return normalize_path(expanded);
831 else
832 return normalize_path(initial_abs_path.get().as_internal()
833 + "/" + path());
834 }
835
system_path(string const & path,origin::type from)836 system_path::system_path(string const & path, origin::type from)
837 {
838 data = const_system_path(utf8(path, from));
839 }
840
system_path(char const * const path)841 system_path::system_path(char const * const path)
842 {
843 data = const_system_path(utf8(path, origin::internal));
844 }
845
system_path(utf8 const & path)846 system_path::system_path(utf8 const & path)
847 {
848 data = const_system_path(utf8(path));
849 }
850
// Constant path predicates.
// IMPLEMENT_CONST_PRED(cls, ret) expands to the explicit specialization of
// path_always_<ret><cls>::operator(), which ignores its argument and
// returns the literal RET.  It is instantiated below for each of the four
// path classes with both 'false' and 'true', and undefined afterwards.
#define IMPLEMENT_CONST_PRED(cls, ret)                  \
  template <> bool                                      \
  path_always_##ret<cls>::operator()(cls const &) const \
  { return ret; }

IMPLEMENT_CONST_PRED(any_path, false)
IMPLEMENT_CONST_PRED(system_path, false)
IMPLEMENT_CONST_PRED(file_path, false)
IMPLEMENT_CONST_PRED(bookkeeping_path, false)

IMPLEMENT_CONST_PRED(any_path, true)
IMPLEMENT_CONST_PRED(system_path, true)
IMPLEMENT_CONST_PRED(file_path, true)
IMPLEMENT_CONST_PRED(bookkeeping_path, true)

#undef IMPLEMENT_CONST_PRED
868
869 // If this wasn't a user-supplied path, we should know
870 // which kind it is.
871 boost::shared_ptr<any_path>
872 new_optimal_path(std::string path, bool to_workspace_root)
873 {
874 utf8 const utf8_path = utf8(path, origin::user);
875 string normalized;
876 try
877 {
878 normalize_external_path(utf8_path(), normalized, to_workspace_root);
879 }
880 catch (recoverable_failure &)
881 {
882 // not in workspace
883 return boost::shared_ptr<any_path>(new system_path(path, origin::user));
884 }
885
886 if (in_bookkeeping_dir(normalized))
887 return boost::shared_ptr<any_path>(new bookkeeping_path(normalized, origin::user));
888 else
889 return boost::shared_ptr<any_path>(new file_path(file_path_internal(normalized)));
890 };
891
892 // Either conversion of S to a path_component, or composition of P / S, has
893 // failed; figure out what went wrong and issue an appropriate diagnostic.
894
895 void
report_failed_path_composition(any_path const & p,char const * s,bool isdir)896 report_failed_path_composition(any_path const & p, char const * s,
897 bool isdir)
898 {
899 utf8 badpth;
900 if (p.empty())
901 badpth = utf8(s);
902 else
903 badpth = utf8(p.as_internal() + "/" + s, p.made_from);
904 if (bookkeeping_path::internal_string_is_bookkeeping_path(badpth))
905 L(FL("ignoring bookkeeping directory '%s'") % badpth);
906 else
907 {
908 // We rely on caller to tell us whether this is a directory.
909 if (isdir)
910 W(F("skipping directory '%s' with unsupported name") % badpth);
911 else
912 W(F("skipping file '%s' with unsupported name") % badpth);
913 }
914 }
915
916 ///////////////////////////////////////////////////////////////////////////
917 // workspace (and path root) handling
918 ///////////////////////////////////////////////////////////////////////////
919
920 static bool
find_bookdir(system_path const & root,path_component const & bookdir,system_path & current,string & removed)921 find_bookdir(system_path const & root, path_component const & bookdir,
922 system_path & current, string & removed)
923 {
924 current = initial_abs_path.get();
925 removed.clear();
926
927 // check that the current directory is below the specified search root
928 if (current.as_internal().find(root.as_internal()) != 0)
929 {
930 W(F("current directory '%s' is not below root '%s'") % current % root);
931 return false;
932 }
933
934 L(FL("searching for '%s' directory with root '%s'") % bookdir % root);
935
936 system_path check;
937 while (!(current == root))
938 {
939 check = current / bookdir;
940 switch (get_path_status(check))
941 {
942 case path::nonexistent:
943 L(FL("'%s' not found in '%s' with '%s' removed")
944 % bookdir % current % removed);
945 if (removed.empty())
946 removed = current.basename()();
947 else
948 removed = current.basename()() + "/" + removed;
949 current = current.dirname();
950 continue;
951
952 case path::file:
953 L(FL("'%s' is not a directory") % check);
954 return false;
955
956 case path::directory:
957 goto found;
958 }
959 }
960
961 // if we get here, we have hit the root; try once more
962 check = current / bookdir;
963 switch (get_path_status(check))
964 {
965 case path::nonexistent:
966 L(FL("'%s' not found in '%s' with '%s' removed")
967 % bookdir % current % removed);
968 return false;
969
970 case path::file:
971 L(FL("'%s' is not a directory") % check);
972 return false;
973
974 case path::directory:
975 goto found;
976 }
977 return false;
978
979 found:
980 // check for _MTN/. and _MTN/.. to see if mt dir is readable
981 try
982 {
983 if (!path_exists(check / ".") || !path_exists(check / ".."))
984 {
985 L(FL("problems with '%s' (missing '.' or '..')") % check);
986 return false;
987 }
988 }
989 catch(exception &)
990 {
991 L(FL("problems with '%s' (cannot check for '.' or '..')") % check);
992 return false;
993 }
994 return true;
995 }
996
997
// locate the enclosing workspace by searching upwards from the initial
// working directory, and change into its root.  SEARCH_ROOT, when not
// empty, limits how far up the search may go and must name an existing
// directory; when empty, the search goes up to the filesystem root (with
// WIN32: the root of the current drive or UNC name).  on success
// working_root and initial_rel_path are set and true is returned; false
// means no bookkeeping directory was found.
bool
find_and_go_to_workspace(string const & search_root)
{
  system_path root, current;
  string removed;

  if (search_root.empty())
    {
#ifdef WIN32
      // derive the root from the textual form of the working directory
      std::string cur_str = get_current_working_dir();
      current = system_path(cur_str, origin::system);
      if (cur_str[0] == '/' || cur_str[0] == '\\')
        {
          if (cur_str.size() > 1 && (cur_str[1] == '/' || cur_str[1] == '\\'))
            {
              // UNC name
              string::size_type uncend = cur_str.find_first_of("\\/", 2);
              if (uncend == string::npos)
                root = system_path(cur_str + "/", origin::system);
              else
                root = system_path(cur_str.substr(0, uncend), origin::system);
            }
          else
            root = system_path("/");
        }
      else if (cur_str.size() > 1 && cur_str[1] == ':')
        {
          // drive letter: the root is "X:/"
          root = system_path(cur_str.substr(0,2) + "/", origin::system);
        }
      else I(false);
#else
      root = system_path("/", origin::internal);
#endif
    }
  else
    {
      root = system_path(search_root, origin::user);
      L(FL("limiting search for workspace to %s") % root);

      require_path_is_directory(root,
                               F("search root '%s' does not exist") % root,
                               F("search root '%s' is not a directory") % root);
    }

  // first look for the current name of the bookkeeping directory.
  // if we don't find it, look for it under the old name, so that
  // migration has a chance to work.
  if (!find_bookdir(root, bookkeeping_root_component, current, removed))
    if (!find_bookdir(root, old_bookkeeping_root_component, current, removed))
      return false;

  // both trackers are set with may_be_initialized == true, so values from
  // an earlier call may be overwritten
  working_root.set(current, true);
  initial_rel_path.set(removed, true);

  L(FL("working root is '%s'") % working_root.get_but_unused());
  L(FL("initial relative path is '%s'") % initial_rel_path.get_but_unused());

  change_current_working_dir(working_root.get_but_unused());

  return true;
}
1059
// make NEW_WORKSPACE the working root without searching for it: the
// initial relative path becomes empty and the process changes into that
// directory.
void
go_to_workspace(system_path const & new_workspace)
{
  working_root.set(new_workspace, true);
  initial_rel_path.set(string(), true);
  change_current_working_dir(new_workspace);
}
1067
// copy the working root into WORKSPACE.  it is read via get_but_unused(),
// so the working root is not marked as used.
void
get_current_workspace(system_path & workspace)
{
  workspace = working_root.get_but_unused();
}
1073
// read working_root and initial_rel_path through get() purely for the side
// effect: both become marked as used, so neither can be set again.  both
// must have been set already.
void
mark_std_paths_used(void)
{
  working_root.get();
  initial_rel_path.get();
}
1080
// clear the tracking flags of initial_rel_path and working_root so that
// they can be set afresh.
void
reset_std_paths(void)
{
  // we don't reset initial_abs_path here, because it is only set in
  // monotone.cc:cpp_main. initial_rel_path, working_root are reset for each
  // command.
  initial_rel_path.unset();
  working_root.unset();
}
1090
1091
1092 ///////////////////////////////////////////////////////////////////////////
1093 // utility used by migrate_ancestry
1094 ///////////////////////////////////////////////////////////////////////////
1095
1096
1097 static file_path
find_old_path_for(map<file_path,file_path> const & renames,file_path const & new_path)1098 find_old_path_for(map<file_path, file_path> const & renames,
1099 file_path const & new_path)
1100 {
1101 map<file_path, file_path>::const_iterator i = renames.find(new_path);
1102 if (i != renames.end())
1103 return i->second;
1104
1105 // ??? root directory rename possible in the old schema?
1106 // if not, do this first.
1107 if (new_path.empty())
1108 return new_path;
1109
1110 file_path dir;
1111 path_component base;
1112 new_path.dirname_basename(dir, base);
1113 return find_old_path_for(renames, dir) / base;
1114 }
1115
1116 file_path
find_new_path_for(map<file_path,file_path> const & renames,file_path const & old_path)1117 find_new_path_for(map<file_path, file_path> const & renames,
1118 file_path const & old_path)
1119 {
1120 map<file_path, file_path> reversed;
1121 for (map<file_path, file_path>::const_iterator i = renames.begin();
1122 i != renames.end(); ++i)
1123 reversed.insert(make_pair(i->second, i->first));
1124 // this is a hackish kluge. seems to work, though.
1125 return find_old_path_for(reversed, old_path);
1126 }
1127
1128 // Local Variables:
1129 // mode: C++
1130 // fill-column: 76
1131 // c-file-style: "gnu"
1132 // indent-tabs-mode: nil
1133 // End:
1134 // vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:
1135