1 /* Copyright 2012-present Facebook, Inc.
2  * Licensed under the Apache License, Version 2.0 */
3 
4 #include "watchman.h"
5 #include <stdarg.h>
6 #include <new>
7 #include <stdexcept>
8 
9 // string piece
10 
w_string_piece()11 w_string_piece::w_string_piece() : s_(nullptr), e_(nullptr) {}
w_string_piece(std::nullptr_t)12 w_string_piece::w_string_piece(std::nullptr_t) : s_(nullptr), e_(nullptr) {}
13 
w_string_piece(w_string_piece && other)14 w_string_piece::w_string_piece(w_string_piece&& other) noexcept
15     : s_(other.s_), e_(other.e_) {
16   other.s_ = nullptr;
17   other.e_ = nullptr;
18 }
19 
asWString(w_string_type_t stringType) const20 w_string w_string_piece::asWString(w_string_type_t stringType) const {
21   return w_string(data(), size(), stringType);
22 }
23 
asLowerCase(w_string_type_t stringType) const24 w_string w_string_piece::asLowerCase(w_string_type_t stringType) const {
25   char* buf;
26   w_string_t* s;
27 
28   /* need to make a lowercase version */
29   s = (w_string_t*)(new char[sizeof(*s) + size() + 1]);
30   new (s) watchman_string();
31 
32   s->refcnt = 1;
33   s->len = size();
34   buf = (char*)(s + 1);
35   s->buf = buf;
36   s->type = stringType;
37 
38   auto cursor = s_;
39   while (cursor < e_) {
40     *buf = (char)tolower((uint8_t)*cursor);
41     ++cursor;
42     ++buf;
43   }
44   *buf = 0;
45 
46   return w_string(s, false);
47 }
48 
asUTF8Clean() const49 w_string w_string_piece::asUTF8Clean() const {
50   w_string s(s_, e_ - s_, W_STRING_UNICODE);
51   utf8_fix_string(const_cast<char*>(s.data()), s.size());
52   return s;
53 }
54 
pathIsAbsolute() const55 bool w_string_piece::pathIsAbsolute() const {
56   return w_is_path_absolute_cstr_len(data(), size());
57 }
58 
59 /** Compares two path strings.
60  * They are equal if the case of each character matches.
61  * Directory separator slashes are normalized such that
62  * \ and / are considered equal. */
pathIsEqual(w_string_piece other) const63 bool w_string_piece::pathIsEqual(w_string_piece other) const {
64 #ifdef _WIN32
65   if (size() != other.size()) {
66     return false;
67   }
68 
69   auto A = data();
70   auto B = other.data();
71 
72   auto end = A + size();
73   for (; A < end; ++A, ++B) {
74     if (*A == *B) {
75       continue;
76     }
77     if (A == data()) {
78       // This is a bit awful, but msys and other similar software
79       // can set the cwd to a lowercase drive letter.  Since we
80       // can't ever watch at a level higher than drive letters,
81       // we really shouldn't care about a case difference there
82       // so we relax the strictness of the check here.
83       // This case only triggers for the first character of the
84       // path.  Paths evaluated with this method are always
85       // absolute.  In theory, we should also do something
86       // reasonable for UNC paths, but folks shouldn't be
87       // watching those with watchman anyway.
88       if (tolower(*A) == tolower(*B)) {
89         continue;
90       }
91     }
92 
93     if (is_slash(*A) && is_slash(*B)) {
94       continue;
95     }
96     return false;
97   }
98   return true;
99 #else
100   return *this == other;
101 #endif
102 }
103 
104 
dirName() const105 w_string_piece w_string_piece::dirName() const {
106   if (e_ == s_) {
107     return nullptr;
108   }
109   for (auto end = e_ - 1; end >= s_; --end) {
110     if (is_slash(*end)) {
111       /* found the end of the parent dir */
112 #ifdef _WIN32
113       if (end > s_ && end[-1] == ':') {
114         // Special case for "C:\"; we want to keep the
115         // trailing slash for this case so that we continue
116         // to consider it an absolute path
117         return w_string_piece(s_, 1 + end - s_);
118       }
119 #endif
120       return w_string_piece(s_, end - s_);
121     }
122   }
123   return nullptr;
124 }
125 
baseName() const126 w_string_piece w_string_piece::baseName() const {
127   if (e_ == s_) {
128     return *this;
129   }
130   for (auto end = e_ - 1; end >= s_; --end) {
131     if (is_slash(*end)) {
132       /* found the end of the parent dir */
133 #ifdef _WIN32
134       if (end == e_ && end > s_ && end[-1] == ':') {
135         // Special case for "C:\"; we want the baseName to
136         // be this same component so that we continue
137         // to consider it an absolute path
138         return *this;
139       }
140 #endif
141       return w_string_piece(end + 1, e_ - (end + 1));
142     }
143   }
144 
145   return *this;
146 }
147 
operator <(w_string_piece other) const148 bool w_string_piece::operator<(w_string_piece other) const {
149   int res;
150   if (size() < other.size()) {
151     res = memcmp(data(), other.data(), size());
152     return (res == 0 ? -1 : res) < 0;
153   } else if (size() > other.size()) {
154     res = memcmp(data(), other.data(), other.size());
155     return (res == 0 ? +1 : res) < 0;
156   }
157   return memcmp(data(), other.data(), size()) < 0;
158 }
159 
operator ==(w_string_piece other) const160 bool w_string_piece::operator==(w_string_piece other) const {
161   if (s_ == other.s_ && e_ == other.e_) {
162     return true;
163   }
164   if (size() != other.size()) {
165     return false;
166   }
167   return memcmp(data(), other.data(), size()) == 0;
168 }
169 
operator !=(w_string_piece other) const170 bool w_string_piece::operator!=(w_string_piece other) const {
171   return !operator==(other);
172 }
173 
startsWith(w_string_piece prefix) const174 bool w_string_piece::startsWith(w_string_piece prefix) const {
175   if (prefix.size() > size()) {
176     return false;
177   }
178   return memcmp(data(), prefix.data(), prefix.size()) == 0;
179 }
180 
startsWithCaseInsensitive(w_string_piece prefix) const181 bool w_string_piece::startsWithCaseInsensitive(w_string_piece prefix) const{
182   if (prefix.size() > size()) {
183     return false;
184   }
185 
186   auto me = s_;
187   auto pref = prefix.s_;
188 
189   while (pref < prefix.e_) {
190     if (tolower((uint8_t)*me) != tolower((uint8_t)*pref)) {
191       return false;
192     }
193     ++pref;
194     ++me;
195   }
196   return true;
197 }
198 
199 // string
200 
w_string()201 w_string::w_string() {}
202 
w_string(std::nullptr_t)203 w_string::w_string(std::nullptr_t) {}
204 
~w_string()205 w_string::~w_string() {
206   if (str_) {
207     w_string_delref(str_);
208   }
209 }
210 
w_string(w_string_t * str,bool addRef)211 w_string::w_string(w_string_t* str, bool addRef) : str_(str) {
212   if (str_ && addRef) {
213     w_string_addref(str_);
214   }
215 }
216 
/** Copy constructor: shares the underlying string with `other`, taking an
 * additional reference when there is one to share. */
w_string::w_string(const w_string& other) : str_(other.str_) {
  if (str_ != nullptr) {
    w_string_addref(str_);
  }
}
222 
/** Copy assignment: release the currently held string (if any) and share the
 * one held by `other`.  Self-assignment is a no-op. */
w_string& w_string::operator=(const w_string& other) {
  if (&other == this) {
    return *this;
  }

  // Take the new reference before dropping the old one so the incoming
  // string's refcount never dips while we are switching over.
  auto incoming = other.str_;
  if (incoming) {
    w_string_addref(incoming);
  }

  // reset() releases our current reference and nulls str_, so no further
  // delref of the old value is needed (or possible) after it.
  reset();
  str_ = incoming;

  return *this;
}
239 
w_string(w_string && other)240 w_string::w_string(w_string&& other) noexcept : str_(other.str_) {
241   other.str_ = nullptr;
242 }
243 
operator =(w_string && other)244 w_string& w_string::operator=(w_string&& other) {
245   if (&other == this) {
246     return *this;
247   }
248   reset();
249   str_ = other.str_;
250   other.str_ = nullptr;
251   return *this;
252 }
253 
reset()254 void w_string::reset() {
255   if (str_) {
256     w_string_delref(str_);
257     str_ = nullptr;
258   }
259 }
260 
release()261 w_string_t *w_string::release() {
262   auto res = str_;
263   str_ = nullptr;
264   return res;
265 }
266 
/** Narrow a size_t length to uint32_t.
 * @throws std::range_error if `len` does not fit in 32 bits. */
static inline uint32_t checked_len(size_t len) {
  if (len <= UINT32_MAX) {
    return static_cast<uint32_t>(len);
  }
  throw std::range_error("string length exceeds UINT32_MAX");
}
273 
w_string(const char * buf,size_t len,w_string_type_t stringType)274 w_string::w_string(const char* buf, size_t len, w_string_type_t stringType)
275     : w_string(
276           w_string_new_len_typed(buf, checked_len(len), stringType),
277           false) {}
278 
w_string(const char * buf,w_string_type_t stringType)279 w_string::w_string(const char* buf, w_string_type_t stringType)
280     : w_string(
281           w_string_new_len_typed(buf, strlen_uint32(buf), stringType),
282           false) {}
283 
dirName() const284 w_string w_string::dirName() const {
285   return w_string_piece(*this).dirName().asWString();
286 }
287 
baseName() const288 w_string w_string::baseName() const {
289   return w_string_piece(*this).baseName().asWString();
290 }
291 
suffix() const292 w_string w_string::suffix() const {
293   ensureNotNull();
294   return w_string(w_string_suffix(str_), false);
295 }
296 
asNullTerminated() const297 w_string w_string::asNullTerminated() const {
298   ensureNotNull();
299   if (w_string_is_null_terminated(str_)) {
300     return *this;
301   }
302 
303   return w_string(str_->buf, str_->len, str_->type);
304 }
305 
normalizeSeparators(char targetSeparator) const306 w_string w_string::normalizeSeparators(char targetSeparator) const {
307   return w_string(w_string_normalize_separators(str_, targetSeparator), false);
308 }
309 
makeNullTerminated()310 void w_string::makeNullTerminated() {
311   if (w_string_is_null_terminated(str_)) {
312     return;
313   }
314 
315   *this = asNullTerminated();
316 }
317 
c_str() const318 const char* w_string::c_str() const {
319   if (!w_string_is_null_terminated(str_)) {
320     throw std::runtime_error(
321         "string is not NULL terminated, use asNullTerminated() or makeNullTerminated()!");
322   }
323   return str_->buf;
324 }
325 
operator <(const w_string & other) const326 bool w_string::operator<(const w_string& other) const {
327   return w_string_compare(str_, other.str_) < 0;
328 }
329 
operator ==(const w_string & other) const330 bool w_string::operator==(const w_string& other) const {
331   return w_string_equal(str_, other.str_);
332 }
333 
operator !=(const w_string & other) const334 bool w_string::operator!=(const w_string& other) const {
335   return !(*this == other);
336 }
337 
pathCat(std::initializer_list<w_string_piece> elems)338 w_string w_string::pathCat(std::initializer_list<w_string_piece> elems) {
339   uint32_t length = 0;
340   w_string_t *s;
341   char *buf;
342 
343   for (auto &p : elems) {
344     length += p.size() + 1;
345   }
346 
347   s = (w_string_t*)(new char[sizeof(*s) + length]);
348   new (s) watchman_string();
349 
350   s->refcnt = 1;
351   buf = (char *)(s + 1);
352   s->buf = buf;
353 
354   for (auto &p : elems) {
355     if (p.size() == 0) {
356       // Skip empty strings
357       continue;
358     }
359     if (buf != s->buf) {
360       *buf = '/';
361       ++buf;
362     }
363     memcpy(buf, p.data(), p.size());
364     buf += p.size();
365   }
366   *buf = 0;
367   s->len = buf - s->buf;
368 
369   return w_string(s, false);
370 }
371 
w_string_compute_hval(w_string_t * str)372 uint32_t w_string_compute_hval(w_string_t *str) {
373   str->_hval = w_hash_bytes(str->buf, str->len, 0);
374   str->hval_computed = 1;
375   return str->_hval;
376 }
377 
hashValue() const378 uint32_t w_string_piece::hashValue() const {
379   return w_hash_bytes(data(), size(), 0);
380 }
381 
382 /** An optimization to avoid heap allocations during a lookup, this function
383  * creates a string object on the stack.  This object does not own the memory
384  * that it references, so it is the responsibility of the caller
385  * to ensure that that memory is live for the duration of use of this string.
386  * It is therefore invalid to add a reference or take a slice of this stack
387  * string as the lifetime guarantees are not upheld. */
w_string_new_len_typed_stack(w_string_t * into,const char * str,uint32_t len,w_string_type_t type)388 void w_string_new_len_typed_stack(w_string_t *into, const char *str,
389                                   uint32_t len, w_string_type_t type) {
390   into->refcnt = 1;
391   into->slice = NULL;
392   into->len = len;
393   into->buf = str;
394   into->hval_computed = 0;
395   into->type = type;
396 }
397 
w_string_slice(w_string_t * str,uint32_t start,uint32_t len)398 w_string_t *w_string_slice(w_string_t *str, uint32_t start, uint32_t len)
399 {
400   if (start == 0 && len == str->len) {
401     w_string_addref(str);
402     return str;
403   }
404 
405   if (start > str->len || start + len > str->len) {
406     errno = EINVAL;
407     throw std::range_error("illegal string slice");
408   }
409 
410   // Can't just new w_string_t because the delref has to call delete[]
411   // in most cases.
412   auto slice = (w_string_t*)(new char[sizeof(w_string_t)]);
413   new (slice) watchman_string();
414 
415   slice->refcnt = 1;
416   slice->len = len;
417   slice->buf = str->buf + start;
418   slice->slice = str;
419   slice->type = str->type;
420 
421   w_string_addref(str);
422   return slice;
423 }
424 
slice(uint32_t start,uint32_t len) const425 w_string w_string::slice(uint32_t start, uint32_t len) const {
426   return w_string(w_string_slice(str_, start, len), false);
427 }
428 
/** strlen() with the result narrowed to uint32_t.
 * @throws std::range_error if the length does not fit in 32 bits. */
uint32_t strlen_uint32(const char *str) {
  const size_t measured = strlen(str);
  if (measured <= UINT32_MAX) {
    return (uint32_t)measured;
  }
  throw std::range_error("string length exceeds UINT32_MAX");
}
437 
watchman_string()438 watchman_string::watchman_string()
439     : refcnt(0),
440       len(0),
441       slice(nullptr),
442       buf(nullptr),
443       type(W_STRING_BYTE),
444       hval_computed(0) {}
445 
~watchman_string()446 watchman_string::~watchman_string() {
447   if (slice) {
448     w_string_delref(slice);
449   }
450 }
451 
w_string_new_len_with_refcnt_typed(const char * str,uint32_t len,long refcnt,w_string_type_t type)452 w_string_t *w_string_new_len_with_refcnt_typed(const char* str,
453     uint32_t len, long refcnt, w_string_type_t type) {
454 
455   w_string_t *s;
456   char *buf;
457 
458   s = (w_string_t*)(new char[sizeof(*s) + len + 1]);
459   new (s) watchman_string();
460 
461   s->refcnt = refcnt;
462   s->len = len;
463   buf = (char*)(s + 1);
464   memcpy(buf, str, len);
465   buf[len] = 0;
466   s->buf = buf;
467   s->type = type;
468 
469   return s;
470 }
471 
w_string_new_len_typed(const char * str,uint32_t len,w_string_type_t type)472 w_string_t *w_string_new_len_typed(const char *str, uint32_t len,
473     w_string_type_t type) {
474   return w_string_new_len_with_refcnt_typed(str, len, 1, type);
475 }
476 
w_string_new_len_no_ref_typed(const char * str,uint32_t len,w_string_type_t type)477 w_string_t *w_string_new_len_no_ref_typed(const char *str, uint32_t len,
478     w_string_type_t type) {
479   return w_string_new_len_with_refcnt_typed(str, len, 0, type);
480 }
481 
w_string_new_typed(const char * str,w_string_type_t type)482 w_string_t *w_string_new_typed(const char *str, w_string_type_t type) {
483   return w_string_new_len_typed(str, strlen_uint32(str), type);
484 }
485 
vprintf(const char * format,va_list args)486 w_string w_string::vprintf(const char* format, va_list args) {
487   w_string_t *s;
488   int len;
489   char *buf;
490   va_list args_copy;
491 
492   va_copy(args_copy, args);
493   // Get the length needed
494   len = vsnprintf(nullptr, 0, format, args_copy);
495   va_end(args_copy);
496 
497   s = (w_string_t*)(new char[sizeof(*s) + len + 1]);
498   if (!s) {
499     perror("no memory available");
500     abort();
501   }
502 
503   new (s) watchman_string();
504 
505   s->refcnt = 1;
506   s->len = len;
507   buf = (char*)(s + 1);
508   vsnprintf(buf, len + 1, format, args);
509   s->buf = buf;
510 
511   return w_string(s, false);
512 }
513 
printf(WATCHMAN_FMT_STRING (const char * format),...)514 w_string w_string::printf(WATCHMAN_FMT_STRING(const char* format), ...) {
515   va_list args;
516   va_start(args, format);
517   auto res = w_string::vprintf(format, args);
518   va_end(args);
519   return res;
520 }
521 
522 /* return a reference to a lowercased version of a string */
w_string_dup_lower(w_string_t * str)523 w_string_t *w_string_dup_lower(w_string_t *str)
524 {
525   bool is_lower = true;
526   char *buf;
527   uint32_t i;
528   w_string_t *s;
529 
530   for (i = 0; i < str->len; i++) {
531     if (tolower((uint8_t)str->buf[i]) != str->buf[i]) {
532       is_lower = false;
533       break;
534     }
535   }
536 
537   if (is_lower) {
538     w_string_addref(str);
539     return str;
540   }
541 
542   /* need to make a lowercase version */
543 
544   s = (w_string_t*)(new char[sizeof(*s) + str->len + 1]);
545   new (s) watchman_string();
546 
547   s->refcnt = 1;
548   s->len = str->len;
549   buf = (char*)(s + 1);
550   for (i = 0; i < str->len; i++) {
551     buf[i] = (char)tolower((uint8_t)str->buf[i]);
552   }
553   buf[str->len] = 0;
554   s->buf = buf;
555 
556   return s;
557 }
558 
559 /* make a lowercased copy of string */
w_string_new_lower_typed(const char * str,w_string_type_t type)560 w_string_t *w_string_new_lower_typed(const char *str,
561     w_string_type_t type)
562 {
563   w_string_t *s;
564   uint32_t len = strlen_uint32(str);
565   char *buf;
566   uint32_t i;
567 
568   s = (w_string_t*)(new char[sizeof(*s) + len + 1]);
569   new (s) watchman_string();
570 
571   s->refcnt = 1;
572   s->len = len;
573   buf = (char*)(s + 1);
574   // TODO: optionally use ICU
575   for (i = 0; i < len; i++) {
576     buf[i] = (char)tolower((uint8_t)str[i]);
577   }
578   buf[len] = 0;
579   s->buf = buf;
580   s->type = type;
581 
582   return s;
583 }
584 
w_string_addref(w_string_t * str)585 void w_string_addref(w_string_t *str)
586 {
587   ++str->refcnt;
588 }
589 
w_string_delref(w_string_t * str)590 void w_string_delref(w_string_t *str)
591 {
592   if (--str->refcnt != 0) {
593     return;
594   }
595   // Call the destructor.  We can't use regular delete because
596   // we allocated using operator new[], and we can't use delete[]
597   // directly either because the type doesn't match what we allocated.
598   str->~w_string_t();
599   // Release the raw memory.
600   delete[](char*) str;
601 }
602 
w_string_compare(const w_string_t * a,const w_string_t * b)603 int w_string_compare(const w_string_t *a, const w_string_t *b)
604 {
605   int res;
606   if (a == b) return 0;
607   if (a->len < b->len) {
608     res = memcmp(a->buf, b->buf, a->len);
609     return res == 0 ? -1 : res;
610   } else if (a->len > b->len) {
611     res = memcmp(a->buf, b->buf, b->len);
612     return res == 0 ? +1 : res;
613   }
614   return memcmp(a->buf, b->buf, a->len);
615 }
616 
w_string_equal_cstring(const w_string_t * a,const char * b)617 bool w_string_equal_cstring(const w_string_t *a, const char *b)
618 {
619   uint32_t blen = strlen_uint32(b);
620   if (a->len != blen) return false;
621   return memcmp(a->buf, b, a->len) == 0 ? true : false;
622 }
623 
w_string_equal(const w_string_t * a,const w_string_t * b)624 bool w_string_equal(const w_string_t *a, const w_string_t *b)
625 {
626   if (a == b) return true;
627   if (a == nullptr || b == nullptr) return false;
628   if (a->len != b->len) return false;
629   if (a->hval_computed && b->hval_computed && a->_hval != b->_hval) {
630     return false;
631   }
632   return memcmp(a->buf, b->buf, a->len) == 0 ? true : false;
633 }
634 
w_string_equal_caseless(w_string_piece a,w_string_piece b)635 bool w_string_equal_caseless(w_string_piece a, w_string_piece b) {
636   uint32_t i;
637 
638   if (a.size() != b.size()) {
639     return false;
640   }
641   for (i = 0; i < a.size(); i++) {
642     if (tolower((uint8_t)a[i]) != tolower((uint8_t)b[i])) {
643       return false;
644     }
645   }
646   return true;
647 }
648 
w_string_dirname(w_string_t * str)649 w_string_t *w_string_dirname(w_string_t *str)
650 {
651   int end;
652 
653   /* can't use libc strXXX functions because we may be operating
654    * on a slice */
655   for (end = str->len - 1; end >= 0; end--) {
656     if (is_slash(str->buf[end])) {
657       /* found the end of the parent dir */
658       return w_string_slice(str, 0, end);
659     }
660   }
661 
662   return NULL;
663 }
664 
hasSuffix(w_string_piece suffix) const665 bool w_string_piece::hasSuffix(w_string_piece suffix) const {
666   unsigned int base, i;
667 
668   if (size() < suffix.size() + 1) {
669     return false;
670   }
671 
672   base = size() - suffix.size();
673 
674   if (s_[base - 1] != '.') {
675     return false;
676   }
677 
678   for (i = 0; i < suffix.size(); i++) {
679     if (tolower((uint8_t)s_[base + i]) != suffix[i]) {
680       return false;
681     }
682   }
683 
684   return true;
685 }
686 
687 // Return the normalized (lowercase) filename suffix
w_string_suffix(w_string_t * str)688 w_string_t *w_string_suffix(w_string_t *str)
689 {
690   int end;
691   char name_buf[128];
692   char *buf;
693 
694   /* can't use libc strXXX functions because we may be operating
695    * on a slice */
696   for (end = str->len - 1; end >= 0; end--) {
697     if (str->buf[end] == '.') {
698       if (str->len - end > sizeof(name_buf)) {
699         // Too long
700         return NULL;
701       }
702 
703       buf = name_buf;
704       end++;
705       while ((unsigned)end < str->len) {
706         *buf = (char)tolower((uint8_t)str->buf[end]);
707         end++;
708         buf++;
709       }
710       *buf = '\0';
711       return w_string_new_typed(name_buf, str->type);
712     } else if (str->len - end >= sizeof(name_buf)) {
713       // We haven't found the '.' yet but the suffix will never fit in our local
714       // buffer
715       return nullptr;
716     }
717 
718     if (is_slash(str->buf[end])) {
719       // No suffix
720       return NULL;
721     }
722   }
723 
724   // Has no suffix
725   return NULL;
726 }
727 
w_string_startswith(w_string_t * str,w_string_t * prefix)728 bool w_string_startswith(w_string_t *str, w_string_t *prefix)
729 {
730   if (prefix->len > str->len) {
731     return false;
732   }
733   return memcmp(str->buf, prefix->buf, prefix->len) == 0;
734 }
735 
w_string_startswith_caseless(w_string_t * str,w_string_t * prefix)736 bool w_string_startswith_caseless(w_string_t *str, w_string_t *prefix)
737 {
738   size_t i;
739 
740   if (prefix->len > str->len) {
741     return false;
742   }
743   for (i = 0; i < prefix->len; i++) {
744     if (tolower((uint8_t)str->buf[i]) != tolower((uint8_t)prefix->buf[i])) {
745       return false;
746     }
747   }
748   return true;
749 }
750 
w_string_contains_cstr_len(const w_string_t * str,const char * needle,uint32_t nlen)751 bool w_string_contains_cstr_len(
752     const w_string_t* str,
753     const char* needle,
754     uint32_t nlen) {
755 #if HAVE_MEMMEM
756   return memmem(str->buf, str->len, needle, nlen) != NULL;
757 #else
758   // Most likely only for Windows.
759   // Inspired by http://stackoverflow.com/a/24000056/149111
760   const char *haystack = str->buf;
761   uint32_t hlen = str->len;
762   const char *limit;
763 
764   if (nlen == 0 || hlen < nlen) {
765     return false;
766   }
767 
768   limit = haystack + hlen - nlen + 1;
769   while ((haystack = (const char*)memchr(
770               haystack, needle[0], limit - haystack)) != NULL) {
771     if (memcmp(haystack, needle, nlen) == 0) {
772       return true;
773     }
774     haystack++;
775   }
776   return false;
777 #endif
778 }
779 
w_string_canon_path(w_string_t * str)780 w_string_t *w_string_canon_path(w_string_t *str)
781 {
782   int end;
783   int trim = 0;
784 
785   for (end = str->len - 1; end >= 0 && is_slash(str->buf[end]); end--) {
786     trim++;
787   }
788   if (trim) {
789     return w_string_slice(str, 0, str->len - trim);
790   }
791   w_string_addref(str);
792   return str;
793 }
794 
795 // Normalize directory separators to match the platform.
796 // Also trims any trailing directory separators
w_string_normalize_separators(w_string_t * str,char target_sep)797 w_string_t *w_string_normalize_separators(w_string_t *str, char target_sep) {
798   w_string_t *s;
799   char *buf;
800   uint32_t i, len;
801 
802   len = str->len;
803 
804   if (len == 0) {
805     w_string_addref(str);
806     return str;
807   }
808 
809   // This doesn't do any special UNC or path len escape prefix handling
810   // on windows.  We don't currently use it in a way that would require it.
811 
812   // Trim any trailing dir seps
813   while (len > 0) {
814     if (str->buf[len-1] == '/' || str->buf[len-1] == '\\') {
815       --len;
816     } else {
817       break;
818     }
819   }
820 
821   s = (w_string_t*)(new char[sizeof(*s) + len + 1]);
822   new (s) watchman_string();
823 
824   s->refcnt = 1;
825   s->len = len;
826   buf = (char*)(s + 1);
827 
828   for (i = 0; i < len; i++) {
829     if (str->buf[i] == '/' || str->buf[i] == '\\') {
830       buf[i] = target_sep;
831     } else {
832       buf[i] = str->buf[i];
833     }
834   }
835   buf[len] = 0;
836   s->buf = buf;
837 
838   return s;
839 }
840 
w_string_in_place_normalize_separators(w_string_t ** str,char target_sep)841 void w_string_in_place_normalize_separators(w_string_t **str, char target_sep) {
842   w_string_t *norm = w_string_normalize_separators(*str, target_sep);
843   w_string_delref(*str);
844   *str = norm;
845 }
846 
847 // Compute the basename of path, return that as a string
w_string_new_basename_typed(const char * path,w_string_type_t type)848 w_string_t *w_string_new_basename_typed(const char *path,
849     w_string_type_t type) {
850   const char *base;
851   base = path + strlen(path);
852   while (base > path && !is_slash(base[-1])) {
853     base--;
854   }
855   return w_string_new_typed(base, type);
856 }
857 
w_string_basename(w_string_t * str)858 w_string_t *w_string_basename(w_string_t *str)
859 {
860   int end;
861 
862   /* can't use libc strXXX functions because we may be operating
863    * on a slice */
864   for (end = str->len - 1; end >= 0; end--) {
865     if (is_slash(str->buf[end])) {
866       /* found the end of the parent dir */
867       return w_string_slice(str, end + 1, str->len - (end + 1));
868     }
869   }
870 
871   w_string_addref(str);
872   return str;
873 }
874 
w_string_path_cat(w_string_t * parent,w_string_t * rhs)875 w_string_t *w_string_path_cat(w_string_t *parent, w_string_t *rhs)
876 {
877   w_string_t *s;
878   int len;
879   char *buf;
880 
881   if (rhs->len == 0) {
882     w_string_addref(parent);
883     return parent;
884   }
885 
886   len = parent->len + rhs->len + 1;
887 
888   s = (w_string_t*)(new char[sizeof(*s) + len + 1]);
889   new (s) watchman_string();
890 
891   s->refcnt = 1;
892   s->len = len;
893   buf = (char*)(s + 1);
894   memcpy(buf, parent->buf, parent->len);
895   buf[parent->len] = '/';
896   memcpy(buf + parent->len + 1, rhs->buf, rhs->len);
897   buf[parent->len + 1 + rhs->len] = '\0';
898   s->buf = buf;
899   s->type = parent->type;
900 
901   return s;
902 }
903 
w_string_path_cat_cstr(w_string_t * parent,const char * rhs)904 w_string_t *w_string_path_cat_cstr(w_string_t *parent, const char *rhs) {
905   return w_string_path_cat_cstr_len(parent, rhs, strlen_uint32(rhs));
906 }
907 
w_string_path_cat_cstr_len(w_string_t * parent,const char * rhs,uint32_t rhs_len)908 w_string_t *w_string_path_cat_cstr_len(w_string_t *parent, const char *rhs,
909                                        uint32_t rhs_len) {
910   w_string_t *s;
911   int len;
912   char *buf;
913 
914   if (rhs_len == 0) {
915     w_string_addref(parent);
916     return parent;
917   }
918 
919   len = parent->len + rhs_len + 1;
920 
921   s = (w_string_t*)(new char[sizeof(*s) + len + 1]);
922   new (s) watchman_string();
923 
924   s->refcnt = 1;
925   s->len = len;
926   buf = (char*)(s + 1);
927   memcpy(buf, parent->buf, parent->len);
928   buf[parent->len] = '/';
929   memcpy(buf + parent->len + 1, rhs, rhs_len);
930   buf[parent->len + 1 + rhs_len] = '\0';
931   s->buf = buf;
932   s->type = parent->type;
933 
934   return s;
935 }
936 
w_dir_path_cat_str(const struct watchman_dir * dir,w_string_piece extra)937 w_string w_dir_path_cat_str(
938     const struct watchman_dir* dir,
939     w_string_piece extra) {
940   uint32_t length = 0;
941   const struct watchman_dir* d;
942   w_string_t *s;
943   char *buf, *end;
944 
945   if (extra.size()) {
946     length = extra.size() + 1 /* separator */;
947   }
948   for (d = dir; d; d = d->parent) {
949     length += d->name.size() + 1 /* separator OR final NUL terminator */;
950   }
951 
952   s = (w_string_t*)(new char[sizeof(*s) + length]);
953   new (s) watchman_string();
954 
955   s->refcnt = 1;
956   s->len = length - 1;
957   buf = (char *)(s + 1);
958   end = buf + s->len;
959 
960   *end = 0;
961   if (extra.size()) {
962     end -= extra.size();
963     memcpy(end, extra.data(), extra.size());
964   }
965   for (d = dir; d; d = d->parent) {
966     if (d != dir || (extra.size())) {
967       --end;
968       *end = '/';
969     }
970     end -= d->name.size();
971     memcpy(end, d->name.data(), d->name.size());
972   }
973 
974   s->buf = buf;
975   return w_string(s, false);
976 }
977 
w_string_dup_buf(const w_string_t * str)978 char *w_string_dup_buf(const w_string_t *str)
979 {
980   char *buf;
981 
982   buf = (char*)malloc(str->len + 1);
983   if (!buf) {
984     return NULL;
985   }
986 
987   memcpy(buf, str->buf, str->len);
988   buf[str->len] = 0;
989 
990   return buf;
991 }
992 
993 
994 // Given a string, return a shell-escaped copy
w_string_shell_escape(const w_string_t * str)995 w_string_t *w_string_shell_escape(const w_string_t *str)
996 {
997   // Worst case expansion for a char is 4x, plus quoting either end
998   uint32_t len = 2 + (str->len * 4);
999   w_string_t *s;
1000   char *buf;
1001   const char *src, *end;
1002 
1003   s = (w_string_t*)(new char[sizeof(*s) + len + 1]);
1004   new (s) watchman_string();
1005 
1006   s->refcnt = 1;
1007   buf = (char*)(s + 1);
1008   s->buf = buf;
1009 
1010   src = str->buf;
1011   end = src + str->len;
1012 
1013   *buf = '\'';
1014   buf++;
1015   while (src < end) {
1016     if (*src == '\'') {
1017       memcpy(buf, "'\\''", 4);
1018       buf += 4;
1019     } else {
1020       *buf = *src;
1021       buf++;
1022     }
1023     src++;
1024   }
1025   *buf = '\'';
1026   buf++;
1027   *buf = 0;
1028   s->len = (uint32_t)(buf - s->buf);
1029   s->type = str->type;
1030 
1031   return s;
1032 }
1033 
w_string_is_known_unicode(w_string_t * str)1034 bool w_string_is_known_unicode(w_string_t *str) {
1035   return str->type == W_STRING_UNICODE;
1036 }
1037 
w_string_is_null_terminated(w_string_t * str)1038 bool w_string_is_null_terminated(w_string_t *str) {
1039   return !str->slice ||
1040     (str->buf + str->len == str->slice->buf + str->slice->len &&
1041      w_string_is_null_terminated(str->slice));
1042 }
1043 
w_string_strlen(w_string_t * str)1044 size_t w_string_strlen(w_string_t *str) {
1045   return str->len;
1046 }
1047 
w_string_path_is_absolute(const w_string_t * str)1048 bool w_string_path_is_absolute(const w_string_t *str) {
1049   return w_is_path_absolute_cstr_len(str->buf, str->len);
1050 }
1051 
w_is_path_absolute_cstr(const char * path)1052 bool w_is_path_absolute_cstr(const char *path) {
1053   return w_is_path_absolute_cstr_len(path, strlen_uint32(path));
1054 }
1055 
/** Report whether the first `len` bytes of `path` form an absolute path.
 * On Windows a path is absolute when it is longer than two characters and
 * either starts with two separators ("\\something") or with a drive letter,
 * a colon and a separator ("C:\something").
 * On other platforms a path is absolute when it is non-empty and starts with
 * '/'. */
bool w_is_path_absolute_cstr_len(const char *path, uint32_t len) {
#ifdef _WIN32
  char drive_letter;

  if (len <= 2) {
    return false;
  }

  // "\something"
  if (is_slash(path[0])) {
    // "\\something" is absolute, "\something" is relative to the current
    // dir of the current drive, whatever that may be, for a given process
    return is_slash(path[1]);
  }

  // tolower() requires a value representable as unsigned char (or EOF), so
  // cast first; a byte with the high bit set must not be passed as a
  // negative int.
  drive_letter = (char)tolower((uint8_t)path[0]);
  // "C:something"
  if (drive_letter >= 'a' && drive_letter <= 'z' && path[1] == ':') {
    // "C:\something" is absolute, but "C:something" is relative to
    // the current dir on the C drive(!)
    return is_slash(path[2]);
  }
  // we could check for things like NUL:, COM: and so on here.
  // While those are technically absolute names, we can't watch them, so
  // we don't consider them absolute for the purposes of checking whether
  // the path is a valid watchable root
  return false;
#else
  return len > 0 && path[0] == '/';
#endif
}
1087 
1088 /* vim:ts=2:sw=2:et:
1089  */
1090