1 /* Copyright 2012-present Facebook, Inc.
2  * Licensed under the Apache License, Version 2.0 */
3 
4 #ifndef WATCHMAN_STRING_H
5 #define WATCHMAN_STRING_H
6 
7 #include "watchman_system.h"
8 
9 #include <stdbool.h>
10 #include <stdint.h>
11 #include <atomic>
12 #include <cstring>
13 #include <initializer_list>
14 #include <memory>
15 #include "watchman_log.h"
16 #ifdef _WIN32
17 #include <string>
18 #endif
19 #include <stdexcept>
20 #include <vector>
21 
22 struct watchman_string;
23 typedef struct watchman_string w_string_t;
24 
25 typedef enum {
26   W_STRING_BYTE,
27   W_STRING_UNICODE,
28   W_STRING_MIXED
29 } w_string_type_t;
30 
31 struct watchman_string {
32   std::atomic<long> refcnt;
33   uint32_t _hval;
34   uint32_t len;
35   w_string_t *slice;
36   const char *buf;
37   w_string_type_t type:3;
38   unsigned hval_computed:1;
39 
40   watchman_string();
41   ~watchman_string();
42 };
43 
44 uint32_t w_string_compute_hval(w_string_t *str);
45 
w_string_hval(w_string_t * str)46 static inline uint32_t w_string_hval(w_string_t *str) {
47   if (str->hval_computed) {
48     return str->_hval;
49   }
50   return w_string_compute_hval(str);
51 }
52 
53 void w_string_addref(w_string_t *str);
54 
55 w_string_t *w_string_basename(w_string_t *str);
56 
57 w_string_t *w_string_canon_path(w_string_t *str);
58 int w_string_compare(const w_string_t *a, const w_string_t *b);
59 bool w_string_contains_cstr_len(
60     const w_string_t* str,
61     const char* needle,
62     uint32_t nlen);
63 
64 void w_string_delref(w_string_t *str);
65 w_string_t *w_string_dirname(w_string_t *str);
66 char *w_string_dup_buf(const w_string_t *str);
67 w_string_t *w_string_dup_lower(w_string_t *str);
68 
69 bool w_string_equal(const w_string_t *a, const w_string_t *b);
70 bool w_string_equal_cstring(const w_string_t *a, const char *b);
71 
72 void w_string_in_place_normalize_separators(
73     w_string_t** str,
74     char target_sep = '/');
75 
76 /* Typed string creation functions. */
77 w_string_t *w_string_new_typed(const char *str,
78     w_string_type_t type);
79 w_string_t *w_string_new_len_typed(const char *str, uint32_t len,
80     w_string_type_t type);
81 w_string_t *w_string_new_len_no_ref_typed(const char *str, uint32_t len,
82     w_string_type_t type);
83 w_string_t *w_string_new_basename_typed(const char *path,
84     w_string_type_t type);
85 w_string_t *w_string_new_lower_typed(const char *str,
86     w_string_type_t type);
87 
88 void w_string_new_len_typed_stack(w_string_t *into, const char *str,
89                                   uint32_t len, w_string_type_t type);
90 
91 w_string_t* w_string_normalize_separators(
92     w_string_t* str,
93     char target_sep = '/');
94 
95 w_string_t *w_string_path_cat(w_string_t *parent, w_string_t *rhs);
96 w_string_t *w_string_path_cat_cstr(w_string_t *parent, const char *rhs);
97 w_string_t *w_string_path_cat_cstr_len(w_string_t *parent, const char *rhs,
98                                        uint32_t rhs_len);
99 bool w_string_path_is_absolute(const w_string_t *str);
100 
101 bool w_string_startswith(w_string_t *str, w_string_t *prefix);
102 bool w_string_startswith_caseless(w_string_t *str, w_string_t *prefix);
103 w_string_t *w_string_shell_escape(const w_string_t *str);
104 w_string_t *w_string_slice(w_string_t *str, uint32_t start, uint32_t len);
105 w_string_t *w_string_suffix(w_string_t *str);
106 
107 bool w_string_is_known_unicode(w_string_t *str);
108 bool w_string_is_null_terminated(w_string_t *str);
109 size_t w_string_strlen(w_string_t *str);
110 
111 uint32_t strlen_uint32(const char *str);
112 
113 bool w_is_path_absolute_cstr(const char *path);
114 bool w_is_path_absolute_cstr_len(const char *path, uint32_t len);
115 
is_slash(char c)116 inline bool is_slash(char c) {
117   return c == '/'
118 #ifdef _WIN32
119       || c == '\\'
120 #endif
121       ;
122 }
123 
124 class w_string;
125 
126 /** Represents a view over some externally managed string storage.
127  * It is simply a pair of pointers that define the start and end
128  * of the valid region. */
129 class w_string_piece {
130   const char *s_, *e_;
131 
132  public:
133   w_string_piece();
134   /* implicit */ w_string_piece(std::nullptr_t);
135 
w_string_piece(w_string_t * str)136   inline /* implicit */ w_string_piece(w_string_t* str)
137       : s_(str->buf), e_(str->buf + str->len) {}
138 
139   /** Construct from a string-like object */
140   template <
141       typename String,
142       typename std::enable_if<
143           std::is_class<String>::value &&
144               !std::is_same<String, w_string>::value,
145           int>::type = 0>
w_string_piece(const String & str)146   inline /* implicit */ w_string_piece(const String& str)
147       : s_(str.data()), e_(str.data() + str.size()) {}
148 
149   /** Construct from w_string.  This is almost the same as
150    * the string like object constructor above, but we need a nullptr check
151    */
152   template <
153       typename String,
154       typename std::enable_if<std::is_same<String, w_string>::value, int>::
155           type = 0>
w_string_piece(const String & str)156   inline /* implicit */ w_string_piece(const String& str) {
157     if (!str) {
158       s_ = nullptr;
159       e_ = nullptr;
160     } else {
161       s_ = str.data();
162       e_ = str.data() + str.size();
163     }
164   }
165 
w_string_piece(const char * cstr)166   inline /* implicit */ w_string_piece(const char* cstr)
167       : s_(cstr), e_(cstr + strlen(cstr)) {}
168 
w_string_piece(const char * cstr,size_t len)169   /* implicit */ inline w_string_piece(const char* cstr, size_t len)
170       : s_(cstr), e_(cstr + len) {}
171 
172   w_string_piece(const w_string_piece& other) = default;
173   w_string_piece& operator=(const w_string_piece& other) = default;
174   w_string_piece(w_string_piece&& other) noexcept;
175 
data()176   inline const char* data() const {
177     return s_;
178   }
179 
empty()180   inline bool empty() const {
181     return e_ == s_;
182   }
183 
size()184   inline size_t size() const {
185     return e_ - s_;
186   }
187 
188   const char& operator[](size_t i) const {
189     return s_[i];
190   }
191 
192   /** move the start of the string by n characters, stripping off that prefix */
advance(size_t n)193   void advance(size_t n) {
194     if (n > size()) {
195       throw std::range_error("index out of range");
196     }
197     s_ += n;
198   }
199 
200   /** Return a copy of the string as a w_string */
201   w_string asWString(w_string_type_t stringType = W_STRING_BYTE) const;
202 
203   /** Return a lowercased copy of the string */
204   w_string asLowerCase(w_string_type_t stringType = W_STRING_BYTE) const;
205 
206   /** Return a UTF-8-clean copy of the string */
207   w_string asUTF8Clean() const;
208 
209   /** Returns true if the filename suffix of this string matches
210    * the provided suffix, which must be lower cased.
211    * This string piece lower cased and matched against suffix */
212   bool hasSuffix(w_string_piece suffix) const;
213 
214   bool pathIsAbsolute() const;
215   bool pathIsEqual(w_string_piece other) const;
216   w_string_piece dirName() const;
217   w_string_piece baseName() const;
218 
219   /** Split the string by delimiter and emit to the provided vector */
220   template <typename Vector>
split(Vector & result,char delim)221   void split(Vector& result, char delim) const {
222     const char* begin = s_;
223     const char* it = begin;
224     while (it != e_) {
225       if (*it == delim) {
226         result.emplace_back(begin, it - begin);
227         begin = ++it;
228         continue;
229       }
230       ++it;
231     }
232 
233     if (begin != e_) {
234       result.emplace_back(begin, e_ - begin);
235     }
236   }
237 
238   bool operator==(w_string_piece other) const;
239   bool operator!=(w_string_piece other) const;
240   bool operator<(w_string_piece other) const;
241 
242   bool startsWith(w_string_piece prefix) const;
243   bool startsWithCaseInsensitive(w_string_piece prefix) const;
244 
245   // Compute a hash value for this piece
246   uint32_t hashValue() const;
247 
248 #ifdef _WIN32
249   // Returns a wide character representation of the piece
250   std::wstring asWideUNC() const;
251 #endif
252 };
253 
254 bool w_string_equal_caseless(w_string_piece a, w_string_piece b);
255 struct watchman_dir;
256 w_string w_dir_path_cat_str(const struct watchman_dir* dir, w_string_piece str);
257 
258 /** A smart pointer class for tracking w_string_t instances */
259 class w_string {
260  public:
261   /** Initialize a nullptr */
262   w_string();
263   /* implicit */ w_string(std::nullptr_t);
264 
265   /** Make a new string from some bytes and a type */
266   w_string(
267       const char* buf,
268       size_t len,
269       w_string_type_t stringType = W_STRING_BYTE);
270   /* implicit */ w_string(
271       const char* buf,
272       w_string_type_t stringType = W_STRING_BYTE);
273 
274 #ifdef _WIN32
275   /** Convert a wide character path to utf-8 and return it as a w_string.
276    * This constructor is intended only for path names and not as a general
277    * WCHAR -> w_string constructor; it will always apply path mapping
278    * logic to the result and may mangle non-pathname strings if they
279    * are passed to it. */
280   w_string(const WCHAR* wpath, size_t len);
281 #endif
282 
283   /** Initialize, taking a ref on w_string_t */
284   /* implicit */ w_string(w_string_t* str, bool addRef = true);
285   /** Release the string reference when we fall out of scope */
286   ~w_string();
287   /** Copying adds a ref */
288   w_string(const w_string& other);
289   w_string& operator=(const w_string& other);
290   /** Moving steals a ref */
291   w_string(w_string&& other) noexcept;
292   w_string& operator=(w_string&& other);
293 
294   /** Stop tracking the underlying string object, decrementing
295    * the reference count. */
296   void reset();
297 
298   /** Stop tracking the underlying string object, returning the
299    * reference to the caller.  The caller is responsible for
300    * decrementing the refcount */
301   w_string_t *release();
302 
303   operator w_string_t*() const {
304     return str_;
305   }
306 
w_string_piece()307   operator w_string_piece() const {
308     return piece();
309   }
310 
piece()311   inline w_string_piece piece() const {
312     if (str_ == nullptr) {
313       return w_string_piece();
314     }
315     return w_string_piece(data(), size());
316   }
317 
318   explicit operator bool() const {
319     return str_ != nullptr;
320   }
321 
322   bool operator==(const w_string& other) const;
323   bool operator!=(const w_string& other) const;
324   bool operator<(const w_string& other) const;
325 
326   /** path concatenation
327    * Pass in a list of w_string_pieces to join them all similarly to
328    * the python os.path.join() function. */
329   static w_string pathCat(std::initializer_list<w_string_piece> elems);
330 
331   /** Similar to asprintf, but returns a w_string */
332   static w_string printf(WATCHMAN_FMT_STRING(const char* format), ...)
333       WATCHMAN_FMT_ATTR(1, 2);
334   static w_string vprintf(const char* format, va_list ap);
335 
336   template <typename... Args>
337   static w_string build(Args&&... args);
338 
339   /** Return a possibly new version of this string that is null terminated */
340   w_string asNullTerminated() const;
341 
342   /** Ensure that this instance is referencing a null terminated version
343    * of the current string */
344   void makeNullTerminated();
345 
346   /** Return a possibly new version of this string that has its separators
347    * normalized to unix slashes */
348   w_string normalizeSeparators(char targetSeparator = '/') const;
349 
ensureNotNull()350   inline void ensureNotNull() const {
351     w_assert(str_ != nullptr, "w_string is nullptr!");
352   }
353 
354   /** Returns a pointer to a null terminated c-string.
355    * If this instance doesn't point to a null terminated c-string, throws
356    * an exception that tells you to use asNullTerminated or makeNullTerminated
357    * first. */
358   const char* c_str() const;
data()359   const char* data() const {
360     ensureNotNull();
361     return str_->buf;
362   }
363 
empty()364   bool empty() const {
365     if (str_) {
366       return str_->len == 0;
367     }
368     return true;
369   }
370 
size()371   size_t size() const {
372     ensureNotNull();
373     return str_->len;
374   }
375 
type()376   w_string_type_t type() const {
377     ensureNotNull();
378     return str_->type;
379   }
380 
381   /** Returns the directory component of the string, assuming a path string */
382   w_string dirName() const;
383   /** Returns the file name component of the string, assuming a path string */
384   w_string baseName() const;
385   /** Returns the filename suffix of a path string */
386   w_string suffix() const;
387 
388   /** Returns a slice of this string */
389   w_string slice(uint32_t start, uint32_t len) const;
390 
391  private:
392   w_string_t* str_{nullptr};
393 };
394 
395 /** Allow w_string to act as a key in unordered_(map|set) */
396 namespace std {
397 template <>
398 struct hash<w_string> {
399   std::size_t operator()(w_string const& str) const {
400     return w_string_hval(str);
401   }
402 };
403 template <>
404 struct hash<w_string_piece> {
405   std::size_t operator()(w_string_piece const& str) const {
406     return str.hashValue();
407   }
408 };
409 }
410 
411 namespace detail {
412 
413 // Testing whether something is a known stringy type
414 // with .data(), .size() methods
415 template <typename T>
416 struct IsStringSrc {
417   enum {
418     value = std::is_same<T, std::string>::value ||
419         std::is_same<T, w_string>::value ||
420         std::is_same<T, w_string_piece>::value
421   };
422 };
423 
424 // w_string_t is immutable so it doesn't have an .append() method, so
425 // we use this helper class to keep track of where we're emitting the
426 // concatenated strings.
427 struct Appender {
428   char* buf;
429   char* limit;
430 
431   Appender(char* buf, size_t len) : buf(buf), limit(buf + len) {}
432 
433   // After placing data in the buffer, move the cursor forwards
434   void advance(size_t amount) {
435     w_assert(amount <= avail(), "advanced more than reserved space");
436     buf += amount;
437   }
438 
439   // How many bytes of space are available
440   size_t avail() const {
441     return limit - buf;
442   }
443 
444   // Copy a buffer in and advance the cursor
445   void append(const char* src, size_t amount) {
446     w_assert(amount <= avail(), "advancing more than reserved space");
447     memcpy(buf, src, amount);
448     advance(amount);
449   }
450 
451   // Helper for rendering a base10 number
452   void appendUint64(uint64_t v) {
453     // 20 is max possible decimal digits for a 64-bit number
454     char localBuf[20];
455     uint32_t pos = sizeof(localBuf) - 1;
456     while (v >= 10) {
457       auto const q = v / 10;
458       auto const r = static_cast<uint32_t>(v % 10);
459       localBuf[pos--] = '0' + r;
460       v = q;
461     }
462     localBuf[pos] = static_cast<uint32_t>(v) + '0';
463     append(localBuf + pos, sizeof(localBuf) - pos);
464   }
465 
466   // Helper for rendering a base16 number
467   void appendHexUint64(uint64_t v) {
468     // 16 is max possible hex digits for a 64-bit number
469     char localBuf[16];
470     static const char* hexDigit = "0123456789abcdef";
471 
472     uint32_t pos = sizeof(localBuf) - 1;
473     while (v >= 16) {
474       auto const q = v / 16;
475       auto const r = static_cast<uint32_t>(v % 16);
476       localBuf[pos--] = hexDigit[r];
477       v = q;
478     }
479     localBuf[pos] = hexDigit[static_cast<uint32_t>(v)];
480     append(localBuf + pos, sizeof(localBuf) - pos);
481   }
482 };
483 
484 template <typename T>
485 constexpr typename std::enable_if<
486     std::is_pointer<T>::value && !std::is_convertible<T, const char*>::value,
487     size_t>::type
488 estimateSpaceNeeded(T) {
489   // We render pointers as 0xXXXXXX, so we need a 2 byte prefix
490   // and then an appropriate number of hex digits for the pointer size.
491   return 2 + (sizeof(T) * 2);
492 }
493 
494 // Render pointers as lowercase hex
495 template <typename T>
496 typename std::enable_if<
497     std::is_pointer<T>::value &&
498     !std::is_convertible<T, const char*>::value>::type
499 toAppend(T value, Appender& result) {
500   result.append("0x", 2);
501   result.appendHexUint64(uintptr_t(value));
502 }
503 
504 // Defer to snprintf for rendering floating point values
505 inline size_t estimateSpaceNeeded(double val) {
506   return snprintf(nullptr, 0, "%f", val);
507 }
508 
509 inline void toAppend(double val, Appender& result) {
510   auto len = snprintf(result.buf, result.avail(), "%f", val);
511   result.advance(len);
512 }
513 
514 // Need to specialize nullptr otherwise we see it as an empty w_string_piece
515 constexpr size_t estimateSpaceNeeded(std::nullptr_t) {
516   return 3; // 0x0
517 }
518 
519 inline void toAppend(std::nullptr_t, Appender& result) {
520   result.append("0x0", 3);
521 }
522 
523 // Size an unsigned integer value
524 template <typename T>
525 constexpr typename std::enable_if<
526     std::is_integral<T>::value && !std::is_signed<T>::value,
527     size_t>::type
528 estimateSpaceNeeded(T) {
529   // approx 0.3 decimal digits per binary bit; round up.
530   // 8 bits -> 3 digits
531   // 16 bits -> 5 digits
532   // 32 bits -> 10 digits
533   // 64 bits -> 20 digits
534   // Note that for larger bit sizes it is more memory efficient
535   // to compute the size on a per value basis (eg: if a 64-bit value
536   // fits in a single decimal digit, we're 20x more wasteful with
537   // memory).  We're going for relative simplicity here; just
538   // one simple function to measure up and ensure we have enough
539   // room.
540   return 1 + (sizeof(T) * 8 * 0.3);
541 }
542 
543 // Render an unsigned integer value
544 template <typename T>
545 typename std::enable_if<
546     std::is_integral<T>::value && !std::is_signed<T>::value>::type
547 toAppend(T value, Appender& result) {
548   result.appendUint64(value);
549 }
550 
551 template <typename T>
552 constexpr typename std::enable_if<
553     std::is_integral<T>::value && std::is_signed<T>::value,
554     size_t>::type
555 estimateSpaceNeeded(T) {
556   // Need 1 extra byte for the '-' sign
557   return 2 + (sizeof(T) * 8 * 0.3);
558 }
559 
560 // Render a signed integer value
561 template <typename T>
562 typename std::enable_if<
563     std::is_integral<T>::value && std::is_signed<T>::value>::type
564 toAppend(T value, Appender& result) {
565   if (value < 0) {
566     result.append("-", 1);
567     // When "value" is the smallest negative, negating it would
568     // evoke undefined behavior, so instead of "-value" we carry
569     // out a bitwise complement and add one.
570     result.appendUint64(~static_cast<uint64_t>(value) + 1);
571   } else {
572     result.appendUint64(static_cast<uint64_t>(value));
573   }
574 }
575 
576 // Measure a stringy value
577 template <typename T>
578 typename std::enable_if<IsStringSrc<T>::value, size_t>::type
579 estimateSpaceNeeded(const T& src) {
580   // For known stringy types, we call the size method
581   if (!src.empty()) {
582     return src.size();
583   }
584   return 0;
585 }
586 
587 template <typename T>
588 typename std::enable_if<IsStringSrc<T>::value>::type toAppend(
589     const T& src,
590     Appender& result) {
591   if (!src.empty()) {
592     result.append(src.data(), src.size());
593   }
594 }
595 
596 // If we can convert it to a string piece, we can call its size() method
597 template <typename T>
598 typename std::enable_if<std::is_convertible<T, const char*>::value, size_t>::
599     type
600     estimateSpaceNeeded(T src) {
601   return w_string_piece(src).size();
602 }
603 
604 template <typename T>
605 typename std::enable_if<std::is_convertible<T, const char*>::value>::type
606 toAppend(T src, Appender& result) {
607   w_string_piece piece(src);
608   result.append(piece.data(), piece.size());
609 }
610 
611 // If we can convert it to a string piece, we can call its size() method
612 template <typename T>
613 typename std::enable_if<
614     std::is_convertible<T, w_string_piece>::value && !IsStringSrc<T>::value &&
615         !std::is_convertible<T, const char*>::value,
616     size_t>::type
617 estimateSpaceNeeded(T src) {
618   return w_string_piece(src).size();
619 }
620 
621 template <typename T>
622 typename std::enable_if<
623     std::is_convertible<T, w_string_piece>::value && !IsStringSrc<T>::value &&
624     !std::is_convertible<T, const char*>::value>::type
625 toAppend(T src, Appender& result) {
626   w_string_piece piece(src);
627   result.append(piece.data(), piece.size());
628 }
629 
630 // Recursive, variadic expansion to measure up a set of arguments
631 
632 inline size_t estimateSpaceToReserve(size_t accumulated) {
633   return accumulated;
634 }
635 
636 template <typename T, typename... Args>
637 size_t
638 estimateSpaceToReserve(size_t accumulated, const T& v, const Args&... args) {
639   return estimateSpaceToReserve(accumulated + estimateSpaceNeeded(v), args...);
640 }
641 
642 // Recursive, variadic expansion to append a set of arguments
643 
644 inline void appendTo(Appender&) {}
645 
646 template <typename T, typename... Args>
647 void appendTo(Appender& result, const T& v, const Args&... args) {
648   toAppend(v, result);
649   appendTo(result, args...);
650 }
651 } // namespace detail
652 
653 // Concat the args together into a w_string
654 
655 template <typename... Args>
656 w_string w_string::build(Args&&... args) {
657   auto reserved = ::detail::estimateSpaceToReserve(1, args...);
658 
659   auto s = (w_string_t*)(new char[sizeof(w_string_t) + reserved]);
660   new (s) watchman_string();
661 
662   s->refcnt = 1;
663   auto buf = (char*)(s + 1);
664   s->buf = buf;
665 
666   ::detail::Appender appender(buf, reserved);
667   ::detail::appendTo(appender, args...);
668 
669   s->len = reserved - appender.avail();
670   buf[s->len] = '\0';
671 
672   return w_string(s, false);
673 }
674 
675 namespace watchman {
676 
677 // Helper for building a stringy container of a given type.
678 // This is similar in spirit to folly::to<>.  Usage is:
679 // auto str = watchman::to<std::string>("foo", 123)
680 // and it will return a std::string holding "foo123"
681 template <typename Container, typename... Args>
682 Container to(Args&&... args) {
683   auto reserved = ::detail::estimateSpaceToReserve(1, args...);
684 
685   Container result;
686   result.resize(reserved);
687 
688   ::detail::Appender appender(&result[0], reserved);
689   ::detail::appendTo(appender, args...);
690 
691   result.resize(reserved - appender.avail());
692   return result;
693 }
694 }
695 
696 #endif
697 
698 /* vim:ts=2:sw=2:et:
699  */
700