1 /* Copyright 2012-present Facebook, Inc.
2 * Licensed under the Apache License, Version 2.0 */
3
4 #ifndef WATCHMAN_STRING_H
5 #define WATCHMAN_STRING_H
6
7 #include "watchman_system.h"
8
9 #include <stdbool.h>
10 #include <stdint.h>
11 #include <atomic>
12 #include <cstring>
13 #include <initializer_list>
14 #include <memory>
15 #include "watchman_log.h"
16 #ifdef _WIN32
17 #include <string>
18 #endif
19 #include <stdexcept>
20 #include <vector>
21
22 struct watchman_string;
23 typedef struct watchman_string w_string_t;
24
25 typedef enum {
26 W_STRING_BYTE,
27 W_STRING_UNICODE,
28 W_STRING_MIXED
29 } w_string_type_t;
30
31 struct watchman_string {
32 std::atomic<long> refcnt;
33 uint32_t _hval;
34 uint32_t len;
35 w_string_t *slice;
36 const char *buf;
37 w_string_type_t type:3;
38 unsigned hval_computed:1;
39
40 watchman_string();
41 ~watchman_string();
42 };
43
44 uint32_t w_string_compute_hval(w_string_t *str);
45
w_string_hval(w_string_t * str)46 static inline uint32_t w_string_hval(w_string_t *str) {
47 if (str->hval_computed) {
48 return str->_hval;
49 }
50 return w_string_compute_hval(str);
51 }
52
53 void w_string_addref(w_string_t *str);
54
55 w_string_t *w_string_basename(w_string_t *str);
56
57 w_string_t *w_string_canon_path(w_string_t *str);
58 int w_string_compare(const w_string_t *a, const w_string_t *b);
59 bool w_string_contains_cstr_len(
60 const w_string_t* str,
61 const char* needle,
62 uint32_t nlen);
63
64 void w_string_delref(w_string_t *str);
65 w_string_t *w_string_dirname(w_string_t *str);
66 char *w_string_dup_buf(const w_string_t *str);
67 w_string_t *w_string_dup_lower(w_string_t *str);
68
69 bool w_string_equal(const w_string_t *a, const w_string_t *b);
70 bool w_string_equal_cstring(const w_string_t *a, const char *b);
71
72 void w_string_in_place_normalize_separators(
73 w_string_t** str,
74 char target_sep = '/');
75
76 /* Typed string creation functions. */
77 w_string_t *w_string_new_typed(const char *str,
78 w_string_type_t type);
79 w_string_t *w_string_new_len_typed(const char *str, uint32_t len,
80 w_string_type_t type);
81 w_string_t *w_string_new_len_no_ref_typed(const char *str, uint32_t len,
82 w_string_type_t type);
83 w_string_t *w_string_new_basename_typed(const char *path,
84 w_string_type_t type);
85 w_string_t *w_string_new_lower_typed(const char *str,
86 w_string_type_t type);
87
88 void w_string_new_len_typed_stack(w_string_t *into, const char *str,
89 uint32_t len, w_string_type_t type);
90
91 w_string_t* w_string_normalize_separators(
92 w_string_t* str,
93 char target_sep = '/');
94
95 w_string_t *w_string_path_cat(w_string_t *parent, w_string_t *rhs);
96 w_string_t *w_string_path_cat_cstr(w_string_t *parent, const char *rhs);
97 w_string_t *w_string_path_cat_cstr_len(w_string_t *parent, const char *rhs,
98 uint32_t rhs_len);
99 bool w_string_path_is_absolute(const w_string_t *str);
100
101 bool w_string_startswith(w_string_t *str, w_string_t *prefix);
102 bool w_string_startswith_caseless(w_string_t *str, w_string_t *prefix);
103 w_string_t *w_string_shell_escape(const w_string_t *str);
104 w_string_t *w_string_slice(w_string_t *str, uint32_t start, uint32_t len);
105 w_string_t *w_string_suffix(w_string_t *str);
106
107 bool w_string_is_known_unicode(w_string_t *str);
108 bool w_string_is_null_terminated(w_string_t *str);
109 size_t w_string_strlen(w_string_t *str);
110
111 uint32_t strlen_uint32(const char *str);
112
113 bool w_is_path_absolute_cstr(const char *path);
114 bool w_is_path_absolute_cstr_len(const char *path, uint32_t len);
115
is_slash(char c)116 inline bool is_slash(char c) {
117 return c == '/'
118 #ifdef _WIN32
119 || c == '\\'
120 #endif
121 ;
122 }
123
124 class w_string;
125
126 /** Represents a view over some externally managed string storage.
127 * It is simply a pair of pointers that define the start and end
128 * of the valid region. */
129 class w_string_piece {
130 const char *s_, *e_;
131
132 public:
133 w_string_piece();
134 /* implicit */ w_string_piece(std::nullptr_t);
135
w_string_piece(w_string_t * str)136 inline /* implicit */ w_string_piece(w_string_t* str)
137 : s_(str->buf), e_(str->buf + str->len) {}
138
139 /** Construct from a string-like object */
140 template <
141 typename String,
142 typename std::enable_if<
143 std::is_class<String>::value &&
144 !std::is_same<String, w_string>::value,
145 int>::type = 0>
w_string_piece(const String & str)146 inline /* implicit */ w_string_piece(const String& str)
147 : s_(str.data()), e_(str.data() + str.size()) {}
148
149 /** Construct from w_string. This is almost the same as
150 * the string like object constructor above, but we need a nullptr check
151 */
152 template <
153 typename String,
154 typename std::enable_if<std::is_same<String, w_string>::value, int>::
155 type = 0>
w_string_piece(const String & str)156 inline /* implicit */ w_string_piece(const String& str) {
157 if (!str) {
158 s_ = nullptr;
159 e_ = nullptr;
160 } else {
161 s_ = str.data();
162 e_ = str.data() + str.size();
163 }
164 }
165
w_string_piece(const char * cstr)166 inline /* implicit */ w_string_piece(const char* cstr)
167 : s_(cstr), e_(cstr + strlen(cstr)) {}
168
w_string_piece(const char * cstr,size_t len)169 /* implicit */ inline w_string_piece(const char* cstr, size_t len)
170 : s_(cstr), e_(cstr + len) {}
171
172 w_string_piece(const w_string_piece& other) = default;
173 w_string_piece& operator=(const w_string_piece& other) = default;
174 w_string_piece(w_string_piece&& other) noexcept;
175
data()176 inline const char* data() const {
177 return s_;
178 }
179
empty()180 inline bool empty() const {
181 return e_ == s_;
182 }
183
size()184 inline size_t size() const {
185 return e_ - s_;
186 }
187
188 const char& operator[](size_t i) const {
189 return s_[i];
190 }
191
192 /** move the start of the string by n characters, stripping off that prefix */
advance(size_t n)193 void advance(size_t n) {
194 if (n > size()) {
195 throw std::range_error("index out of range");
196 }
197 s_ += n;
198 }
199
200 /** Return a copy of the string as a w_string */
201 w_string asWString(w_string_type_t stringType = W_STRING_BYTE) const;
202
203 /** Return a lowercased copy of the string */
204 w_string asLowerCase(w_string_type_t stringType = W_STRING_BYTE) const;
205
206 /** Return a UTF-8-clean copy of the string */
207 w_string asUTF8Clean() const;
208
209 /** Returns true if the filename suffix of this string matches
210 * the provided suffix, which must be lower cased.
211 * This string piece lower cased and matched against suffix */
212 bool hasSuffix(w_string_piece suffix) const;
213
214 bool pathIsAbsolute() const;
215 bool pathIsEqual(w_string_piece other) const;
216 w_string_piece dirName() const;
217 w_string_piece baseName() const;
218
219 /** Split the string by delimiter and emit to the provided vector */
220 template <typename Vector>
split(Vector & result,char delim)221 void split(Vector& result, char delim) const {
222 const char* begin = s_;
223 const char* it = begin;
224 while (it != e_) {
225 if (*it == delim) {
226 result.emplace_back(begin, it - begin);
227 begin = ++it;
228 continue;
229 }
230 ++it;
231 }
232
233 if (begin != e_) {
234 result.emplace_back(begin, e_ - begin);
235 }
236 }
237
238 bool operator==(w_string_piece other) const;
239 bool operator!=(w_string_piece other) const;
240 bool operator<(w_string_piece other) const;
241
242 bool startsWith(w_string_piece prefix) const;
243 bool startsWithCaseInsensitive(w_string_piece prefix) const;
244
245 // Compute a hash value for this piece
246 uint32_t hashValue() const;
247
248 #ifdef _WIN32
249 // Returns a wide character representation of the piece
250 std::wstring asWideUNC() const;
251 #endif
252 };
253
254 bool w_string_equal_caseless(w_string_piece a, w_string_piece b);
255 struct watchman_dir;
256 w_string w_dir_path_cat_str(const struct watchman_dir* dir, w_string_piece str);
257
258 /** A smart pointer class for tracking w_string_t instances */
259 class w_string {
260 public:
261 /** Initialize a nullptr */
262 w_string();
263 /* implicit */ w_string(std::nullptr_t);
264
265 /** Make a new string from some bytes and a type */
266 w_string(
267 const char* buf,
268 size_t len,
269 w_string_type_t stringType = W_STRING_BYTE);
270 /* implicit */ w_string(
271 const char* buf,
272 w_string_type_t stringType = W_STRING_BYTE);
273
274 #ifdef _WIN32
275 /** Convert a wide character path to utf-8 and return it as a w_string.
276 * This constructor is intended only for path names and not as a general
277 * WCHAR -> w_string constructor; it will always apply path mapping
278 * logic to the result and may mangle non-pathname strings if they
279 * are passed to it. */
280 w_string(const WCHAR* wpath, size_t len);
281 #endif
282
283 /** Initialize, taking a ref on w_string_t */
284 /* implicit */ w_string(w_string_t* str, bool addRef = true);
285 /** Release the string reference when we fall out of scope */
286 ~w_string();
287 /** Copying adds a ref */
288 w_string(const w_string& other);
289 w_string& operator=(const w_string& other);
290 /** Moving steals a ref */
291 w_string(w_string&& other) noexcept;
292 w_string& operator=(w_string&& other);
293
294 /** Stop tracking the underlying string object, decrementing
295 * the reference count. */
296 void reset();
297
298 /** Stop tracking the underlying string object, returning the
299 * reference to the caller. The caller is responsible for
300 * decrementing the refcount */
301 w_string_t *release();
302
303 operator w_string_t*() const {
304 return str_;
305 }
306
w_string_piece()307 operator w_string_piece() const {
308 return piece();
309 }
310
piece()311 inline w_string_piece piece() const {
312 if (str_ == nullptr) {
313 return w_string_piece();
314 }
315 return w_string_piece(data(), size());
316 }
317
318 explicit operator bool() const {
319 return str_ != nullptr;
320 }
321
322 bool operator==(const w_string& other) const;
323 bool operator!=(const w_string& other) const;
324 bool operator<(const w_string& other) const;
325
326 /** path concatenation
327 * Pass in a list of w_string_pieces to join them all similarly to
328 * the python os.path.join() function. */
329 static w_string pathCat(std::initializer_list<w_string_piece> elems);
330
331 /** Similar to asprintf, but returns a w_string */
332 static w_string printf(WATCHMAN_FMT_STRING(const char* format), ...)
333 WATCHMAN_FMT_ATTR(1, 2);
334 static w_string vprintf(const char* format, va_list ap);
335
336 template <typename... Args>
337 static w_string build(Args&&... args);
338
339 /** Return a possibly new version of this string that is null terminated */
340 w_string asNullTerminated() const;
341
342 /** Ensure that this instance is referencing a null terminated version
343 * of the current string */
344 void makeNullTerminated();
345
346 /** Return a possibly new version of this string that has its separators
347 * normalized to unix slashes */
348 w_string normalizeSeparators(char targetSeparator = '/') const;
349
ensureNotNull()350 inline void ensureNotNull() const {
351 w_assert(str_ != nullptr, "w_string is nullptr!");
352 }
353
354 /** Returns a pointer to a null terminated c-string.
355 * If this instance doesn't point to a null terminated c-string, throws
356 * an exception that tells you to use asNullTerminated or makeNullTerminated
357 * first. */
358 const char* c_str() const;
data()359 const char* data() const {
360 ensureNotNull();
361 return str_->buf;
362 }
363
empty()364 bool empty() const {
365 if (str_) {
366 return str_->len == 0;
367 }
368 return true;
369 }
370
size()371 size_t size() const {
372 ensureNotNull();
373 return str_->len;
374 }
375
type()376 w_string_type_t type() const {
377 ensureNotNull();
378 return str_->type;
379 }
380
381 /** Returns the directory component of the string, assuming a path string */
382 w_string dirName() const;
383 /** Returns the file name component of the string, assuming a path string */
384 w_string baseName() const;
385 /** Returns the filename suffix of a path string */
386 w_string suffix() const;
387
388 /** Returns a slice of this string */
389 w_string slice(uint32_t start, uint32_t len) const;
390
391 private:
392 w_string_t* str_{nullptr};
393 };
394
395 /** Allow w_string to act as a key in unordered_(map|set) */
396 namespace std {
397 template <>
398 struct hash<w_string> {
399 std::size_t operator()(w_string const& str) const {
400 return w_string_hval(str);
401 }
402 };
403 template <>
404 struct hash<w_string_piece> {
405 std::size_t operator()(w_string_piece const& str) const {
406 return str.hashValue();
407 }
408 };
409 }
410
411 namespace detail {
412
413 // Testing whether something is a known stringy type
414 // with .data(), .size() methods
415 template <typename T>
416 struct IsStringSrc {
417 enum {
418 value = std::is_same<T, std::string>::value ||
419 std::is_same<T, w_string>::value ||
420 std::is_same<T, w_string_piece>::value
421 };
422 };
423
424 // w_string_t is immutable so it doesn't have an .append() method, so
425 // we use this helper class to keep track of where we're emitting the
426 // concatenated strings.
427 struct Appender {
428 char* buf;
429 char* limit;
430
431 Appender(char* buf, size_t len) : buf(buf), limit(buf + len) {}
432
433 // After placing data in the buffer, move the cursor forwards
434 void advance(size_t amount) {
435 w_assert(amount <= avail(), "advanced more than reserved space");
436 buf += amount;
437 }
438
439 // How many bytes of space are available
440 size_t avail() const {
441 return limit - buf;
442 }
443
444 // Copy a buffer in and advance the cursor
445 void append(const char* src, size_t amount) {
446 w_assert(amount <= avail(), "advancing more than reserved space");
447 memcpy(buf, src, amount);
448 advance(amount);
449 }
450
451 // Helper for rendering a base10 number
452 void appendUint64(uint64_t v) {
453 // 20 is max possible decimal digits for a 64-bit number
454 char localBuf[20];
455 uint32_t pos = sizeof(localBuf) - 1;
456 while (v >= 10) {
457 auto const q = v / 10;
458 auto const r = static_cast<uint32_t>(v % 10);
459 localBuf[pos--] = '0' + r;
460 v = q;
461 }
462 localBuf[pos] = static_cast<uint32_t>(v) + '0';
463 append(localBuf + pos, sizeof(localBuf) - pos);
464 }
465
466 // Helper for rendering a base16 number
467 void appendHexUint64(uint64_t v) {
468 // 16 is max possible hex digits for a 64-bit number
469 char localBuf[16];
470 static const char* hexDigit = "0123456789abcdef";
471
472 uint32_t pos = sizeof(localBuf) - 1;
473 while (v >= 16) {
474 auto const q = v / 16;
475 auto const r = static_cast<uint32_t>(v % 16);
476 localBuf[pos--] = hexDigit[r];
477 v = q;
478 }
479 localBuf[pos] = hexDigit[static_cast<uint32_t>(v)];
480 append(localBuf + pos, sizeof(localBuf) - pos);
481 }
482 };
483
484 template <typename T>
485 constexpr typename std::enable_if<
486 std::is_pointer<T>::value && !std::is_convertible<T, const char*>::value,
487 size_t>::type
488 estimateSpaceNeeded(T) {
489 // We render pointers as 0xXXXXXX, so we need a 2 byte prefix
490 // and then an appropriate number of hex digits for the pointer size.
491 return 2 + (sizeof(T) * 2);
492 }
493
494 // Render pointers as lowercase hex
495 template <typename T>
496 typename std::enable_if<
497 std::is_pointer<T>::value &&
498 !std::is_convertible<T, const char*>::value>::type
499 toAppend(T value, Appender& result) {
500 result.append("0x", 2);
501 result.appendHexUint64(uintptr_t(value));
502 }
503
504 // Defer to snprintf for rendering floating point values
505 inline size_t estimateSpaceNeeded(double val) {
506 return snprintf(nullptr, 0, "%f", val);
507 }
508
509 inline void toAppend(double val, Appender& result) {
510 auto len = snprintf(result.buf, result.avail(), "%f", val);
511 result.advance(len);
512 }
513
514 // Need to specialize nullptr otherwise we see it as an empty w_string_piece
515 constexpr size_t estimateSpaceNeeded(std::nullptr_t) {
516 return 3; // 0x0
517 }
518
519 inline void toAppend(std::nullptr_t, Appender& result) {
520 result.append("0x0", 3);
521 }
522
523 // Size an unsigned integer value
524 template <typename T>
525 constexpr typename std::enable_if<
526 std::is_integral<T>::value && !std::is_signed<T>::value,
527 size_t>::type
528 estimateSpaceNeeded(T) {
529 // approx 0.3 decimal digits per binary bit; round up.
530 // 8 bits -> 3 digits
531 // 16 bits -> 5 digits
532 // 32 bits -> 10 digits
533 // 64 bits -> 20 digits
534 // Note that for larger bit sizes it is more memory efficient
535 // to compute the size on a per value basis (eg: if a 64-bit value
536 // fits in a single decimal digit, we're 20x more wasteful with
537 // memory). We're going for relative simplicity here; just
538 // one simple function to measure up and ensure we have enough
539 // room.
540 return 1 + (sizeof(T) * 8 * 0.3);
541 }
542
543 // Render an unsigned integer value
544 template <typename T>
545 typename std::enable_if<
546 std::is_integral<T>::value && !std::is_signed<T>::value>::type
547 toAppend(T value, Appender& result) {
548 result.appendUint64(value);
549 }
550
551 template <typename T>
552 constexpr typename std::enable_if<
553 std::is_integral<T>::value && std::is_signed<T>::value,
554 size_t>::type
555 estimateSpaceNeeded(T) {
556 // Need 1 extra byte for the '-' sign
557 return 2 + (sizeof(T) * 8 * 0.3);
558 }
559
560 // Render a signed integer value
561 template <typename T>
562 typename std::enable_if<
563 std::is_integral<T>::value && std::is_signed<T>::value>::type
564 toAppend(T value, Appender& result) {
565 if (value < 0) {
566 result.append("-", 1);
567 // When "value" is the smallest negative, negating it would
568 // evoke undefined behavior, so instead of "-value" we carry
569 // out a bitwise complement and add one.
570 result.appendUint64(~static_cast<uint64_t>(value) + 1);
571 } else {
572 result.appendUint64(static_cast<uint64_t>(value));
573 }
574 }
575
576 // Measure a stringy value
577 template <typename T>
578 typename std::enable_if<IsStringSrc<T>::value, size_t>::type
579 estimateSpaceNeeded(const T& src) {
580 // For known stringy types, we call the size method
581 if (!src.empty()) {
582 return src.size();
583 }
584 return 0;
585 }
586
587 template <typename T>
588 typename std::enable_if<IsStringSrc<T>::value>::type toAppend(
589 const T& src,
590 Appender& result) {
591 if (!src.empty()) {
592 result.append(src.data(), src.size());
593 }
594 }
595
596 // If we can convert it to a string piece, we can call its size() method
597 template <typename T>
598 typename std::enable_if<std::is_convertible<T, const char*>::value, size_t>::
599 type
600 estimateSpaceNeeded(T src) {
601 return w_string_piece(src).size();
602 }
603
604 template <typename T>
605 typename std::enable_if<std::is_convertible<T, const char*>::value>::type
606 toAppend(T src, Appender& result) {
607 w_string_piece piece(src);
608 result.append(piece.data(), piece.size());
609 }
610
611 // If we can convert it to a string piece, we can call its size() method
612 template <typename T>
613 typename std::enable_if<
614 std::is_convertible<T, w_string_piece>::value && !IsStringSrc<T>::value &&
615 !std::is_convertible<T, const char*>::value,
616 size_t>::type
617 estimateSpaceNeeded(T src) {
618 return w_string_piece(src).size();
619 }
620
621 template <typename T>
622 typename std::enable_if<
623 std::is_convertible<T, w_string_piece>::value && !IsStringSrc<T>::value &&
624 !std::is_convertible<T, const char*>::value>::type
625 toAppend(T src, Appender& result) {
626 w_string_piece piece(src);
627 result.append(piece.data(), piece.size());
628 }
629
630 // Recursive, variadic expansion to measure up a set of arguments
631
632 inline size_t estimateSpaceToReserve(size_t accumulated) {
633 return accumulated;
634 }
635
636 template <typename T, typename... Args>
637 size_t
638 estimateSpaceToReserve(size_t accumulated, const T& v, const Args&... args) {
639 return estimateSpaceToReserve(accumulated + estimateSpaceNeeded(v), args...);
640 }
641
642 // Recursive, variadic expansion to append a set of arguments
643
644 inline void appendTo(Appender&) {}
645
646 template <typename T, typename... Args>
647 void appendTo(Appender& result, const T& v, const Args&... args) {
648 toAppend(v, result);
649 appendTo(result, args...);
650 }
651 } // namespace detail
652
653 // Concat the args together into a w_string
654
655 template <typename... Args>
656 w_string w_string::build(Args&&... args) {
657 auto reserved = ::detail::estimateSpaceToReserve(1, args...);
658
659 auto s = (w_string_t*)(new char[sizeof(w_string_t) + reserved]);
660 new (s) watchman_string();
661
662 s->refcnt = 1;
663 auto buf = (char*)(s + 1);
664 s->buf = buf;
665
666 ::detail::Appender appender(buf, reserved);
667 ::detail::appendTo(appender, args...);
668
669 s->len = reserved - appender.avail();
670 buf[s->len] = '\0';
671
672 return w_string(s, false);
673 }
674
675 namespace watchman {
676
677 // Helper for building a stringy container of a given type.
678 // This is similar in spirit to folly::to<>. Usage is:
679 // auto str = watchman::to<std::string>("foo", 123)
680 // and it will return a std::string holding "foo123"
681 template <typename Container, typename... Args>
682 Container to(Args&&... args) {
683 auto reserved = ::detail::estimateSpaceToReserve(1, args...);
684
685 Container result;
686 result.resize(reserved);
687
688 ::detail::Appender appender(&result[0], reserved);
689 ::detail::appendTo(appender, args...);
690
691 result.resize(reserved - appender.avail());
692 return result;
693 }
694 }
695
696 #endif
697
698 /* vim:ts=2:sw=2:et:
699 */
700