1 //===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_ADT_STRINGREF_H
10 #define LLVM_ADT_STRINGREF_H
11 
12 #include "llvm/ADT/DenseMapInfo.h"
13 #include "llvm/ADT/STLFunctionalExtras.h"
14 #include "llvm/ADT/iterator_range.h"
15 #include "llvm/Support/Compiler.h"
16 #include <algorithm>
17 #include <cassert>
18 #include <cstddef>
19 #include <cstring>
20 #include <limits>
21 #include <string>
22 #if __cplusplus > 201402L
23 #include <string_view>
24 #endif
25 #include <type_traits>
26 #include <utility>
27 
28 // Declare the __builtin_strlen intrinsic for MSVC so it can be used in
29 // constexpr context.
30 #if defined(_MSC_VER)
31 extern "C" size_t __builtin_strlen(const char *);
32 #endif
33 
34 namespace llvm {
35 
  // Forward declarations. StringRef is declared early so that the helper
  // functions below can take it as a parameter before the class is defined.
  class APInt;
  class hash_code;
  template <typename T> class SmallVectorImpl;
  class StringRef;
40 
  /// Helper functions for StringRef::getAsInteger and
  /// StringRef::consumeInteger.
  ///
  /// They are only declared here. The StringRef members that call them treat
  /// a \c true return value as failure and read \p Result only after \c false
  /// was returned.
  bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
                            unsigned long long &Result);

  bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);

  /// The consume* variants take \p Str by non-const reference.
  bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
                              unsigned long long &Result);
  bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);
50 
51   /// StringRef - Represent a constant reference to a string, i.e. a character
52   /// array and a length, which need not be null terminated.
53   ///
54   /// This class does not own the string data, it is expected to be used in
55   /// situations where the character data resides in some other buffer, whose
56   /// lifetime extends past that of the StringRef. For this reason, it is not in
57   /// general safe to store a StringRef.
58   class LLVM_GSL_POINTER StringRef {
59   public:
    /// Value returned by the search functions when nothing is found. All bits
    /// are set, i.e. it is the largest value representable in size_t.
    static constexpr size_t npos = ~size_t(0);

    /// Iteration is read-only: both iterator types are pointers to const char.
    using iterator = const char *;
    using const_iterator = const char *;
    using size_type = size_t;
65 
66   private:
    /// The start of the string, in an external buffer. Null for a
    /// default-constructed StringRef.
    const char *Data = nullptr;

    /// The length of the string, in chars. Zero for a default-constructed
    /// StringRef.
    size_t Length = 0;
72 
73     // Workaround memcmp issue with null pointers (undefined behavior)
74     // by providing a specialized version
75     static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
76       if (Length == 0) { return 0; }
77       return ::memcmp(Lhs,Rhs,Length);
78     }
79 
80     // Constexpr version of std::strlen.
81     static constexpr size_t strLen(const char *Str) {
82 #if __cplusplus > 201402L
83       return std::char_traits<char>::length(Str);
84 #elif __has_builtin(__builtin_strlen) || defined(__GNUC__) || \
85     (defined(_MSC_VER) && _MSC_VER >= 1916)
86       return __builtin_strlen(Str);
87 #else
88       const char *Begin = Str;
89       while (*Str != '\0')
90         ++Str;
91       return Str - Begin;
92 #endif
93     }
94 
95   public:
96     /// @name Constructors
97     /// @{
98 
    /// Construct an empty string ref. The members keep their default
    /// initializers, i.e. a null data pointer and a length of zero.
    /*implicit*/ StringRef() = default;
101 
    /// Disable conversion from nullptr.  This prevents things like
    /// \code if (S == nullptr) \endcode
    /// which would otherwise go through the const char * constructor.
    StringRef(std::nullptr_t) = delete;
105 
106     /// Construct a string ref from a cstring.
107     /*implicit*/ constexpr StringRef(const char *Str)
108         : Data(Str), Length(Str ? strLen(Str) : 0) {}
109 
110     /// Construct a string ref from a pointer and length.
111     /*implicit*/ constexpr StringRef(const char *data, size_t length)
112         : Data(data), Length(length) {}
113 
114     /// Construct a string ref from an std::string.
115     /*implicit*/ StringRef(const std::string &Str)
116       : Data(Str.data()), Length(Str.length()) {}
117 
118 #if __cplusplus > 201402L
119     /// Construct a string ref from an std::string_view.
120     /*implicit*/ constexpr StringRef(std::string_view Str)
121         : Data(Str.data()), Length(Str.size()) {}
122 #endif
123 
124     /// @}
125     /// @name Iterators
126     /// @{
127 
128     iterator begin() const { return Data; }
129 
130     iterator end() const { return Data + Length; }
131 
132     const unsigned char *bytes_begin() const {
133       return reinterpret_cast<const unsigned char *>(begin());
134     }
135     const unsigned char *bytes_end() const {
136       return reinterpret_cast<const unsigned char *>(end());
137     }
138     iterator_range<const unsigned char *> bytes() const {
139       return make_range(bytes_begin(), bytes_end());
140     }
141 
142     /// @}
143     /// @name String Operations
144     /// @{
145 
146     /// data - Get a pointer to the start of the string (which may not be null
147     /// terminated).
148     LLVM_NODISCARD
149     const char *data() const { return Data; }
150 
151     /// empty - Check if the string is empty.
152     LLVM_NODISCARD
153     constexpr bool empty() const { return Length == 0; }
154 
    /// size - Get the string size, i.e. the number of chars referred to.
    LLVM_NODISCARD
    constexpr size_t size() const { return Length; }
158 
159     /// front - Get the first character in the string.
160     LLVM_NODISCARD
161     char front() const {
162       assert(!empty());
163       return Data[0];
164     }
165 
166     /// back - Get the last character in the string.
167     LLVM_NODISCARD
168     char back() const {
169       assert(!empty());
170       return Data[Length-1];
171     }
172 
173     // copy - Allocate copy in Allocator and return StringRef to it.
174     template <typename Allocator>
175     LLVM_NODISCARD StringRef copy(Allocator &A) const {
176       // Don't request a length 0 copy from the allocator.
177       if (empty())
178         return StringRef();
179       char *S = A.template Allocate<char>(Length);
180       std::copy(begin(), end(), S);
181       return StringRef(S, Length);
182     }
183 
184     /// equals - Check for string equality, this is more efficient than
185     /// compare() when the relative ordering of inequal strings isn't needed.
186     LLVM_NODISCARD
187     bool equals(StringRef RHS) const {
188       return (Length == RHS.Length &&
189               compareMemory(Data, RHS.Data, RHS.Length) == 0);
190     }
191 
192     /// Check for string equality, ignoring case.
193     LLVM_NODISCARD
194     bool equals_insensitive(StringRef RHS) const {
195       return Length == RHS.Length && compare_insensitive(RHS) == 0;
196     }
197 
198     /// compare - Compare two strings; the result is -1, 0, or 1 if this string
199     /// is lexicographically less than, equal to, or greater than the \p RHS.
200     LLVM_NODISCARD
201     int compare(StringRef RHS) const {
202       // Check the prefix for a mismatch.
203       if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
204         return Res < 0 ? -1 : 1;
205 
206       // Otherwise the prefixes match, so we only need to check the lengths.
207       if (Length == RHS.Length)
208         return 0;
209       return Length < RHS.Length ? -1 : 1;
210     }
211 
    /// Compare two strings, ignoring case. A result of 0 means the strings
    /// are equal up to case; equals_insensitive() relies on this.
    LLVM_NODISCARD
    int compare_insensitive(StringRef RHS) const;

    /// compare_numeric - Compare two strings, treating sequences of digits as
    /// numbers.
    LLVM_NODISCARD
    int compare_numeric(StringRef RHS) const;
220 
    /// Determine the edit distance between this string and another
    /// string.
    ///
    /// \param Other the string to compare this string against.
    ///
    /// \param AllowReplacements whether to allow character
    /// replacements (change one character into another) as a single
    /// operation, rather than as two operations (an insertion and a
    /// removal).
    ///
    /// \param MaxEditDistance If non-zero, the maximum edit distance that
    /// this routine is allowed to compute. If the edit distance will exceed
    /// that maximum, returns \c MaxEditDistance+1.
    ///
    /// \returns the minimum number of character insertions, removals,
    /// or (if \p AllowReplacements is \c true) replacements needed to
    /// transform one of the given strings into the other. If zero,
    /// the strings are identical.
    LLVM_NODISCARD
    unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
                           unsigned MaxEditDistance = 0) const;

    /// Takes the same parameters as edit_distance().
    /// NOTE(review): judging by the name this is the case-insensitive
    /// counterpart of edit_distance(); confirm against the definition.
    LLVM_NODISCARD unsigned
    edit_distance_insensitive(StringRef Other, bool AllowReplacements = true,
                              unsigned MaxEditDistance = 0) const;
246 
247     /// str - Get the contents as an std::string.
248     LLVM_NODISCARD
249     std::string str() const {
250       if (!Data) return std::string();
251       return std::string(Data, Length);
252     }
253 
254     /// @}
255     /// @name Operator Overloads
256     /// @{
257 
258     LLVM_NODISCARD
259     char operator[](size_t Index) const {
260       assert(Index < Length && "Invalid index!");
261       return Data[Index];
262     }
263 
    /// Disallow accidental assignment from a temporary std::string.
    ///
    /// The declaration here is extra complicated so that `stringRef = {}`
    /// and `stringRef = "abc"` continue to select the move assignment operator.
    /// The enable_if restricts this deleted overload to the case where T is
    /// deduced as exactly std::string.
    template <typename T>
    std::enable_if_t<std::is_same<T, std::string>::value, StringRef> &
    operator=(T &&Str) = delete;
271 
272     /// @}
273     /// @name Type Conversions
274     /// @{
275 
276     explicit operator std::string() const { return str(); }
277 
278 #if __cplusplus > 201402L
279     operator std::string_view() const {
280       return std::string_view(data(), size());
281     }
282 #endif
283 
284     /// @}
285     /// @name String Predicates
286     /// @{
287 
288     /// Check if this string starts with the given \p Prefix.
289     LLVM_NODISCARD
290     bool startswith(StringRef Prefix) const {
291       return Length >= Prefix.Length &&
292              compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
293     }
294 
    /// Check if this string starts with the given \p Prefix, ignoring case.
    /// Used by consume_front_insensitive().
    LLVM_NODISCARD
    bool startswith_insensitive(StringRef Prefix) const;
298 
299     /// Check if this string ends with the given \p Suffix.
300     LLVM_NODISCARD
301     bool endswith(StringRef Suffix) const {
302       return Length >= Suffix.Length &&
303         compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
304     }
305 
    /// Check if this string ends with the given \p Suffix, ignoring case.
    /// Used by consume_back_insensitive().
    LLVM_NODISCARD
    bool endswith_insensitive(StringRef Suffix) const;
309 
310     /// @}
311     /// @name String Searching
312     /// @{
313 
314     /// Search for the first character \p C in the string.
315     ///
316     /// \returns The index of the first occurrence of \p C, or npos if not
317     /// found.
318     LLVM_NODISCARD
319     size_t find(char C, size_t From = 0) const {
320       size_t FindBegin = std::min(From, Length);
321       if (FindBegin < Length) { // Avoid calling memchr with nullptr.
322         // Just forward to memchr, which is faster than a hand-rolled loop.
323         if (const void *P = ::memchr(Data + FindBegin, C, Length - FindBegin))
324           return static_cast<const char *>(P) - Data;
325       }
326       return npos;
327     }
328 
    /// Search for the first character \p C in the string, ignoring case.
    ///
    /// \returns The index of the first occurrence of \p C, or npos if not
    /// found.
    LLVM_NODISCARD
    size_t find_insensitive(char C, size_t From = 0) const;
335 
336     /// Search for the first character satisfying the predicate \p F
337     ///
338     /// \returns The index of the first character satisfying \p F starting from
339     /// \p From, or npos if not found.
340     LLVM_NODISCARD
341     size_t find_if(function_ref<bool(char)> F, size_t From = 0) const {
342       StringRef S = drop_front(From);
343       while (!S.empty()) {
344         if (F(S.front()))
345           return size() - S.size();
346         S = S.drop_front();
347       }
348       return npos;
349     }
350 
351     /// Search for the first character not satisfying the predicate \p F
352     ///
353     /// \returns The index of the first character not satisfying \p F starting
354     /// from \p From, or npos if not found.
355     LLVM_NODISCARD
356     size_t find_if_not(function_ref<bool(char)> F, size_t From = 0) const {
357       return find_if([F](char c) { return !F(c); }, From);
358     }
359 
    /// Search for the first string \p Str in the string.
    ///
    /// \returns The index of the first occurrence of \p Str, or npos if not
    /// found. contains() and split() are built on this result.
    LLVM_NODISCARD
    size_t find(StringRef Str, size_t From = 0) const;

    /// Search for the first string \p Str in the string, ignoring case.
    ///
    /// \returns The index of the first occurrence of \p Str, or npos if not
    /// found.
    LLVM_NODISCARD
    size_t find_insensitive(StringRef Str, size_t From = 0) const;
373 
374     /// Search for the last character \p C in the string.
375     ///
376     /// \returns The index of the last occurrence of \p C, or npos if not
377     /// found.
378     LLVM_NODISCARD
379     size_t rfind(char C, size_t From = npos) const {
380       From = std::min(From, Length);
381       size_t i = From;
382       while (i != 0) {
383         --i;
384         if (Data[i] == C)
385           return i;
386       }
387       return npos;
388     }
389 
    /// Search for the last character \p C in the string, ignoring case.
    ///
    /// \returns The index of the last occurrence of \p C, or npos if not
    /// found.
    LLVM_NODISCARD
    size_t rfind_insensitive(char C, size_t From = npos) const;

    /// Search for the last string \p Str in the string.
    ///
    /// \returns The index of the last occurrence of \p Str, or npos if not
    /// found. rsplit() is built on this result.
    LLVM_NODISCARD
    size_t rfind(StringRef Str) const;

    /// Search for the last string \p Str in the string, ignoring case.
    ///
    /// \returns The index of the last occurrence of \p Str, or npos if not
    /// found.
    LLVM_NODISCARD
    size_t rfind_insensitive(StringRef Str) const;
410 
    /// Find the first character in the string that is \p C, or npos if not
    /// found. Same as find: the call is forwarded to find(C, From) unchanged.
    LLVM_NODISCARD
    size_t find_first_of(char C, size_t From = 0) const {
      return find(C, From);
    }
417 
    /// Find the first character in the string that is in \p Chars, or npos if
    /// not found.
    ///
    /// Complexity: O(size() + Chars.size())
    LLVM_NODISCARD
    size_t find_first_of(StringRef Chars, size_t From = 0) const;

    /// Find the first character in the string that is not \p C, or npos if
    /// not found.
    LLVM_NODISCARD
    size_t find_first_not_of(char C, size_t From = 0) const;

    /// Find the first character in the string that is not in the string
    /// \p Chars, or npos if not found.
    ///
    /// Complexity: O(size() + Chars.size())
    LLVM_NODISCARD
    size_t find_first_not_of(StringRef Chars, size_t From = 0) const;
436 
    /// Find the last character in the string that is \p C, or npos if not
    /// found. Same as rfind: the call is forwarded to rfind(C, From)
    /// unchanged.
    LLVM_NODISCARD
    size_t find_last_of(char C, size_t From = npos) const {
      return rfind(C, From);
    }
443 
    /// Find the last character in the string that is in \p Chars, or npos if
    /// not found.
    ///
    /// Complexity: O(size() + Chars.size())
    LLVM_NODISCARD
    size_t find_last_of(StringRef Chars, size_t From = npos) const;

    /// Find the last character in the string that is not \p C, or npos if not
    /// found.
    LLVM_NODISCARD
    size_t find_last_not_of(char C, size_t From = npos) const;

    /// Find the last character in the string that is not in \p Chars, or
    /// npos if not found.
    ///
    /// Complexity: O(size() + Chars.size())
    LLVM_NODISCARD
    size_t find_last_not_of(StringRef Chars, size_t From = npos) const;
462 
463     /// Return true if the given string is a substring of *this, and false
464     /// otherwise.
465     LLVM_NODISCARD
466     bool contains(StringRef Other) const { return find(Other) != npos; }
467 
468     /// Return true if the given character is contained in *this, and false
469     /// otherwise.
470     LLVM_NODISCARD
471     bool contains(char C) const { return find_first_of(C) != npos; }
472 
473     /// Return true if the given string is a substring of *this, and false
474     /// otherwise.
475     LLVM_NODISCARD
476     bool contains_insensitive(StringRef Other) const {
477       return find_insensitive(Other) != npos;
478     }
479 
480     /// Return true if the given character is contained in *this, and false
481     /// otherwise.
482     LLVM_NODISCARD
483     bool contains_insensitive(char C) const {
484       return find_insensitive(C) != npos;
485     }
486 
487     /// @}
488     /// @name Helpful Algorithms
489     /// @{
490 
491     /// Return the number of occurrences of \p C in the string.
492     LLVM_NODISCARD
493     size_t count(char C) const {
494       size_t Count = 0;
495       for (size_t i = 0, e = Length; i != e; ++i)
496         if (Data[i] == C)
497           ++Count;
498       return Count;
499     }
500 
    /// Return the number of non-overlapping occurrences of \p Str in
    /// the string.
    size_t count(StringRef Str) const;
504 
505     /// Parse the current string as an integer of the specified radix.  If
506     /// \p Radix is specified as zero, this does radix autosensing using
507     /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
508     ///
509     /// If the string is invalid or if only a subset of the string is valid,
510     /// this returns true to signify the error.  The string is considered
511     /// erroneous if empty or if it overflows T.
512     template <typename T>
513     std::enable_if_t<std::numeric_limits<T>::is_signed, bool>
514     getAsInteger(unsigned Radix, T &Result) const {
515       long long LLVal;
516       if (getAsSignedInteger(*this, Radix, LLVal) ||
517             static_cast<T>(LLVal) != LLVal)
518         return true;
519       Result = LLVal;
520       return false;
521     }
522 
523     template <typename T>
524     std::enable_if_t<!std::numeric_limits<T>::is_signed, bool>
525     getAsInteger(unsigned Radix, T &Result) const {
526       unsigned long long ULLVal;
527       // The additional cast to unsigned long long is required to avoid the
528       // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type
529       // 'unsigned __int64' when instantiating getAsInteger with T = bool.
530       if (getAsUnsignedInteger(*this, Radix, ULLVal) ||
531           static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
532         return true;
533       Result = ULLVal;
534       return false;
535     }
536 
537     /// Parse the current string as an integer of the specified radix.  If
538     /// \p Radix is specified as zero, this does radix autosensing using
539     /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
540     ///
541     /// If the string does not begin with a number of the specified radix,
542     /// this returns true to signify the error. The string is considered
543     /// erroneous if empty or if it overflows T.
544     /// The portion of the string representing the discovered numeric value
545     /// is removed from the beginning of the string.
546     template <typename T>
547     std::enable_if_t<std::numeric_limits<T>::is_signed, bool>
548     consumeInteger(unsigned Radix, T &Result) {
549       long long LLVal;
550       if (consumeSignedInteger(*this, Radix, LLVal) ||
551           static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
552         return true;
553       Result = LLVal;
554       return false;
555     }
556 
557     template <typename T>
558     std::enable_if_t<!std::numeric_limits<T>::is_signed, bool>
559     consumeInteger(unsigned Radix, T &Result) {
560       unsigned long long ULLVal;
561       if (consumeUnsignedInteger(*this, Radix, ULLVal) ||
562           static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
563         return true;
564       Result = ULLVal;
565       return false;
566     }
567 
    /// Parse the current string as an integer of the specified \p Radix, or of
    /// an autosensed radix if the \p Radix given is 0.  The current value in
    /// \p Result is discarded, and the storage is changed to be wide enough to
    /// store the parsed integer.
    ///
    /// \returns true if the string does not solely consist of a valid
    /// non-empty number in the appropriate base.
    ///
    /// APInt::fromString is superficially similar but assumes the
    /// string is well-formed in the given radix.
    bool getAsInteger(unsigned Radix, APInt &Result) const;

    /// Parse the current string as an IEEE double-precision floating
    /// point value.  The string must be a well-formed double.
    ///
    /// If \p AllowInexact is false, the function will fail if the string
    /// cannot be represented exactly.  Otherwise, the function only fails
    /// in case of an overflow or underflow, or an invalid floating point
    /// representation.
    ///
    /// NOTE(review): the return convention is presumably the same as for the
    /// integer parsers (true signals failure); confirm against the definition.
    bool getAsDouble(double &Result, bool AllowInexact = true) const;
588 
589     /// @}
590     /// @name String Operations
591     /// @{
592 
    /// Convert the given ASCII string to lowercase. The result is returned
    /// as a new std::string.
    LLVM_NODISCARD
    std::string lower() const;

    /// Convert the given ASCII string to uppercase. The result is returned
    /// as a new std::string.
    LLVM_NODISCARD
    std::string upper() const;
600 
601     /// @}
602     /// @name Substring Operations
603     /// @{
604 
605     /// Return a reference to the substring from [Start, Start + N).
606     ///
607     /// \param Start The index of the starting character in the substring; if
608     /// the index is npos or greater than the length of the string then the
609     /// empty substring will be returned.
610     ///
611     /// \param N The number of characters to included in the substring. If N
612     /// exceeds the number of characters remaining in the string, the string
613     /// suffix (starting with \p Start) will be returned.
614     LLVM_NODISCARD
615     StringRef substr(size_t Start, size_t N = npos) const {
616       Start = std::min(Start, Length);
617       return StringRef(Data + Start, std::min(N, Length - Start));
618     }
619 
620     /// Return a StringRef equal to 'this' but with only the first \p N
621     /// elements remaining.  If \p N is greater than the length of the
622     /// string, the entire string is returned.
623     LLVM_NODISCARD
624     StringRef take_front(size_t N = 1) const {
625       if (N >= size())
626         return *this;
627       return drop_back(size() - N);
628     }
629 
630     /// Return a StringRef equal to 'this' but with only the last \p N
631     /// elements remaining.  If \p N is greater than the length of the
632     /// string, the entire string is returned.
633     LLVM_NODISCARD
634     StringRef take_back(size_t N = 1) const {
635       if (N >= size())
636         return *this;
637       return drop_front(size() - N);
638     }
639 
640     /// Return the longest prefix of 'this' such that every character
641     /// in the prefix satisfies the given predicate.
642     LLVM_NODISCARD
643     StringRef take_while(function_ref<bool(char)> F) const {
644       return substr(0, find_if_not(F));
645     }
646 
647     /// Return the longest prefix of 'this' such that no character in
648     /// the prefix satisfies the given predicate.
649     LLVM_NODISCARD
650     StringRef take_until(function_ref<bool(char)> F) const {
651       return substr(0, find_if(F));
652     }
653 
654     /// Return a StringRef equal to 'this' but with the first \p N elements
655     /// dropped.
656     LLVM_NODISCARD
657     StringRef drop_front(size_t N = 1) const {
658       assert(size() >= N && "Dropping more elements than exist");
659       return substr(N);
660     }
661 
662     /// Return a StringRef equal to 'this' but with the last \p N elements
663     /// dropped.
664     LLVM_NODISCARD
665     StringRef drop_back(size_t N = 1) const {
666       assert(size() >= N && "Dropping more elements than exist");
667       return substr(0, size()-N);
668     }
669 
670     /// Return a StringRef equal to 'this', but with all characters satisfying
671     /// the given predicate dropped from the beginning of the string.
672     LLVM_NODISCARD
673     StringRef drop_while(function_ref<bool(char)> F) const {
674       return substr(find_if_not(F));
675     }
676 
677     /// Return a StringRef equal to 'this', but with all characters not
678     /// satisfying the given predicate dropped from the beginning of the string.
679     LLVM_NODISCARD
680     StringRef drop_until(function_ref<bool(char)> F) const {
681       return substr(find_if(F));
682     }
683 
684     /// Returns true if this StringRef has the given prefix and removes that
685     /// prefix.
686     bool consume_front(StringRef Prefix) {
687       if (!startswith(Prefix))
688         return false;
689 
690       *this = drop_front(Prefix.size());
691       return true;
692     }
693 
694     /// Returns true if this StringRef has the given prefix, ignoring case,
695     /// and removes that prefix.
696     bool consume_front_insensitive(StringRef Prefix) {
697       if (!startswith_insensitive(Prefix))
698         return false;
699 
700       *this = drop_front(Prefix.size());
701       return true;
702     }
703 
704     /// Returns true if this StringRef has the given suffix and removes that
705     /// suffix.
706     bool consume_back(StringRef Suffix) {
707       if (!endswith(Suffix))
708         return false;
709 
710       *this = drop_back(Suffix.size());
711       return true;
712     }
713 
714     /// Returns true if this StringRef has the given suffix, ignoring case,
715     /// and removes that suffix.
716     bool consume_back_insensitive(StringRef Suffix) {
717       if (!endswith_insensitive(Suffix))
718         return false;
719 
720       *this = drop_back(Suffix.size());
721       return true;
722     }
723 
724     /// Return a reference to the substring from [Start, End).
725     ///
726     /// \param Start The index of the starting character in the substring; if
727     /// the index is npos or greater than the length of the string then the
728     /// empty substring will be returned.
729     ///
730     /// \param End The index following the last character to include in the
731     /// substring. If this is npos or exceeds the number of characters
732     /// remaining in the string, the string suffix (starting with \p Start)
733     /// will be returned. If this is less than \p Start, an empty string will
734     /// be returned.
735     LLVM_NODISCARD
736     StringRef slice(size_t Start, size_t End) const {
737       Start = std::min(Start, Length);
738       End = std::min(std::max(Start, End), Length);
739       return StringRef(Data + Start, End - Start);
740     }
741 
742     /// Split into two substrings around the first occurrence of a separator
743     /// character.
744     ///
745     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
746     /// such that (*this == LHS + Separator + RHS) is true and RHS is
747     /// maximal. If \p Separator is not in the string, then the result is a
748     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
749     ///
750     /// \param Separator The character to split on.
751     /// \returns The split substrings.
752     LLVM_NODISCARD
753     std::pair<StringRef, StringRef> split(char Separator) const {
754       return split(StringRef(&Separator, 1));
755     }
756 
757     /// Split into two substrings around the first occurrence of a separator
758     /// string.
759     ///
760     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
761     /// such that (*this == LHS + Separator + RHS) is true and RHS is
762     /// maximal. If \p Separator is not in the string, then the result is a
763     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
764     ///
765     /// \param Separator - The string to split on.
766     /// \return - The split substrings.
767     LLVM_NODISCARD
768     std::pair<StringRef, StringRef> split(StringRef Separator) const {
769       size_t Idx = find(Separator);
770       if (Idx == npos)
771         return std::make_pair(*this, StringRef());
772       return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
773     }
774 
775     /// Split into two substrings around the last occurrence of a separator
776     /// string.
777     ///
778     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
779     /// such that (*this == LHS + Separator + RHS) is true and RHS is
780     /// minimal. If \p Separator is not in the string, then the result is a
781     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
782     ///
783     /// \param Separator - The string to split on.
784     /// \return - The split substrings.
785     LLVM_NODISCARD
786     std::pair<StringRef, StringRef> rsplit(StringRef Separator) const {
787       size_t Idx = rfind(Separator);
788       if (Idx == npos)
789         return std::make_pair(*this, StringRef());
790       return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
791     }
792 
    /// Split into substrings around the occurrences of a separator string.
    ///
    /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
    /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
    /// elements are added to A.
    /// If \p KeepEmpty is false, empty strings are not added to \p A. They
    /// still count when considering \p MaxSplit.
    /// A useful invariant is that
    /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
    ///
    /// \param A - Where to put the substrings.
    /// \param Separator - The string to split on.
    /// \param MaxSplit - The maximum number of times the string is split.
    /// \param KeepEmpty - True if empty substring should be added.
    void split(SmallVectorImpl<StringRef> &A,
               StringRef Separator, int MaxSplit = -1,
               bool KeepEmpty = true) const;

    /// Split into substrings around the occurrences of a separator character.
    ///
    /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
    /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
    /// elements are added to A.
    /// If \p KeepEmpty is false, empty strings are not added to \p A. They
    /// still count when considering \p MaxSplit.
    /// A useful invariant is that
    /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
    ///
    /// \param A - Where to put the substrings.
    /// \param Separator - The character to split on.
    /// \param MaxSplit - The maximum number of times the string is split.
    /// \param KeepEmpty - True if empty substring should be added.
    void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1,
               bool KeepEmpty = true) const;
827 
828     /// Split into two substrings around the last occurrence of a separator
829     /// character.
830     ///
831     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
832     /// such that (*this == LHS + Separator + RHS) is true and RHS is
833     /// minimal. If \p Separator is not in the string, then the result is a
834     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
835     ///
836     /// \param Separator - The character to split on.
837     /// \return - The split substrings.
838     LLVM_NODISCARD
839     std::pair<StringRef, StringRef> rsplit(char Separator) const {
840       return rsplit(StringRef(&Separator, 1));
841     }
842 
843     /// Return string with consecutive \p Char characters starting from the
844     /// the left removed.
845     LLVM_NODISCARD
846     StringRef ltrim(char Char) const {
847       return drop_front(std::min(Length, find_first_not_of(Char)));
848     }
849 
850     /// Return string with consecutive characters in \p Chars starting from
851     /// the left removed.
852     LLVM_NODISCARD
853     StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
854       return drop_front(std::min(Length, find_first_not_of(Chars)));
855     }
856 
857     /// Return string with consecutive \p Char characters starting from the
858     /// right removed.
859     LLVM_NODISCARD
860     StringRef rtrim(char Char) const {
861       return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1));
862     }
863 
864     /// Return string with consecutive characters in \p Chars starting from
865     /// the right removed.
866     LLVM_NODISCARD
867     StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
868       return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1));
869     }
870 
871     /// Return string with consecutive \p Char characters starting from the
872     /// left and right removed.
873     LLVM_NODISCARD
874     StringRef trim(char Char) const {
875       return ltrim(Char).rtrim(Char);
876     }
877 
878     /// Return string with consecutive characters in \p Chars starting from
879     /// the left and right removed.
880     LLVM_NODISCARD
881     StringRef trim(StringRef Chars = " \t\n\v\f\r") const {
882       return ltrim(Chars).rtrim(Chars);
883     }
884 
885     /// Detect the line ending style of the string.
886     ///
887     /// If the string contains a line ending, return the line ending character
888     /// sequence that is detected. Otherwise return '\n' for unix line endings.
889     ///
890     /// \return - The line ending character sequence.
891     LLVM_NODISCARD
892     StringRef detectEOL() const {
893       size_t Pos = find('\r');
894       if (Pos == npos) {
895         // If there is no carriage return, assume unix
896         return "\n";
897       }
898       if (Pos + 1 < Length && Data[Pos + 1] == '\n')
899         return "\r\n"; // Windows
900       if (Pos > 0 && Data[Pos - 1] == '\n')
901         return "\n\r"; // You monster!
902       return "\r";     // Classic Mac
903     }
904     /// @}
905   };
906 
  /// A wrapper around a string literal that serves as a proxy for constructing
  /// global tables of StringRefs with the length computed at compile time.
  /// In order to avoid the invocation of a global constructor, StringLiteral
  /// should *only* be used in a constexpr context, as such:
  ///
  /// \code
  ///   constexpr StringLiteral S("test");
  /// \endcode
  ///
  class StringLiteral : public StringRef {
  private:
    /// Construct from a pointer and an explicit length \p N, without any
    /// check on the contents. Private; used by withInnerNUL() below.
    constexpr StringLiteral(const char *Str, size_t N) : StringRef(Str, N) {
    }

  public:
    /// Construct from a character array of \p N elements. The resulting
    /// length is N - 1, i.e. the last array element is not part of the string.
    ///
    /// When building with Clang and the enable_if attribute is available,
    /// this constructor is only enabled if __builtin_strlen(Str) == N - 1;
    /// an array with a NUL before its last element is rejected ("invalid
    /// string literal"). The -Wgcc-compat diagnostic is ignored around the
    /// attribute.
    template <size_t N>
    constexpr StringLiteral(const char (&Str)[N])
#if defined(__clang__) && __has_attribute(enable_if)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wgcc-compat"
        __attribute((enable_if(__builtin_strlen(Str) == N - 1,
                               "invalid string literal")))
#pragma clang diagnostic pop
#endif
        : StringRef(Str, N - 1) {
    }

    /// Explicit construction for strings like "foo\0bar".
    ///
    /// Goes through the unchecked private constructor, so the result covers
    /// all N - 1 characters of \p Str, including any inner NUL characters.
    template <size_t N>
    static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) {
      return StringLiteral(Str, N - 1);
    }
  };
938 
939   /// @name StringRef Comparison Operators
940   /// @{
941 
942   inline bool operator==(StringRef LHS, StringRef RHS) {
943     return LHS.equals(RHS);
944   }
945 
946   inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }
947 
948   inline bool operator<(StringRef LHS, StringRef RHS) {
949     return LHS.compare(RHS) == -1;
950   }
951 
952   inline bool operator<=(StringRef LHS, StringRef RHS) {
953     return LHS.compare(RHS) != 1;
954   }
955 
956   inline bool operator>(StringRef LHS, StringRef RHS) {
957     return LHS.compare(RHS) == 1;
958   }
959 
960   inline bool operator>=(StringRef LHS, StringRef RHS) {
961     return LHS.compare(RHS) != -1;
962   }
963 
964   inline std::string &operator+=(std::string &buffer, StringRef string) {
965     return buffer.append(string.data(), string.size());
966   }
967 
968   /// @}
969 
  /// Compute a hash_code for a StringRef.
  ///
  /// This is a declaration only; hash_code is just forward-declared near the
  /// top of this header.
  ///
  /// \param S - The string to hash.
  /// \return - The hash_code computed for \p S.
  LLVM_NODISCARD
  hash_code hash_value(StringRef S);
973 
974   // Provide DenseMapInfo for StringRefs.
975   template <> struct DenseMapInfo<StringRef, void> {
976     static inline StringRef getEmptyKey() {
977       return StringRef(
978           reinterpret_cast<const char *>(~static_cast<uintptr_t>(0)), 0);
979     }
980 
981     static inline StringRef getTombstoneKey() {
982       return StringRef(
983           reinterpret_cast<const char *>(~static_cast<uintptr_t>(1)), 0);
984     }
985 
986     static unsigned getHashValue(StringRef Val);
987 
988     static bool isEqual(StringRef LHS, StringRef RHS) {
989       if (RHS.data() == getEmptyKey().data())
990         return LHS.data() == getEmptyKey().data();
991       if (RHS.data() == getTombstoneKey().data())
992         return LHS.data() == getTombstoneKey().data();
993       return LHS == RHS;
994     }
995   };
996 
997 } // end namespace llvm
998 
999 #endif // LLVM_ADT_STRINGREF_H
1000