1 //===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_ADT_STRINGREF_H
10 #define LLVM_ADT_STRINGREF_H
11 
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/ADT/iterator_range.h"
14 #include "llvm/Support/Compiler.h"
15 #include <algorithm>
16 #include <cassert>
17 #include <cstddef>
18 #include <cstring>
19 #include <limits>
20 #include <string>
21 #if __cplusplus > 201402L
22 #include <string_view>
23 #endif
24 #include <type_traits>
25 #include <utility>
26 
27 // Declare the __builtin_strlen intrinsic for MSVC so it can be used in
28 // constexpr context.
29 #if defined(_MSC_VER)
30 extern "C" size_t __builtin_strlen(const char *);
31 #endif
32 
33 namespace llvm {
34 
35   class APInt;
36   class hash_code;
37   template <typename T> class SmallVectorImpl;
38   class StringRef;
39 
40   /// Helper functions for StringRef::getAsInteger.
41   bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
42                             unsigned long long &Result);
43 
44   bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);
45 
46   bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
47                               unsigned long long &Result);
48   bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);
49 
50   /// StringRef - Represent a constant reference to a string, i.e. a character
51   /// array and a length, which need not be null terminated.
52   ///
53   /// This class does not own the string data, it is expected to be used in
54   /// situations where the character data resides in some other buffer, whose
55   /// lifetime extends past that of the StringRef. For this reason, it is not in
56   /// general safe to store a StringRef.
57   class LLVM_GSL_POINTER StringRef {
58   public:
    /// Sentinel with every bit set. The search functions return it to mean
    /// "not found", and it serves as the default "no limit" position/count
    /// argument.
    static constexpr size_t npos = ~size_t(0);

    /// Iterators are plain pointers to const char; iterator and
    /// const_iterator are the same type.
    using iterator = const char *;
    using const_iterator = const char *;
    using size_type = size_t;
64 
65   private:
    /// The start of the string, in an external buffer. Null for a
    /// default-constructed StringRef.
    const char *Data = nullptr;

    /// The length of the string. Zero for a default-constructed StringRef.
    size_t Length = 0;
71 
72     // Workaround memcmp issue with null pointers (undefined behavior)
73     // by providing a specialized version
compareMemory(const char * Lhs,const char * Rhs,size_t Length)74     static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
75       if (Length == 0) { return 0; }
76       return ::memcmp(Lhs,Rhs,Length);
77     }
78 
79     // Constexpr version of std::strlen.
strLen(const char * Str)80     static constexpr size_t strLen(const char *Str) {
81 #if __cplusplus > 201402L
82       return std::char_traits<char>::length(Str);
83 #elif __has_builtin(__builtin_strlen) || defined(__GNUC__) || \
84     (defined(_MSC_VER) && _MSC_VER >= 1916)
85       return __builtin_strlen(Str);
86 #else
87       const char *Begin = Str;
88       while (*Str != '\0')
89         ++Str;
90       return Str - Begin;
91 #endif
92     }
93 
94   public:
95     /// @name Constructors
96     /// @{
97 
    /// Construct an empty string ref. The in-class member initializers leave
    /// it with a null data pointer and a length of zero.
    /*implicit*/ StringRef() = default;
100 
    /// Disable conversion from nullptr. Deleting this overload makes a
    /// nullptr argument ill-formed, which prevents things like
    /// if (S == nullptr)
    StringRef(std::nullptr_t) = delete;
104 
105     /// Construct a string ref from a cstring.
StringRef(const char * Str)106     /*implicit*/ constexpr StringRef(const char *Str)
107         : Data(Str), Length(Str ? strLen(Str) : 0) {}
108 
109     /// Construct a string ref from a pointer and length.
StringRef(const char * data,size_t length)110     /*implicit*/ constexpr StringRef(const char *data, size_t length)
111         : Data(data), Length(length) {}
112 
113     /// Construct a string ref from an std::string.
StringRef(const std::string & Str)114     /*implicit*/ StringRef(const std::string &Str)
115       : Data(Str.data()), Length(Str.length()) {}
116 
117 #if __cplusplus > 201402L
118     /// Construct a string ref from an std::string_view.
StringRef(std::string_view Str)119     /*implicit*/ constexpr StringRef(std::string_view Str)
120         : Data(Str.data()), Length(Str.size()) {}
121 #endif
122 
withNullAsEmpty(const char * data)123     static StringRef withNullAsEmpty(const char *data) {
124       return StringRef(data ? data : "");
125     }
126 
127     /// @}
128     /// @name Iterators
129     /// @{
130 
    /// Iterator to the first character: the stored data pointer, which is
    /// null for a default-constructed StringRef.
    iterator begin() const { return Data; }
132 
end()133     iterator end() const { return Data + Length; }
134 
bytes_begin()135     const unsigned char *bytes_begin() const {
136       return reinterpret_cast<const unsigned char *>(begin());
137     }
bytes_end()138     const unsigned char *bytes_end() const {
139       return reinterpret_cast<const unsigned char *>(end());
140     }
bytes()141     iterator_range<const unsigned char *> bytes() const {
142       return make_range(bytes_begin(), bytes_end());
143     }
144 
145     /// @}
146     /// @name String Operations
147     /// @{
148 
    /// data - Get a pointer to the start of the string (which may not be null
    /// terminated). This is the stored pointer itself, so it is null for a
    /// default-constructed StringRef.
    LLVM_NODISCARD
    const char *data() const { return Data; }
153 
154     /// empty - Check if the string is empty.
155     LLVM_NODISCARD
empty()156     bool empty() const { return Length == 0; }
157 
    /// size - Get the string size: the stored length, which does not depend
    /// on any null terminator in the data.
    LLVM_NODISCARD
    size_t size() const { return Length; }
161 
162     /// front - Get the first character in the string.
163     LLVM_NODISCARD
front()164     char front() const {
165       assert(!empty());
166       return Data[0];
167     }
168 
169     /// back - Get the last character in the string.
170     LLVM_NODISCARD
back()171     char back() const {
172       assert(!empty());
173       return Data[Length-1];
174     }
175 
176     // copy - Allocate copy in Allocator and return StringRef to it.
177     template <typename Allocator>
copy(Allocator & A)178     LLVM_NODISCARD StringRef copy(Allocator &A) const {
179       // Don't request a length 0 copy from the allocator.
180       if (empty())
181         return StringRef();
182       char *S = A.template Allocate<char>(Length);
183       std::copy(begin(), end(), S);
184       return StringRef(S, Length);
185     }
186 
187     /// equals - Check for string equality, this is more efficient than
188     /// compare() when the relative ordering of inequal strings isn't needed.
189     LLVM_NODISCARD
equals(StringRef RHS)190     bool equals(StringRef RHS) const {
191       return (Length == RHS.Length &&
192               compareMemory(Data, RHS.Data, RHS.Length) == 0);
193     }
194 
195     /// equals_lower - Check for string equality, ignoring case.
196     LLVM_NODISCARD
equals_lower(StringRef RHS)197     bool equals_lower(StringRef RHS) const {
198       return Length == RHS.Length && compare_lower(RHS) == 0;
199     }
200 
201     /// compare - Compare two strings; the result is -1, 0, or 1 if this string
202     /// is lexicographically less than, equal to, or greater than the \p RHS.
203     LLVM_NODISCARD
compare(StringRef RHS)204     int compare(StringRef RHS) const {
205       // Check the prefix for a mismatch.
206       if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
207         return Res < 0 ? -1 : 1;
208 
209       // Otherwise the prefixes match, so we only need to check the lengths.
210       if (Length == RHS.Length)
211         return 0;
212       return Length < RHS.Length ? -1 : 1;
213     }
214 
215     /// compare_lower - Compare two strings, ignoring case.
216     LLVM_NODISCARD
217     int compare_lower(StringRef RHS) const;
218 
219     /// compare_numeric - Compare two strings, treating sequences of digits as
220     /// numbers.
221     LLVM_NODISCARD
222     int compare_numeric(StringRef RHS) const;
223 
224     /// Determine the edit distance between this string and another
225     /// string.
226     ///
227     /// \param Other the string to compare this string against.
228     ///
229     /// \param AllowReplacements whether to allow character
230     /// replacements (change one character into another) as a single
231     /// operation, rather than as two operations (an insertion and a
232     /// removal).
233     ///
234     /// \param MaxEditDistance If non-zero, the maximum edit distance that
235     /// this routine is allowed to compute. If the edit distance will exceed
236     /// that maximum, returns \c MaxEditDistance+1.
237     ///
238     /// \returns the minimum number of character insertions, removals,
239     /// or (if \p AllowReplacements is \c true) replacements needed to
240     /// transform one of the given strings into the other. If zero,
241     /// the strings are identical.
242     LLVM_NODISCARD
243     unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
244                            unsigned MaxEditDistance = 0) const;
245 
246     /// str - Get the contents as an std::string.
247     LLVM_NODISCARD
str()248     std::string str() const {
249       if (!Data) return std::string();
250       return std::string(Data, Length);
251     }
252 
253     /// @}
254     /// @name Operator Overloads
255     /// @{
256 
257     LLVM_NODISCARD
258     char operator[](size_t Index) const {
259       assert(Index < Length && "Invalid index!");
260       return Data[Index];
261     }
262 
    /// Disallow accidental assignment from a temporary std::string.
    ///
    /// The declaration here is extra complicated so that `stringRef = {}`
    /// and `stringRef = "abc"` continue to select the move assignment operator.
    /// The enable_if condition only holds when T is deduced as exactly
    /// std::string, which for the forwarding reference `T &&` happens for
    /// rvalue std::string arguments.
    template <typename T>
    std::enable_if_t<std::is_same<T, std::string>::value, StringRef> &
    operator=(T &&Str) = delete;
270 
271     /// @}
272     /// @name Type Conversions
273     /// @{
274 
string()275     explicit operator std::string() const { return str(); }
276 
277 #if __cplusplus > 201402L
string_view()278     operator std::string_view() const {
279       return std::string_view(data(), size());
280     }
281 #endif
282 
283     /// @}
284     /// @name String Predicates
285     /// @{
286 
287     /// Check if this string starts with the given \p Prefix.
288     LLVM_NODISCARD
startswith(StringRef Prefix)289     bool startswith(StringRef Prefix) const {
290       return Length >= Prefix.Length &&
291              compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
292     }
293 
294     /// Check if this string starts with the given \p Prefix, ignoring case.
295     LLVM_NODISCARD
296     bool startswith_lower(StringRef Prefix) const;
297 
298     /// Check if this string ends with the given \p Suffix.
299     LLVM_NODISCARD
endswith(StringRef Suffix)300     bool endswith(StringRef Suffix) const {
301       return Length >= Suffix.Length &&
302         compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
303     }
304 
305     /// Check if this string ends with the given \p Suffix, ignoring case.
306     LLVM_NODISCARD
307     bool endswith_lower(StringRef Suffix) const;
308 
309     /// @}
310     /// @name String Searching
311     /// @{
312 
313     /// Search for the first character \p C in the string.
314     ///
315     /// \returns The index of the first occurrence of \p C, or npos if not
316     /// found.
317     LLVM_NODISCARD
318     size_t find(char C, size_t From = 0) const {
319       size_t FindBegin = std::min(From, Length);
320       if (FindBegin < Length) { // Avoid calling memchr with nullptr.
321         // Just forward to memchr, which is faster than a hand-rolled loop.
322         if (const void *P = ::memchr(Data + FindBegin, C, Length - FindBegin))
323           return static_cast<const char *>(P) - Data;
324       }
325       return npos;
326     }
327 
328     /// Search for the first character \p C in the string, ignoring case.
329     ///
330     /// \returns The index of the first occurrence of \p C, or npos if not
331     /// found.
332     LLVM_NODISCARD
333     size_t find_lower(char C, size_t From = 0) const;
334 
335     /// Search for the first character satisfying the predicate \p F
336     ///
337     /// \returns The index of the first character satisfying \p F starting from
338     /// \p From, or npos if not found.
339     LLVM_NODISCARD
340     size_t find_if(function_ref<bool(char)> F, size_t From = 0) const {
341       StringRef S = drop_front(From);
342       while (!S.empty()) {
343         if (F(S.front()))
344           return size() - S.size();
345         S = S.drop_front();
346       }
347       return npos;
348     }
349 
350     /// Search for the first character not satisfying the predicate \p F
351     ///
352     /// \returns The index of the first character not satisfying \p F starting
353     /// from \p From, or npos if not found.
354     LLVM_NODISCARD
355     size_t find_if_not(function_ref<bool(char)> F, size_t From = 0) const {
356       return find_if([F](char c) { return !F(c); }, From);
357     }
358 
359     /// Search for the first string \p Str in the string.
360     ///
361     /// \returns The index of the first occurrence of \p Str, or npos if not
362     /// found.
363     LLVM_NODISCARD
364     size_t find(StringRef Str, size_t From = 0) const;
365 
366     /// Search for the first string \p Str in the string, ignoring case.
367     ///
368     /// \returns The index of the first occurrence of \p Str, or npos if not
369     /// found.
370     LLVM_NODISCARD
371     size_t find_lower(StringRef Str, size_t From = 0) const;
372 
373     /// Search for the last character \p C in the string.
374     ///
375     /// \returns The index of the last occurrence of \p C, or npos if not
376     /// found.
377     LLVM_NODISCARD
378     size_t rfind(char C, size_t From = npos) const {
379       From = std::min(From, Length);
380       size_t i = From;
381       while (i != 0) {
382         --i;
383         if (Data[i] == C)
384           return i;
385       }
386       return npos;
387     }
388 
389     /// Search for the last character \p C in the string, ignoring case.
390     ///
391     /// \returns The index of the last occurrence of \p C, or npos if not
392     /// found.
393     LLVM_NODISCARD
394     size_t rfind_lower(char C, size_t From = npos) const;
395 
396     /// Search for the last string \p Str in the string.
397     ///
398     /// \returns The index of the last occurrence of \p Str, or npos if not
399     /// found.
400     LLVM_NODISCARD
401     size_t rfind(StringRef Str) const;
402 
403     /// Search for the last string \p Str in the string, ignoring case.
404     ///
405     /// \returns The index of the last occurrence of \p Str, or npos if not
406     /// found.
407     LLVM_NODISCARD
408     size_t rfind_lower(StringRef Str) const;
409 
    /// Find the first character in the string that is \p C, or npos if not
    /// found. Same as find: this overload simply forwards \p C and \p From
    /// to find(char, size_t).
    LLVM_NODISCARD
    size_t find_first_of(char C, size_t From = 0) const {
      return find(C, From);
    }
416 
417     /// Find the first character in the string that is in \p Chars, or npos if
418     /// not found.
419     ///
420     /// Complexity: O(size() + Chars.size())
421     LLVM_NODISCARD
422     size_t find_first_of(StringRef Chars, size_t From = 0) const;
423 
424     /// Find the first character in the string that is not \p C or npos if not
425     /// found.
426     LLVM_NODISCARD
427     size_t find_first_not_of(char C, size_t From = 0) const;
428 
429     /// Find the first character in the string that is not in the string
430     /// \p Chars, or npos if not found.
431     ///
432     /// Complexity: O(size() + Chars.size())
433     LLVM_NODISCARD
434     size_t find_first_not_of(StringRef Chars, size_t From = 0) const;
435 
    /// Find the last character in the string that is \p C, or npos if not
    /// found. Same as rfind: this overload simply forwards \p C and \p From
    /// to rfind(char, size_t).
    LLVM_NODISCARD
    size_t find_last_of(char C, size_t From = npos) const {
      return rfind(C, From);
    }
442 
    /// Find the last character in the string that is in \p Chars, or npos if
    /// not found.
445     ///
446     /// Complexity: O(size() + Chars.size())
447     LLVM_NODISCARD
448     size_t find_last_of(StringRef Chars, size_t From = npos) const;
449 
450     /// Find the last character in the string that is not \p C, or npos if not
451     /// found.
452     LLVM_NODISCARD
453     size_t find_last_not_of(char C, size_t From = npos) const;
454 
455     /// Find the last character in the string that is not in \p Chars, or
456     /// npos if not found.
457     ///
458     /// Complexity: O(size() + Chars.size())
459     LLVM_NODISCARD
460     size_t find_last_not_of(StringRef Chars, size_t From = npos) const;
461 
462     /// Return true if the given string is a substring of *this, and false
463     /// otherwise.
464     LLVM_NODISCARD
contains(StringRef Other)465     bool contains(StringRef Other) const { return find(Other) != npos; }
466 
467     /// Return true if the given character is contained in *this, and false
468     /// otherwise.
469     LLVM_NODISCARD
contains(char C)470     bool contains(char C) const { return find_first_of(C) != npos; }
471 
472     /// Return true if the given string is a substring of *this, and false
473     /// otherwise.
474     LLVM_NODISCARD
contains_lower(StringRef Other)475     bool contains_lower(StringRef Other) const {
476       return find_lower(Other) != npos;
477     }
478 
479     /// Return true if the given character is contained in *this, and false
480     /// otherwise.
481     LLVM_NODISCARD
contains_lower(char C)482     bool contains_lower(char C) const { return find_lower(C) != npos; }
483 
484     /// @}
485     /// @name Helpful Algorithms
486     /// @{
487 
488     /// Return the number of occurrences of \p C in the string.
489     LLVM_NODISCARD
count(char C)490     size_t count(char C) const {
491       size_t Count = 0;
492       for (size_t i = 0, e = Length; i != e; ++i)
493         if (Data[i] == C)
494           ++Count;
495       return Count;
496     }
497 
498     /// Return the number of non-overlapped occurrences of \p Str in
499     /// the string.
500     size_t count(StringRef Str) const;
501 
502     /// Parse the current string as an integer of the specified radix.  If
503     /// \p Radix is specified as zero, this does radix autosensing using
504     /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
505     ///
506     /// If the string is invalid or if only a subset of the string is valid,
507     /// this returns true to signify the error.  The string is considered
508     /// erroneous if empty or if it overflows T.
509     template <typename T>
510     std::enable_if_t<std::numeric_limits<T>::is_signed, bool>
getAsInteger(unsigned Radix,T & Result)511     getAsInteger(unsigned Radix, T &Result) const {
512       long long LLVal;
513       if (getAsSignedInteger(*this, Radix, LLVal) ||
514             static_cast<T>(LLVal) != LLVal)
515         return true;
516       Result = LLVal;
517       return false;
518     }
519 
520     template <typename T>
521     std::enable_if_t<!std::numeric_limits<T>::is_signed, bool>
getAsInteger(unsigned Radix,T & Result)522     getAsInteger(unsigned Radix, T &Result) const {
523       unsigned long long ULLVal;
524       // The additional cast to unsigned long long is required to avoid the
525       // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type
526       // 'unsigned __int64' when instantiating getAsInteger with T = bool.
527       if (getAsUnsignedInteger(*this, Radix, ULLVal) ||
528           static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
529         return true;
530       Result = ULLVal;
531       return false;
532     }
533 
534     /// Parse the current string as an integer of the specified radix.  If
535     /// \p Radix is specified as zero, this does radix autosensing using
536     /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
537     ///
538     /// If the string does not begin with a number of the specified radix,
539     /// this returns true to signify the error. The string is considered
540     /// erroneous if empty or if it overflows T.
541     /// The portion of the string representing the discovered numeric value
542     /// is removed from the beginning of the string.
543     template <typename T>
544     std::enable_if_t<std::numeric_limits<T>::is_signed, bool>
consumeInteger(unsigned Radix,T & Result)545     consumeInteger(unsigned Radix, T &Result) {
546       long long LLVal;
547       if (consumeSignedInteger(*this, Radix, LLVal) ||
548           static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
549         return true;
550       Result = LLVal;
551       return false;
552     }
553 
554     template <typename T>
555     std::enable_if_t<!std::numeric_limits<T>::is_signed, bool>
consumeInteger(unsigned Radix,T & Result)556     consumeInteger(unsigned Radix, T &Result) {
557       unsigned long long ULLVal;
558       if (consumeUnsignedInteger(*this, Radix, ULLVal) ||
559           static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
560         return true;
561       Result = ULLVal;
562       return false;
563     }
564 
565     /// Parse the current string as an integer of the specified \p Radix, or of
566     /// an autosensed radix if the \p Radix given is 0.  The current value in
567     /// \p Result is discarded, and the storage is changed to be wide enough to
568     /// store the parsed integer.
569     ///
570     /// \returns true if the string does not solely consist of a valid
571     /// non-empty number in the appropriate base.
572     ///
573     /// APInt::fromString is superficially similar but assumes the
574     /// string is well-formed in the given radix.
575     bool getAsInteger(unsigned Radix, APInt &Result) const;
576 
577     /// Parse the current string as an IEEE double-precision floating
578     /// point value.  The string must be a well-formed double.
579     ///
580     /// If \p AllowInexact is false, the function will fail if the string
581     /// cannot be represented exactly.  Otherwise, the function only fails
582     /// in case of an overflow or underflow, or an invalid floating point
583     /// representation.
584     bool getAsDouble(double &Result, bool AllowInexact = true) const;
585 
586     /// @}
587     /// @name String Operations
588     /// @{
589 
    /// Convert the given ASCII string to lowercase.
591     LLVM_NODISCARD
592     std::string lower() const;
593 
594     /// Convert the given ASCII string to uppercase.
595     LLVM_NODISCARD
596     std::string upper() const;
597 
598     /// @}
599     /// @name Substring Operations
600     /// @{
601 
602     /// Return a reference to the substring from [Start, Start + N).
603     ///
604     /// \param Start The index of the starting character in the substring; if
605     /// the index is npos or greater than the length of the string then the
606     /// empty substring will be returned.
607     ///
608     /// \param N The number of characters to included in the substring. If N
609     /// exceeds the number of characters remaining in the string, the string
610     /// suffix (starting with \p Start) will be returned.
611     LLVM_NODISCARD
612     StringRef substr(size_t Start, size_t N = npos) const {
613       Start = std::min(Start, Length);
614       return StringRef(Data + Start, std::min(N, Length - Start));
615     }
616 
617     /// Return a StringRef equal to 'this' but with only the first \p N
618     /// elements remaining.  If \p N is greater than the length of the
619     /// string, the entire string is returned.
620     LLVM_NODISCARD
621     StringRef take_front(size_t N = 1) const {
622       if (N >= size())
623         return *this;
624       return drop_back(size() - N);
625     }
626 
627     /// Return a StringRef equal to 'this' but with only the last \p N
628     /// elements remaining.  If \p N is greater than the length of the
629     /// string, the entire string is returned.
630     LLVM_NODISCARD
631     StringRef take_back(size_t N = 1) const {
632       if (N >= size())
633         return *this;
634       return drop_front(size() - N);
635     }
636 
637     /// Return the longest prefix of 'this' such that every character
638     /// in the prefix satisfies the given predicate.
639     LLVM_NODISCARD
take_while(function_ref<bool (char)> F)640     StringRef take_while(function_ref<bool(char)> F) const {
641       return substr(0, find_if_not(F));
642     }
643 
644     /// Return the longest prefix of 'this' such that no character in
645     /// the prefix satisfies the given predicate.
646     LLVM_NODISCARD
take_until(function_ref<bool (char)> F)647     StringRef take_until(function_ref<bool(char)> F) const {
648       return substr(0, find_if(F));
649     }
650 
651     /// Return a StringRef equal to 'this' but with the first \p N elements
652     /// dropped.
653     LLVM_NODISCARD
654     StringRef drop_front(size_t N = 1) const {
655       assert(size() >= N && "Dropping more elements than exist");
656       return substr(N);
657     }
658 
659     /// Return a StringRef equal to 'this' but with the last \p N elements
660     /// dropped.
661     LLVM_NODISCARD
662     StringRef drop_back(size_t N = 1) const {
663       assert(size() >= N && "Dropping more elements than exist");
664       return substr(0, size()-N);
665     }
666 
667     /// Return a StringRef equal to 'this', but with all characters satisfying
668     /// the given predicate dropped from the beginning of the string.
669     LLVM_NODISCARD
drop_while(function_ref<bool (char)> F)670     StringRef drop_while(function_ref<bool(char)> F) const {
671       return substr(find_if_not(F));
672     }
673 
674     /// Return a StringRef equal to 'this', but with all characters not
675     /// satisfying the given predicate dropped from the beginning of the string.
676     LLVM_NODISCARD
drop_until(function_ref<bool (char)> F)677     StringRef drop_until(function_ref<bool(char)> F) const {
678       return substr(find_if(F));
679     }
680 
681     /// Returns true if this StringRef has the given prefix and removes that
682     /// prefix.
consume_front(StringRef Prefix)683     bool consume_front(StringRef Prefix) {
684       if (!startswith(Prefix))
685         return false;
686 
687       *this = drop_front(Prefix.size());
688       return true;
689     }
690 
691     /// Returns true if this StringRef has the given suffix and removes that
692     /// suffix.
consume_back(StringRef Suffix)693     bool consume_back(StringRef Suffix) {
694       if (!endswith(Suffix))
695         return false;
696 
697       *this = drop_back(Suffix.size());
698       return true;
699     }
700 
701     /// Return a reference to the substring from [Start, End).
702     ///
703     /// \param Start The index of the starting character in the substring; if
704     /// the index is npos or greater than the length of the string then the
705     /// empty substring will be returned.
706     ///
707     /// \param End The index following the last character to include in the
708     /// substring. If this is npos or exceeds the number of characters
709     /// remaining in the string, the string suffix (starting with \p Start)
710     /// will be returned. If this is less than \p Start, an empty string will
711     /// be returned.
712     LLVM_NODISCARD
slice(size_t Start,size_t End)713     StringRef slice(size_t Start, size_t End) const {
714       Start = std::min(Start, Length);
715       End = std::min(std::max(Start, End), Length);
716       return StringRef(Data + Start, End - Start);
717     }
718 
719     /// Split into two substrings around the first occurrence of a separator
720     /// character.
721     ///
722     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
723     /// such that (*this == LHS + Separator + RHS) is true and RHS is
724     /// maximal. If \p Separator is not in the string, then the result is a
725     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
726     ///
727     /// \param Separator The character to split on.
728     /// \returns The split substrings.
729     LLVM_NODISCARD
split(char Separator)730     std::pair<StringRef, StringRef> split(char Separator) const {
731       return split(StringRef(&Separator, 1));
732     }
733 
734     /// Split into two substrings around the first occurrence of a separator
735     /// string.
736     ///
737     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
738     /// such that (*this == LHS + Separator + RHS) is true and RHS is
739     /// maximal. If \p Separator is not in the string, then the result is a
740     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
741     ///
742     /// \param Separator - The string to split on.
743     /// \return - The split substrings.
744     LLVM_NODISCARD
split(StringRef Separator)745     std::pair<StringRef, StringRef> split(StringRef Separator) const {
746       size_t Idx = find(Separator);
747       if (Idx == npos)
748         return std::make_pair(*this, StringRef());
749       return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
750     }
751 
752     /// Split into two substrings around the last occurrence of a separator
753     /// string.
754     ///
755     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
756     /// such that (*this == LHS + Separator + RHS) is true and RHS is
757     /// minimal. If \p Separator is not in the string, then the result is a
758     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
759     ///
760     /// \param Separator - The string to split on.
761     /// \return - The split substrings.
762     LLVM_NODISCARD
rsplit(StringRef Separator)763     std::pair<StringRef, StringRef> rsplit(StringRef Separator) const {
764       size_t Idx = rfind(Separator);
765       if (Idx == npos)
766         return std::make_pair(*this, StringRef());
767       return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
768     }
769 
770     /// Split into substrings around the occurrences of a separator string.
771     ///
772     /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
773     /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
774     /// elements are added to A.
    /// If \p KeepEmpty is false, empty strings are not added to \p A. They
    /// still count when considering \p MaxSplit.
    /// A useful invariant is that
    /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true.
779     ///
780     /// \param A - Where to put the substrings.
781     /// \param Separator - The string to split on.
782     /// \param MaxSplit - The maximum number of times the string is split.
783     /// \param KeepEmpty - True if empty substring should be added.
784     void split(SmallVectorImpl<StringRef> &A,
785                StringRef Separator, int MaxSplit = -1,
786                bool KeepEmpty = true) const;
787 
788     /// Split into substrings around the occurrences of a separator character.
789     ///
790     /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
791     /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
792     /// elements are added to A.
    /// If \p KeepEmpty is false, empty strings are not added to \p A. They
    /// still count when considering \p MaxSplit.
    /// A useful invariant is that
    /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true.
797     ///
798     /// \param A - Where to put the substrings.
799     /// \param Separator - The string to split on.
800     /// \param MaxSplit - The maximum number of times the string is split.
801     /// \param KeepEmpty - True if empty substring should be added.
802     void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1,
803                bool KeepEmpty = true) const;
804 
805     /// Split into two substrings around the last occurrence of a separator
806     /// character.
807     ///
808     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
809     /// such that (*this == LHS + Separator + RHS) is true and RHS is
810     /// minimal. If \p Separator is not in the string, then the result is a
811     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
812     ///
813     /// \param Separator - The character to split on.
814     /// \return - The split substrings.
815     LLVM_NODISCARD
rsplit(char Separator)816     std::pair<StringRef, StringRef> rsplit(char Separator) const {
817       return rsplit(StringRef(&Separator, 1));
818     }
819 
820     /// Return string with consecutive \p Char characters starting from the
821     /// the left removed.
822     LLVM_NODISCARD
ltrim(char Char)823     StringRef ltrim(char Char) const {
824       return drop_front(std::min(Length, find_first_not_of(Char)));
825     }
826 
827     /// Return string with consecutive characters in \p Chars starting from
828     /// the left removed.
829     LLVM_NODISCARD
830     StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
831       return drop_front(std::min(Length, find_first_not_of(Chars)));
832     }
833 
834     /// Return string with consecutive \p Char characters starting from the
835     /// right removed.
836     LLVM_NODISCARD
rtrim(char Char)837     StringRef rtrim(char Char) const {
838       return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1));
839     }
840 
841     /// Return string with consecutive characters in \p Chars starting from
842     /// the right removed.
843     LLVM_NODISCARD
844     StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
845       return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1));
846     }
847 
848     /// Return string with consecutive \p Char characters starting from the
849     /// left and right removed.
850     LLVM_NODISCARD
trim(char Char)851     StringRef trim(char Char) const {
852       return ltrim(Char).rtrim(Char);
853     }
854 
855     /// Return string with consecutive characters in \p Chars starting from
856     /// the left and right removed.
857     LLVM_NODISCARD
858     StringRef trim(StringRef Chars = " \t\n\v\f\r") const {
859       return ltrim(Chars).rtrim(Chars);
860     }
861 
862     /// @}
863   };
864 
865   /// A wrapper around a string literal that serves as a proxy for constructing
866   /// global tables of StringRefs with the length computed at compile time.
867   /// In order to avoid the invocation of a global constructor, StringLiteral
868   /// should *only* be used in a constexpr context, as such:
869   ///
870   /// constexpr StringLiteral S("test");
871   ///
872   class StringLiteral : public StringRef {
873   private:
StringLiteral(const char * Str,size_t N)874     constexpr StringLiteral(const char *Str, size_t N) : StringRef(Str, N) {
875     }
876 
877   public:
878     template <size_t N>
StringLiteral(const char (& Str)[N])879     constexpr StringLiteral(const char (&Str)[N])
880 #if defined(__clang__) && __has_attribute(enable_if)
881 #pragma clang diagnostic push
882 #pragma clang diagnostic ignored "-Wgcc-compat"
883         __attribute((enable_if(__builtin_strlen(Str) == N - 1,
884                                "invalid string literal")))
885 #pragma clang diagnostic pop
886 #endif
887         : StringRef(Str, N - 1) {
888     }
889 
890     // Explicit construction for strings like "foo\0bar".
891     template <size_t N>
withInnerNUL(const char (& Str)[N])892     static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) {
893       return StringLiteral(Str, N - 1);
894     }
895   };
896 
897   /// @name StringRef Comparison Operators
898   /// @{
899 
900   inline bool operator==(StringRef LHS, StringRef RHS) {
901     return LHS.equals(RHS);
902   }
903 
904   inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }
905 
906   inline bool operator<(StringRef LHS, StringRef RHS) {
907     return LHS.compare(RHS) == -1;
908   }
909 
910   inline bool operator<=(StringRef LHS, StringRef RHS) {
911     return LHS.compare(RHS) != 1;
912   }
913 
914   inline bool operator>(StringRef LHS, StringRef RHS) {
915     return LHS.compare(RHS) == 1;
916   }
917 
918   inline bool operator>=(StringRef LHS, StringRef RHS) {
919     return LHS.compare(RHS) != -1;
920   }
921 
922   inline std::string &operator+=(std::string &buffer, StringRef string) {
923     return buffer.append(string.data(), string.size());
924   }
925 
926   /// @}
927 
928   /// Compute a hash_code for a StringRef.
929   LLVM_NODISCARD
930   hash_code hash_value(StringRef S);
931 
932 } // end namespace llvm
933 
934 #endif // LLVM_ADT_STRINGREF_H
935