1 //===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_ADT_STRINGREF_H
10 #define LLVM_ADT_STRINGREF_H
11 
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/ADT/iterator_range.h"
14 #include "llvm/Support/Compiler.h"
15 #include <algorithm>
16 #include <cassert>
17 #include <cstddef>
18 #include <cstring>
19 #include <limits>
20 #include <string>
21 #if __cplusplus > 201402L
22 #include <string_view>
23 #endif
24 #include <type_traits>
25 #include <utility>
26 
27 // Declare the __builtin_strlen intrinsic for MSVC so it can be used in
28 // constexpr context.
29 #if defined(_MSC_VER)
30 extern "C" size_t __builtin_strlen(const char *);
31 #endif
32 
33 namespace llvm {
34 
35   class APInt;
36   class hash_code;
37   template <typename T> class SmallVectorImpl;
38   class StringRef;
39 
40   /// Helper functions for StringRef::getAsInteger.
41   bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
42                             unsigned long long &Result);
43 
44   bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);
45 
46   bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
47                               unsigned long long &Result);
48   bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);
49 
50   /// StringRef - Represent a constant reference to a string, i.e. a character
51   /// array and a length, which need not be null terminated.
52   ///
53   /// This class does not own the string data, it is expected to be used in
54   /// situations where the character data resides in some other buffer, whose
55   /// lifetime extends past that of the StringRef. For this reason, it is not in
56   /// general safe to store a StringRef.
57   class LLVM_GSL_POINTER StringRef {
58   public:
59     static constexpr size_t npos = ~size_t(0);
60 
61     using iterator = const char *;
62     using const_iterator = const char *;
63     using size_type = size_t;
64 
65   private:
66     /// The start of the string, in an external buffer.
67     const char *Data = nullptr;
68 
69     /// The length of the string.
70     size_t Length = 0;
71 
72     // Workaround memcmp issue with null pointers (undefined behavior)
73     // by providing a specialized version
compareMemory(const char * Lhs,const char * Rhs,size_t Length)74     static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
75       if (Length == 0) { return 0; }
76       return ::memcmp(Lhs,Rhs,Length);
77     }
78 
79     // Constexpr version of std::strlen.
strLen(const char * Str)80     static constexpr size_t strLen(const char *Str) {
81 #if __cplusplus > 201402L
82       return std::char_traits<char>::length(Str);
83 #elif __has_builtin(__builtin_strlen) || defined(__GNUC__) || \
84     (defined(_MSC_VER) && _MSC_VER >= 1916)
85       return __builtin_strlen(Str);
86 #else
87       const char *Begin = Str;
88       while (*Str != '\0')
89         ++Str;
90       return Str - Begin;
91 #endif
92     }
93 
94   public:
95     /// @name Constructors
96     /// @{
97 
98     /// Construct an empty string ref.
99     /*implicit*/ StringRef() = default;
100 
101     /// Disable conversion from nullptr.  This prevents things like
102     /// if (S == nullptr)
103     StringRef(std::nullptr_t) = delete;
104 
105     /// Construct a string ref from a cstring.
StringRef(const char * Str)106     /*implicit*/ constexpr StringRef(const char *Str)
107         : Data(Str), Length(Str ? strLen(Str) : 0) {}
108 
109     /// Construct a string ref from a pointer and length.
StringRef(const char * data,size_t length)110     /*implicit*/ constexpr StringRef(const char *data, size_t length)
111         : Data(data), Length(length) {}
112 
113     /// Construct a string ref from an std::string.
StringRef(const std::string & Str)114     /*implicit*/ StringRef(const std::string &Str)
115       : Data(Str.data()), Length(Str.length()) {}
116 
117 #if __cplusplus > 201402L
118     /// Construct a string ref from an std::string_view.
StringRef(std::string_view Str)119     /*implicit*/ constexpr StringRef(std::string_view Str)
120         : Data(Str.data()), Length(Str.size()) {}
121 #endif
122 
123     /// @}
124     /// @name Iterators
125     /// @{
126 
begin()127     iterator begin() const { return Data; }
128 
end()129     iterator end() const { return Data + Length; }
130 
bytes_begin()131     const unsigned char *bytes_begin() const {
132       return reinterpret_cast<const unsigned char *>(begin());
133     }
bytes_end()134     const unsigned char *bytes_end() const {
135       return reinterpret_cast<const unsigned char *>(end());
136     }
bytes()137     iterator_range<const unsigned char *> bytes() const {
138       return make_range(bytes_begin(), bytes_end());
139     }
140 
141     /// @}
142     /// @name String Operations
143     /// @{
144 
145     /// data - Get a pointer to the start of the string (which may not be null
146     /// terminated).
147     LLVM_NODISCARD
data()148     const char *data() const { return Data; }
149 
150     /// empty - Check if the string is empty.
151     LLVM_NODISCARD
empty()152     bool empty() const { return Length == 0; }
153 
154     /// size - Get the string size.
155     LLVM_NODISCARD
size()156     size_t size() const { return Length; }
157 
158     /// front - Get the first character in the string.
159     LLVM_NODISCARD
front()160     char front() const {
161       assert(!empty());
162       return Data[0];
163     }
164 
165     /// back - Get the last character in the string.
166     LLVM_NODISCARD
back()167     char back() const {
168       assert(!empty());
169       return Data[Length-1];
170     }
171 
172     // copy - Allocate copy in Allocator and return StringRef to it.
173     template <typename Allocator>
copy(Allocator & A)174     LLVM_NODISCARD StringRef copy(Allocator &A) const {
175       // Don't request a length 0 copy from the allocator.
176       if (empty())
177         return StringRef();
178       char *S = A.template Allocate<char>(Length);
179       std::copy(begin(), end(), S);
180       return StringRef(S, Length);
181     }
182 
183     /// equals - Check for string equality, this is more efficient than
184     /// compare() when the relative ordering of inequal strings isn't needed.
185     LLVM_NODISCARD
equals(StringRef RHS)186     bool equals(StringRef RHS) const {
187       return (Length == RHS.Length &&
188               compareMemory(Data, RHS.Data, RHS.Length) == 0);
189     }
190 
191     /// equals_lower - Check for string equality, ignoring case.
192     LLVM_NODISCARD
equals_lower(StringRef RHS)193     bool equals_lower(StringRef RHS) const {
194       return Length == RHS.Length && compare_lower(RHS) == 0;
195     }
196 
197     /// compare - Compare two strings; the result is -1, 0, or 1 if this string
198     /// is lexicographically less than, equal to, or greater than the \p RHS.
199     LLVM_NODISCARD
compare(StringRef RHS)200     int compare(StringRef RHS) const {
201       // Check the prefix for a mismatch.
202       if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
203         return Res < 0 ? -1 : 1;
204 
205       // Otherwise the prefixes match, so we only need to check the lengths.
206       if (Length == RHS.Length)
207         return 0;
208       return Length < RHS.Length ? -1 : 1;
209     }
210 
211     /// compare_lower - Compare two strings, ignoring case.
212     LLVM_NODISCARD
213     int compare_lower(StringRef RHS) const;
214 
215     /// compare_numeric - Compare two strings, treating sequences of digits as
216     /// numbers.
217     LLVM_NODISCARD
218     int compare_numeric(StringRef RHS) const;
219 
220     /// Determine the edit distance between this string and another
221     /// string.
222     ///
223     /// \param Other the string to compare this string against.
224     ///
225     /// \param AllowReplacements whether to allow character
226     /// replacements (change one character into another) as a single
227     /// operation, rather than as two operations (an insertion and a
228     /// removal).
229     ///
230     /// \param MaxEditDistance If non-zero, the maximum edit distance that
231     /// this routine is allowed to compute. If the edit distance will exceed
232     /// that maximum, returns \c MaxEditDistance+1.
233     ///
234     /// \returns the minimum number of character insertions, removals,
235     /// or (if \p AllowReplacements is \c true) replacements needed to
236     /// transform one of the given strings into the other. If zero,
237     /// the strings are identical.
238     LLVM_NODISCARD
239     unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
240                            unsigned MaxEditDistance = 0) const;
241 
242     /// str - Get the contents as an std::string.
243     LLVM_NODISCARD
str()244     std::string str() const {
245       if (!Data) return std::string();
246       return std::string(Data, Length);
247     }
248 
249     /// @}
250     /// @name Operator Overloads
251     /// @{
252 
253     LLVM_NODISCARD
254     char operator[](size_t Index) const {
255       assert(Index < Length && "Invalid index!");
256       return Data[Index];
257     }
258 
259     /// Disallow accidental assignment from a temporary std::string.
260     ///
261     /// The declaration here is extra complicated so that `stringRef = {}`
262     /// and `stringRef = "abc"` continue to select the move assignment operator.
263     template <typename T>
264     std::enable_if_t<std::is_same<T, std::string>::value, StringRef> &
265     operator=(T &&Str) = delete;
266 
267     /// @}
268     /// @name Type Conversions
269     /// @{
270 
string()271     explicit operator std::string() const { return str(); }
272 
273 #if __cplusplus > 201402L
string_view()274     operator std::string_view() const {
275       return std::string_view(data(), size());
276     }
277 #endif
278 
279     /// @}
280     /// @name String Predicates
281     /// @{
282 
283     /// Check if this string starts with the given \p Prefix.
284     LLVM_NODISCARD
startswith(StringRef Prefix)285     bool startswith(StringRef Prefix) const {
286       return Length >= Prefix.Length &&
287              compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
288     }
289 
290     /// Check if this string starts with the given \p Prefix, ignoring case.
291     LLVM_NODISCARD
292     bool startswith_lower(StringRef Prefix) const;
293 
294     /// Check if this string ends with the given \p Suffix.
295     LLVM_NODISCARD
endswith(StringRef Suffix)296     bool endswith(StringRef Suffix) const {
297       return Length >= Suffix.Length &&
298         compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
299     }
300 
301     /// Check if this string ends with the given \p Suffix, ignoring case.
302     LLVM_NODISCARD
303     bool endswith_lower(StringRef Suffix) const;
304 
305     /// @}
306     /// @name String Searching
307     /// @{
308 
309     /// Search for the first character \p C in the string.
310     ///
311     /// \returns The index of the first occurrence of \p C, or npos if not
312     /// found.
313     LLVM_NODISCARD
314     size_t find(char C, size_t From = 0) const {
315       size_t FindBegin = std::min(From, Length);
316       if (FindBegin < Length) { // Avoid calling memchr with nullptr.
317         // Just forward to memchr, which is faster than a hand-rolled loop.
318         if (const void *P = ::memchr(Data + FindBegin, C, Length - FindBegin))
319           return static_cast<const char *>(P) - Data;
320       }
321       return npos;
322     }
323 
324     /// Search for the first character \p C in the string, ignoring case.
325     ///
326     /// \returns The index of the first occurrence of \p C, or npos if not
327     /// found.
328     LLVM_NODISCARD
329     size_t find_lower(char C, size_t From = 0) const;
330 
331     /// Search for the first character satisfying the predicate \p F
332     ///
333     /// \returns The index of the first character satisfying \p F starting from
334     /// \p From, or npos if not found.
335     LLVM_NODISCARD
336     size_t find_if(function_ref<bool(char)> F, size_t From = 0) const {
337       StringRef S = drop_front(From);
338       while (!S.empty()) {
339         if (F(S.front()))
340           return size() - S.size();
341         S = S.drop_front();
342       }
343       return npos;
344     }
345 
346     /// Search for the first character not satisfying the predicate \p F
347     ///
348     /// \returns The index of the first character not satisfying \p F starting
349     /// from \p From, or npos if not found.
350     LLVM_NODISCARD
351     size_t find_if_not(function_ref<bool(char)> F, size_t From = 0) const {
352       return find_if([F](char c) { return !F(c); }, From);
353     }
354 
355     /// Search for the first string \p Str in the string.
356     ///
357     /// \returns The index of the first occurrence of \p Str, or npos if not
358     /// found.
359     LLVM_NODISCARD
360     size_t find(StringRef Str, size_t From = 0) const;
361 
362     /// Search for the first string \p Str in the string, ignoring case.
363     ///
364     /// \returns The index of the first occurrence of \p Str, or npos if not
365     /// found.
366     LLVM_NODISCARD
367     size_t find_lower(StringRef Str, size_t From = 0) const;
368 
369     /// Search for the last character \p C in the string.
370     ///
371     /// \returns The index of the last occurrence of \p C, or npos if not
372     /// found.
373     LLVM_NODISCARD
374     size_t rfind(char C, size_t From = npos) const {
375       From = std::min(From, Length);
376       size_t i = From;
377       while (i != 0) {
378         --i;
379         if (Data[i] == C)
380           return i;
381       }
382       return npos;
383     }
384 
385     /// Search for the last character \p C in the string, ignoring case.
386     ///
387     /// \returns The index of the last occurrence of \p C, or npos if not
388     /// found.
389     LLVM_NODISCARD
390     size_t rfind_lower(char C, size_t From = npos) const;
391 
392     /// Search for the last string \p Str in the string.
393     ///
394     /// \returns The index of the last occurrence of \p Str, or npos if not
395     /// found.
396     LLVM_NODISCARD
397     size_t rfind(StringRef Str) const;
398 
399     /// Search for the last string \p Str in the string, ignoring case.
400     ///
401     /// \returns The index of the last occurrence of \p Str, or npos if not
402     /// found.
403     LLVM_NODISCARD
404     size_t rfind_lower(StringRef Str) const;
405 
406     /// Find the first character in the string that is \p C, or npos if not
407     /// found. Same as find.
408     LLVM_NODISCARD
409     size_t find_first_of(char C, size_t From = 0) const {
410       return find(C, From);
411     }
412 
413     /// Find the first character in the string that is in \p Chars, or npos if
414     /// not found.
415     ///
416     /// Complexity: O(size() + Chars.size())
417     LLVM_NODISCARD
418     size_t find_first_of(StringRef Chars, size_t From = 0) const;
419 
420     /// Find the first character in the string that is not \p C or npos if not
421     /// found.
422     LLVM_NODISCARD
423     size_t find_first_not_of(char C, size_t From = 0) const;
424 
425     /// Find the first character in the string that is not in the string
426     /// \p Chars, or npos if not found.
427     ///
428     /// Complexity: O(size() + Chars.size())
429     LLVM_NODISCARD
430     size_t find_first_not_of(StringRef Chars, size_t From = 0) const;
431 
432     /// Find the last character in the string that is \p C, or npos if not
433     /// found.
434     LLVM_NODISCARD
435     size_t find_last_of(char C, size_t From = npos) const {
436       return rfind(C, From);
437     }
438 
    /// Find the last character in the string that is in \p Chars, or npos
    /// if not found.
441     ///
442     /// Complexity: O(size() + Chars.size())
443     LLVM_NODISCARD
444     size_t find_last_of(StringRef Chars, size_t From = npos) const;
445 
446     /// Find the last character in the string that is not \p C, or npos if not
447     /// found.
448     LLVM_NODISCARD
449     size_t find_last_not_of(char C, size_t From = npos) const;
450 
451     /// Find the last character in the string that is not in \p Chars, or
452     /// npos if not found.
453     ///
454     /// Complexity: O(size() + Chars.size())
455     LLVM_NODISCARD
456     size_t find_last_not_of(StringRef Chars, size_t From = npos) const;
457 
458     /// Return true if the given string is a substring of *this, and false
459     /// otherwise.
460     LLVM_NODISCARD
contains(StringRef Other)461     bool contains(StringRef Other) const { return find(Other) != npos; }
462 
463     /// Return true if the given character is contained in *this, and false
464     /// otherwise.
465     LLVM_NODISCARD
contains(char C)466     bool contains(char C) const { return find_first_of(C) != npos; }
467 
468     /// Return true if the given string is a substring of *this, and false
469     /// otherwise.
470     LLVM_NODISCARD
contains_lower(StringRef Other)471     bool contains_lower(StringRef Other) const {
472       return find_lower(Other) != npos;
473     }
474 
475     /// Return true if the given character is contained in *this, and false
476     /// otherwise.
477     LLVM_NODISCARD
contains_lower(char C)478     bool contains_lower(char C) const { return find_lower(C) != npos; }
479 
480     /// @}
481     /// @name Helpful Algorithms
482     /// @{
483 
484     /// Return the number of occurrences of \p C in the string.
485     LLVM_NODISCARD
count(char C)486     size_t count(char C) const {
487       size_t Count = 0;
488       for (size_t i = 0, e = Length; i != e; ++i)
489         if (Data[i] == C)
490           ++Count;
491       return Count;
492     }
493 
494     /// Return the number of non-overlapped occurrences of \p Str in
495     /// the string.
496     size_t count(StringRef Str) const;
497 
498     /// Parse the current string as an integer of the specified radix.  If
499     /// \p Radix is specified as zero, this does radix autosensing using
500     /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
501     ///
502     /// If the string is invalid or if only a subset of the string is valid,
503     /// this returns true to signify the error.  The string is considered
504     /// erroneous if empty or if it overflows T.
505     template <typename T>
506     std::enable_if_t<std::numeric_limits<T>::is_signed, bool>
getAsInteger(unsigned Radix,T & Result)507     getAsInteger(unsigned Radix, T &Result) const {
508       long long LLVal;
509       if (getAsSignedInteger(*this, Radix, LLVal) ||
510             static_cast<T>(LLVal) != LLVal)
511         return true;
512       Result = LLVal;
513       return false;
514     }
515 
516     template <typename T>
517     std::enable_if_t<!std::numeric_limits<T>::is_signed, bool>
getAsInteger(unsigned Radix,T & Result)518     getAsInteger(unsigned Radix, T &Result) const {
519       unsigned long long ULLVal;
520       // The additional cast to unsigned long long is required to avoid the
521       // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type
522       // 'unsigned __int64' when instantiating getAsInteger with T = bool.
523       if (getAsUnsignedInteger(*this, Radix, ULLVal) ||
524           static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
525         return true;
526       Result = ULLVal;
527       return false;
528     }
529 
530     /// Parse the current string as an integer of the specified radix.  If
531     /// \p Radix is specified as zero, this does radix autosensing using
532     /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
533     ///
534     /// If the string does not begin with a number of the specified radix,
535     /// this returns true to signify the error. The string is considered
536     /// erroneous if empty or if it overflows T.
537     /// The portion of the string representing the discovered numeric value
538     /// is removed from the beginning of the string.
539     template <typename T>
540     std::enable_if_t<std::numeric_limits<T>::is_signed, bool>
consumeInteger(unsigned Radix,T & Result)541     consumeInteger(unsigned Radix, T &Result) {
542       long long LLVal;
543       if (consumeSignedInteger(*this, Radix, LLVal) ||
544           static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
545         return true;
546       Result = LLVal;
547       return false;
548     }
549 
550     template <typename T>
551     std::enable_if_t<!std::numeric_limits<T>::is_signed, bool>
consumeInteger(unsigned Radix,T & Result)552     consumeInteger(unsigned Radix, T &Result) {
553       unsigned long long ULLVal;
554       if (consumeUnsignedInteger(*this, Radix, ULLVal) ||
555           static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
556         return true;
557       Result = ULLVal;
558       return false;
559     }
560 
561     /// Parse the current string as an integer of the specified \p Radix, or of
562     /// an autosensed radix if the \p Radix given is 0.  The current value in
563     /// \p Result is discarded, and the storage is changed to be wide enough to
564     /// store the parsed integer.
565     ///
566     /// \returns true if the string does not solely consist of a valid
567     /// non-empty number in the appropriate base.
568     ///
569     /// APInt::fromString is superficially similar but assumes the
570     /// string is well-formed in the given radix.
571     bool getAsInteger(unsigned Radix, APInt &Result) const;
572 
573     /// Parse the current string as an IEEE double-precision floating
574     /// point value.  The string must be a well-formed double.
575     ///
576     /// If \p AllowInexact is false, the function will fail if the string
577     /// cannot be represented exactly.  Otherwise, the function only fails
578     /// in case of an overflow or underflow, or an invalid floating point
579     /// representation.
580     bool getAsDouble(double &Result, bool AllowInexact = true) const;
581 
582     /// @}
583     /// @name String Operations
584     /// @{
585 
    /// Convert the given ASCII string to lowercase.
587     LLVM_NODISCARD
588     std::string lower() const;
589 
590     /// Convert the given ASCII string to uppercase.
591     LLVM_NODISCARD
592     std::string upper() const;
593 
594     /// @}
595     /// @name Substring Operations
596     /// @{
597 
598     /// Return a reference to the substring from [Start, Start + N).
599     ///
600     /// \param Start The index of the starting character in the substring; if
601     /// the index is npos or greater than the length of the string then the
602     /// empty substring will be returned.
603     ///
604     /// \param N The number of characters to included in the substring. If N
605     /// exceeds the number of characters remaining in the string, the string
606     /// suffix (starting with \p Start) will be returned.
607     LLVM_NODISCARD
608     StringRef substr(size_t Start, size_t N = npos) const {
609       Start = std::min(Start, Length);
610       return StringRef(Data + Start, std::min(N, Length - Start));
611     }
612 
613     /// Return a StringRef equal to 'this' but with only the first \p N
614     /// elements remaining.  If \p N is greater than the length of the
615     /// string, the entire string is returned.
616     LLVM_NODISCARD
617     StringRef take_front(size_t N = 1) const {
618       if (N >= size())
619         return *this;
620       return drop_back(size() - N);
621     }
622 
623     /// Return a StringRef equal to 'this' but with only the last \p N
624     /// elements remaining.  If \p N is greater than the length of the
625     /// string, the entire string is returned.
626     LLVM_NODISCARD
627     StringRef take_back(size_t N = 1) const {
628       if (N >= size())
629         return *this;
630       return drop_front(size() - N);
631     }
632 
633     /// Return the longest prefix of 'this' such that every character
634     /// in the prefix satisfies the given predicate.
635     LLVM_NODISCARD
take_while(function_ref<bool (char)> F)636     StringRef take_while(function_ref<bool(char)> F) const {
637       return substr(0, find_if_not(F));
638     }
639 
640     /// Return the longest prefix of 'this' such that no character in
641     /// the prefix satisfies the given predicate.
642     LLVM_NODISCARD
take_until(function_ref<bool (char)> F)643     StringRef take_until(function_ref<bool(char)> F) const {
644       return substr(0, find_if(F));
645     }
646 
647     /// Return a StringRef equal to 'this' but with the first \p N elements
648     /// dropped.
649     LLVM_NODISCARD
650     StringRef drop_front(size_t N = 1) const {
651       assert(size() >= N && "Dropping more elements than exist");
652       return substr(N);
653     }
654 
655     /// Return a StringRef equal to 'this' but with the last \p N elements
656     /// dropped.
657     LLVM_NODISCARD
658     StringRef drop_back(size_t N = 1) const {
659       assert(size() >= N && "Dropping more elements than exist");
660       return substr(0, size()-N);
661     }
662 
663     /// Return a StringRef equal to 'this', but with all characters satisfying
664     /// the given predicate dropped from the beginning of the string.
665     LLVM_NODISCARD
drop_while(function_ref<bool (char)> F)666     StringRef drop_while(function_ref<bool(char)> F) const {
667       return substr(find_if_not(F));
668     }
669 
670     /// Return a StringRef equal to 'this', but with all characters not
671     /// satisfying the given predicate dropped from the beginning of the string.
672     LLVM_NODISCARD
drop_until(function_ref<bool (char)> F)673     StringRef drop_until(function_ref<bool(char)> F) const {
674       return substr(find_if(F));
675     }
676 
677     /// Returns true if this StringRef has the given prefix and removes that
678     /// prefix.
consume_front(StringRef Prefix)679     bool consume_front(StringRef Prefix) {
680       if (!startswith(Prefix))
681         return false;
682 
683       *this = drop_front(Prefix.size());
684       return true;
685     }
686 
687     /// Returns true if this StringRef has the given suffix and removes that
688     /// suffix.
consume_back(StringRef Suffix)689     bool consume_back(StringRef Suffix) {
690       if (!endswith(Suffix))
691         return false;
692 
693       *this = drop_back(Suffix.size());
694       return true;
695     }
696 
697     /// Return a reference to the substring from [Start, End).
698     ///
699     /// \param Start The index of the starting character in the substring; if
700     /// the index is npos or greater than the length of the string then the
701     /// empty substring will be returned.
702     ///
703     /// \param End The index following the last character to include in the
704     /// substring. If this is npos or exceeds the number of characters
705     /// remaining in the string, the string suffix (starting with \p Start)
706     /// will be returned. If this is less than \p Start, an empty string will
707     /// be returned.
708     LLVM_NODISCARD
slice(size_t Start,size_t End)709     StringRef slice(size_t Start, size_t End) const {
710       Start = std::min(Start, Length);
711       End = std::min(std::max(Start, End), Length);
712       return StringRef(Data + Start, End - Start);
713     }
714 
715     /// Split into two substrings around the first occurrence of a separator
716     /// character.
717     ///
718     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
719     /// such that (*this == LHS + Separator + RHS) is true and RHS is
720     /// maximal. If \p Separator is not in the string, then the result is a
721     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
722     ///
723     /// \param Separator The character to split on.
724     /// \returns The split substrings.
725     LLVM_NODISCARD
split(char Separator)726     std::pair<StringRef, StringRef> split(char Separator) const {
727       return split(StringRef(&Separator, 1));
728     }
729 
730     /// Split into two substrings around the first occurrence of a separator
731     /// string.
732     ///
733     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
734     /// such that (*this == LHS + Separator + RHS) is true and RHS is
735     /// maximal. If \p Separator is not in the string, then the result is a
736     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
737     ///
738     /// \param Separator - The string to split on.
739     /// \return - The split substrings.
740     LLVM_NODISCARD
split(StringRef Separator)741     std::pair<StringRef, StringRef> split(StringRef Separator) const {
742       size_t Idx = find(Separator);
743       if (Idx == npos)
744         return std::make_pair(*this, StringRef());
745       return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
746     }
747 
748     /// Split into two substrings around the last occurrence of a separator
749     /// string.
750     ///
751     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
752     /// such that (*this == LHS + Separator + RHS) is true and RHS is
753     /// minimal. If \p Separator is not in the string, then the result is a
754     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
755     ///
756     /// \param Separator - The string to split on.
757     /// \return - The split substrings.
758     LLVM_NODISCARD
rsplit(StringRef Separator)759     std::pair<StringRef, StringRef> rsplit(StringRef Separator) const {
760       size_t Idx = rfind(Separator);
761       if (Idx == npos)
762         return std::make_pair(*this, StringRef());
763       return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
764     }
765 
766     /// Split into substrings around the occurrences of a separator string.
767     ///
768     /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
769     /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
770     /// elements are added to A.
    /// If \p KeepEmpty is false, empty strings are not added to \p A. They
    /// still count when considering \p MaxSplit.
    /// A useful invariant is that
    /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
775     ///
776     /// \param A - Where to put the substrings.
777     /// \param Separator - The string to split on.
778     /// \param MaxSplit - The maximum number of times the string is split.
779     /// \param KeepEmpty - True if empty substring should be added.
780     void split(SmallVectorImpl<StringRef> &A,
781                StringRef Separator, int MaxSplit = -1,
782                bool KeepEmpty = true) const;
783 
784     /// Split into substrings around the occurrences of a separator character.
785     ///
786     /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
787     /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
788     /// elements are added to A.
    /// If \p KeepEmpty is false, empty strings are not added to \p A. They
    /// still count when considering \p MaxSplit.
    /// A useful invariant is that
    /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
793     ///
794     /// \param A - Where to put the substrings.
795     /// \param Separator - The string to split on.
796     /// \param MaxSplit - The maximum number of times the string is split.
797     /// \param KeepEmpty - True if empty substring should be added.
798     void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1,
799                bool KeepEmpty = true) const;
800 
801     /// Split into two substrings around the last occurrence of a separator
802     /// character.
803     ///
804     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
805     /// such that (*this == LHS + Separator + RHS) is true and RHS is
806     /// minimal. If \p Separator is not in the string, then the result is a
807     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
808     ///
809     /// \param Separator - The character to split on.
810     /// \return - The split substrings.
811     LLVM_NODISCARD
rsplit(char Separator)812     std::pair<StringRef, StringRef> rsplit(char Separator) const {
813       return rsplit(StringRef(&Separator, 1));
814     }
815 
816     /// Return string with consecutive \p Char characters starting from the
817     /// the left removed.
818     LLVM_NODISCARD
ltrim(char Char)819     StringRef ltrim(char Char) const {
820       return drop_front(std::min(Length, find_first_not_of(Char)));
821     }
822 
823     /// Return string with consecutive characters in \p Chars starting from
824     /// the left removed.
825     LLVM_NODISCARD
826     StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
827       return drop_front(std::min(Length, find_first_not_of(Chars)));
828     }
829 
830     /// Return string with consecutive \p Char characters starting from the
831     /// right removed.
832     LLVM_NODISCARD
rtrim(char Char)833     StringRef rtrim(char Char) const {
834       return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1));
835     }
836 
837     /// Return string with consecutive characters in \p Chars starting from
838     /// the right removed.
839     LLVM_NODISCARD
840     StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
841       return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1));
842     }
843 
844     /// Return string with consecutive \p Char characters starting from the
845     /// left and right removed.
846     LLVM_NODISCARD
trim(char Char)847     StringRef trim(char Char) const {
848       return ltrim(Char).rtrim(Char);
849     }
850 
851     /// Return string with consecutive characters in \p Chars starting from
852     /// the left and right removed.
853     LLVM_NODISCARD
854     StringRef trim(StringRef Chars = " \t\n\v\f\r") const {
855       return ltrim(Chars).rtrim(Chars);
856     }
857 
858     /// @}
859   };
860 
861   /// A wrapper around a string literal that serves as a proxy for constructing
862   /// global tables of StringRefs with the length computed at compile time.
863   /// In order to avoid the invocation of a global constructor, StringLiteral
864   /// should *only* be used in a constexpr context, as such:
865   ///
866   /// constexpr StringLiteral S("test");
867   ///
868   class StringLiteral : public StringRef {
869   private:
StringLiteral(const char * Str,size_t N)870     constexpr StringLiteral(const char *Str, size_t N) : StringRef(Str, N) {
871     }
872 
873   public:
874     template <size_t N>
StringLiteral(const char (& Str)[N])875     constexpr StringLiteral(const char (&Str)[N])
876 #if defined(__clang__) && __has_attribute(enable_if)
877 #pragma clang diagnostic push
878 #pragma clang diagnostic ignored "-Wgcc-compat"
879         __attribute((enable_if(__builtin_strlen(Str) == N - 1,
880                                "invalid string literal")))
881 #pragma clang diagnostic pop
882 #endif
883         : StringRef(Str, N - 1) {
884     }
885 
886     // Explicit construction for strings like "foo\0bar".
887     template <size_t N>
withInnerNUL(const char (& Str)[N])888     static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) {
889       return StringLiteral(Str, N - 1);
890     }
891   };
892 
893   /// @name StringRef Comparison Operators
894   /// @{
895 
896   inline bool operator==(StringRef LHS, StringRef RHS) {
897     return LHS.equals(RHS);
898   }
899 
900   inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }
901 
902   inline bool operator<(StringRef LHS, StringRef RHS) {
903     return LHS.compare(RHS) == -1;
904   }
905 
906   inline bool operator<=(StringRef LHS, StringRef RHS) {
907     return LHS.compare(RHS) != 1;
908   }
909 
910   inline bool operator>(StringRef LHS, StringRef RHS) {
911     return LHS.compare(RHS) == 1;
912   }
913 
914   inline bool operator>=(StringRef LHS, StringRef RHS) {
915     return LHS.compare(RHS) != -1;
916   }
917 
918   inline std::string &operator+=(std::string &buffer, StringRef string) {
919     return buffer.append(string.data(), string.size());
920   }
921 
922   /// @}
923 
924   /// Compute a hash_code for a StringRef.
925   LLVM_NODISCARD
926   hash_code hash_value(StringRef S);
927 
928 } // end namespace llvm
929 
930 #endif // LLVM_ADT_STRINGREF_H
931