1 //===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_ADT_STRINGREF_H
10 #define LLVM_ADT_STRINGREF_H
11 
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/ADT/iterator_range.h"
14 #include "llvm/Support/Compiler.h"
15 #include <algorithm>
16 #include <cassert>
17 #include <cstddef>
18 #include <cstring>
19 #include <limits>
20 #include <string>
21 #if __cplusplus > 201402L
22 #include <string_view>
23 #endif
24 #include <type_traits>
25 #include <utility>
26 
27 // Declare the __builtin_strlen intrinsic for MSVC so it can be used in
28 // constexpr context.
29 #if defined(_MSC_VER)
30 extern "C" size_t __builtin_strlen(const char *);
31 #endif
32 
33 namespace llvm {
34 
35   class APInt;
36   class hash_code;
37   template <typename T> class SmallVectorImpl;
38   template <typename T> struct DenseMapInfo;
39   class StringRef;
40 
  /// Helper functions for StringRef::getAsInteger and
  /// StringRef::consumeInteger.
  ///
  /// Each helper returns true on failure and false on success (the StringRef
  /// members forward a true result as an error) and reports the parsed value
  /// through \p Result. \p Radix is passed through unchanged by the callers.
  bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
                            unsigned long long &Result);

  bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);

  /// The consume* variants take \p Str by non-const reference so they can
  /// modify the caller's string. NOTE(review): presumably they strip the
  /// parsed digits from the front -- confirm against the out-of-line
  /// definitions.
  bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
                              unsigned long long &Result);
  bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);
50 
51   /// StringRef - Represent a constant reference to a string, i.e. a character
52   /// array and a length, which need not be null terminated.
53   ///
54   /// This class does not own the string data, it is expected to be used in
55   /// situations where the character data resides in some other buffer, whose
56   /// lifetime extends past that of the StringRef. For this reason, it is not in
57   /// general safe to store a StringRef.
58   class LLVM_GSL_POINTER StringRef {
59   public:
    /// Sentinel index with every bit set (the largest size_t value). Used as
    /// the "not found" result of the inline search functions and as the
    /// "until the end" default for length/position parameters.
    static constexpr size_t npos = ~size_t(0);

    /// Iterators are plain pointers into the referenced character data; both
    /// aliases are read-only.
    using iterator = const char *;
    using const_iterator = const char *;
    /// Unsigned type used for sizes and indices.
    using size_type = size_t;
65 
66   private:
    /// The start of the string, in an external buffer. Defaults to null, so
    /// a default-constructed StringRef has no buffer at all.
    const char *Data = nullptr;

    /// The length of the string, counted in chars. Defaults to zero.
    size_t Length = 0;
72 
73     // Workaround memcmp issue with null pointers (undefined behavior)
74     // by providing a specialized version
75     static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
76       if (Length == 0) { return 0; }
77       return ::memcmp(Lhs,Rhs,Length);
78     }
79 
80     // Constexpr version of std::strlen.
81     static constexpr size_t strLen(const char *Str) {
82 #if __cplusplus > 201402L
83       return std::char_traits<char>::length(Str);
84 #elif __has_builtin(__builtin_strlen) || defined(__GNUC__) || \
85     (defined(_MSC_VER) && _MSC_VER >= 1916)
86       return __builtin_strlen(Str);
87 #else
88       const char *Begin = Str;
89       while (*Str != '\0')
90         ++Str;
91       return Str - Begin;
92 #endif
93     }
94 
95   public:
96     /// @name Constructors
97     /// @{
98 
    /// Construct an empty string ref. The members keep their in-class
    /// defaults (null data pointer, zero length).
    /*implicit*/ StringRef() = default;

    /// Disable conversion from nullptr.  This prevents things like
    /// if (S == nullptr)
    /// from compiling through an implicit nullptr -> StringRef conversion.
    StringRef(std::nullptr_t) = delete;
105 
106     /// Construct a string ref from a cstring.
107     /*implicit*/ constexpr StringRef(const char *Str)
108         : Data(Str), Length(Str ? strLen(Str) : 0) {}
109 
110     /// Construct a string ref from a pointer and length.
111     /*implicit*/ constexpr StringRef(const char *data, size_t length)
112         : Data(data), Length(length) {}
113 
114     /// Construct a string ref from an std::string.
115     /*implicit*/ StringRef(const std::string &Str)
116       : Data(Str.data()), Length(Str.length()) {}
117 
118 #if __cplusplus > 201402L
119     /// Construct a string ref from an std::string_view.
120     /*implicit*/ constexpr StringRef(std::string_view Str)
121         : Data(Str.data()), Length(Str.size()) {}
122 #endif
123 
124     /// @}
125     /// @name Iterators
126     /// @{
127 
128     iterator begin() const { return Data; }
129 
130     iterator end() const { return Data + Length; }
131 
132     const unsigned char *bytes_begin() const {
133       return reinterpret_cast<const unsigned char *>(begin());
134     }
135     const unsigned char *bytes_end() const {
136       return reinterpret_cast<const unsigned char *>(end());
137     }
138     iterator_range<const unsigned char *> bytes() const {
139       return make_range(bytes_begin(), bytes_end());
140     }
141 
142     /// @}
143     /// @name String Operations
144     /// @{
145 
146     /// data - Get a pointer to the start of the string (which may not be null
147     /// terminated).
148     LLVM_NODISCARD
149     const char *data() const { return Data; }
150 
151     /// empty - Check if the string is empty.
152     LLVM_NODISCARD
153     bool empty() const { return Length == 0; }
154 
155     /// size - Get the string size.
156     LLVM_NODISCARD
157     size_t size() const { return Length; }
158 
159     /// front - Get the first character in the string.
160     LLVM_NODISCARD
161     char front() const {
162       assert(!empty());
163       return Data[0];
164     }
165 
166     /// back - Get the last character in the string.
167     LLVM_NODISCARD
168     char back() const {
169       assert(!empty());
170       return Data[Length-1];
171     }
172 
173     // copy - Allocate copy in Allocator and return StringRef to it.
174     template <typename Allocator>
175     LLVM_NODISCARD StringRef copy(Allocator &A) const {
176       // Don't request a length 0 copy from the allocator.
177       if (empty())
178         return StringRef();
179       char *S = A.template Allocate<char>(Length);
180       std::copy(begin(), end(), S);
181       return StringRef(S, Length);
182     }
183 
184     /// equals - Check for string equality, this is more efficient than
185     /// compare() when the relative ordering of inequal strings isn't needed.
186     LLVM_NODISCARD
187     bool equals(StringRef RHS) const {
188       return (Length == RHS.Length &&
189               compareMemory(Data, RHS.Data, RHS.Length) == 0);
190     }
191 
192     /// Check for string equality, ignoring case.
193     LLVM_NODISCARD
194     bool equals_insensitive(StringRef RHS) const {
195       return Length == RHS.Length && compare_insensitive(RHS) == 0;
196     }
197 
198     /// compare - Compare two strings; the result is -1, 0, or 1 if this string
199     /// is lexicographically less than, equal to, or greater than the \p RHS.
200     LLVM_NODISCARD
201     int compare(StringRef RHS) const {
202       // Check the prefix for a mismatch.
203       if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
204         return Res < 0 ? -1 : 1;
205 
206       // Otherwise the prefixes match, so we only need to check the lengths.
207       if (Length == RHS.Length)
208         return 0;
209       return Length < RHS.Length ? -1 : 1;
210     }
211 
    /// Compare two strings, ignoring case.
    ///
    /// Declared here, defined out of line. NOTE(review): presumably uses the
    /// same -1/0/1 convention as compare() -- confirm against the
    /// definition.
    ///
    /// \param RHS the string to compare this string against.
    LLVM_NODISCARD
    int compare_insensitive(StringRef RHS) const;

    /// compare_numeric - Compare two strings, treating sequences of digits as
    /// numbers.
    ///
    /// Declared here, defined out of line.
    ///
    /// \param RHS the string to compare this string against.
    LLVM_NODISCARD
    int compare_numeric(StringRef RHS) const;

    /// Determine the edit distance between this string and another
    /// string.
    ///
    /// \param Other the string to compare this string against.
    ///
    /// \param AllowReplacements whether to allow character
    /// replacements (change one character into another) as a single
    /// operation, rather than as two operations (an insertion and a
    /// removal).
    ///
    /// \param MaxEditDistance If non-zero, the maximum edit distance that
    /// this routine is allowed to compute. If the edit distance will exceed
    /// that maximum, returns \c MaxEditDistance+1.
    ///
    /// \returns the minimum number of character insertions, removals,
    /// or (if \p AllowReplacements is \c true) replacements needed to
    /// transform one of the given strings into the other. If zero,
    /// the strings are identical.
    LLVM_NODISCARD
    unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
                           unsigned MaxEditDistance = 0) const;
242 
243     /// str - Get the contents as an std::string.
244     LLVM_NODISCARD
245     std::string str() const {
246       if (!Data) return std::string();
247       return std::string(Data, Length);
248     }
249 
250     /// @}
251     /// @name Operator Overloads
252     /// @{
253 
254     LLVM_NODISCARD
255     char operator[](size_t Index) const {
256       assert(Index < Length && "Invalid index!");
257       return Data[Index];
258     }
259 
    /// Disallow accidental assignment from a temporary std::string.
    ///
    /// The declaration here is extra complicated so that `stringRef = {}`
    /// and `stringRef = "abc"` continue to select the move assignment operator.
    ///
    /// The overload only participates when \p T is exactly std::string, which
    /// is what a forwarding reference deduces for an rvalue std::string. An
    /// lvalue std::string deduces a reference type, fails the is_same test
    /// and is therefore not affected by this deleted overload.
    template <typename T>
    std::enable_if_t<std::is_same<T, std::string>::value, StringRef> &
    operator=(T &&Str) = delete;
267 
268     /// @}
269     /// @name Type Conversions
270     /// @{
271 
272     explicit operator std::string() const { return str(); }
273 
274 #if __cplusplus > 201402L
275     operator std::string_view() const {
276       return std::string_view(data(), size());
277     }
278 #endif
279 
280     /// @}
281     /// @name String Predicates
282     /// @{
283 
284     /// Check if this string starts with the given \p Prefix.
285     LLVM_NODISCARD
286     bool startswith(StringRef Prefix) const {
287       return Length >= Prefix.Length &&
288              compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
289     }
290 
    /// Check if this string starts with the given \p Prefix, ignoring case.
    ///
    /// Declared here, defined out of line.
    LLVM_NODISCARD
    bool startswith_insensitive(StringRef Prefix) const;
294 
295     /// Check if this string ends with the given \p Suffix.
296     LLVM_NODISCARD
297     bool endswith(StringRef Suffix) const {
298       return Length >= Suffix.Length &&
299         compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
300     }
301 
    /// Check if this string ends with the given \p Suffix, ignoring case.
    ///
    /// Declared here, defined out of line.
    LLVM_NODISCARD
    bool endswith_insensitive(StringRef Suffix) const;
305 
306     /// @}
307     /// @name String Searching
308     /// @{
309 
310     /// Search for the first character \p C in the string.
311     ///
312     /// \returns The index of the first occurrence of \p C, or npos if not
313     /// found.
314     LLVM_NODISCARD
315     size_t find(char C, size_t From = 0) const {
316       size_t FindBegin = std::min(From, Length);
317       if (FindBegin < Length) { // Avoid calling memchr with nullptr.
318         // Just forward to memchr, which is faster than a hand-rolled loop.
319         if (const void *P = ::memchr(Data + FindBegin, C, Length - FindBegin))
320           return static_cast<const char *>(P) - Data;
321       }
322       return npos;
323     }
324 
    /// Search for the first character \p C in the string, ignoring case.
    ///
    /// Declared here, defined out of line.
    ///
    /// \param From index at which the search starts.
    ///
    /// \returns The index of the first occurrence of \p C, or npos if not
    /// found.
    LLVM_NODISCARD
    size_t find_insensitive(char C, size_t From = 0) const;
331 
332     /// Search for the first character satisfying the predicate \p F
333     ///
334     /// \returns The index of the first character satisfying \p F starting from
335     /// \p From, or npos if not found.
336     LLVM_NODISCARD
337     size_t find_if(function_ref<bool(char)> F, size_t From = 0) const {
338       StringRef S = drop_front(From);
339       while (!S.empty()) {
340         if (F(S.front()))
341           return size() - S.size();
342         S = S.drop_front();
343       }
344       return npos;
345     }
346 
347     /// Search for the first character not satisfying the predicate \p F
348     ///
349     /// \returns The index of the first character not satisfying \p F starting
350     /// from \p From, or npos if not found.
351     LLVM_NODISCARD
352     size_t find_if_not(function_ref<bool(char)> F, size_t From = 0) const {
353       return find_if([F](char c) { return !F(c); }, From);
354     }
355 
    /// Search for the first string \p Str in the string.
    ///
    /// Declared here, defined out of line.
    ///
    /// \param From index at which the search starts.
    ///
    /// \returns The index of the first occurrence of \p Str, or npos if not
    /// found.
    LLVM_NODISCARD
    size_t find(StringRef Str, size_t From = 0) const;

    /// Search for the first string \p Str in the string, ignoring case.
    ///
    /// Declared here, defined out of line.
    ///
    /// \param From index at which the search starts.
    ///
    /// \returns The index of the first occurrence of \p Str, or npos if not
    /// found.
    LLVM_NODISCARD
    size_t find_insensitive(StringRef Str, size_t From = 0) const;
369 
370     /// Search for the last character \p C in the string.
371     ///
372     /// \returns The index of the last occurrence of \p C, or npos if not
373     /// found.
374     LLVM_NODISCARD
375     size_t rfind(char C, size_t From = npos) const {
376       From = std::min(From, Length);
377       size_t i = From;
378       while (i != 0) {
379         --i;
380         if (Data[i] == C)
381           return i;
382       }
383       return npos;
384     }
385 
    /// Search for the last character \p C in the string, ignoring case.
    ///
    /// Declared here, defined out of line.
    ///
    /// \returns The index of the last occurrence of \p C, or npos if not
    /// found.
    LLVM_NODISCARD
    size_t rfind_insensitive(char C, size_t From = npos) const;

    /// Search for the last string \p Str in the string.
    ///
    /// Declared here, defined out of line.
    ///
    /// \returns The index of the last occurrence of \p Str, or npos if not
    /// found.
    LLVM_NODISCARD
    size_t rfind(StringRef Str) const;

    /// Search for the last string \p Str in the string, ignoring case.
    ///
    /// Declared here, defined out of line.
    ///
    /// \returns The index of the last occurrence of \p Str, or npos if not
    /// found.
    LLVM_NODISCARD
    size_t rfind_insensitive(StringRef Str) const;
406 
407     /// Find the first character in the string that is \p C, or npos if not
408     /// found. Same as find.
409     LLVM_NODISCARD
410     size_t find_first_of(char C, size_t From = 0) const {
411       return find(C, From);
412     }
413 
    /// Find the first character in the string that is in \p Chars, or npos if
    /// not found.
    ///
    /// Declared here, defined out of line.
    ///
    /// Complexity: O(size() + Chars.size())
    LLVM_NODISCARD
    size_t find_first_of(StringRef Chars, size_t From = 0) const;

    /// Find the first character in the string that is not \p C or npos if not
    /// found.
    ///
    /// Declared here, defined out of line.
    LLVM_NODISCARD
    size_t find_first_not_of(char C, size_t From = 0) const;

    /// Find the first character in the string that is not in the string
    /// \p Chars, or npos if not found.
    ///
    /// Declared here, defined out of line.
    ///
    /// Complexity: O(size() + Chars.size())
    LLVM_NODISCARD
    size_t find_first_not_of(StringRef Chars, size_t From = 0) const;
432 
433     /// Find the last character in the string that is \p C, or npos if not
434     /// found.
435     LLVM_NODISCARD
436     size_t find_last_of(char C, size_t From = npos) const {
437       return rfind(C, From);
438     }
439 
    /// Find the last character in the string that is in \p Chars, or npos if
    /// not found.
    ///
    /// Declared here, defined out of line.
    ///
    /// Complexity: O(size() + Chars.size())
    LLVM_NODISCARD
    size_t find_last_of(StringRef Chars, size_t From = npos) const;

    /// Find the last character in the string that is not \p C, or npos if not
    /// found.
    ///
    /// Declared here, defined out of line.
    LLVM_NODISCARD
    size_t find_last_not_of(char C, size_t From = npos) const;

    /// Find the last character in the string that is not in \p Chars, or
    /// npos if not found.
    ///
    /// Declared here, defined out of line.
    ///
    /// Complexity: O(size() + Chars.size())
    LLVM_NODISCARD
    size_t find_last_not_of(StringRef Chars, size_t From = npos) const;
458 
459     /// Return true if the given string is a substring of *this, and false
460     /// otherwise.
461     LLVM_NODISCARD
462     bool contains(StringRef Other) const { return find(Other) != npos; }
463 
464     /// Return true if the given character is contained in *this, and false
465     /// otherwise.
466     LLVM_NODISCARD
467     bool contains(char C) const { return find_first_of(C) != npos; }
468 
469     /// Return true if the given string is a substring of *this, and false
470     /// otherwise.
471     LLVM_NODISCARD
472     bool contains_insensitive(StringRef Other) const {
473       return find_insensitive(Other) != npos;
474     }
475 
476     /// Return true if the given character is contained in *this, and false
477     /// otherwise.
478     LLVM_NODISCARD
479     bool contains_insensitive(char C) const {
480       return find_insensitive(C) != npos;
481     }
482 
483     /// @}
484     /// @name Helpful Algorithms
485     /// @{
486 
487     /// Return the number of occurrences of \p C in the string.
488     LLVM_NODISCARD
489     size_t count(char C) const {
490       size_t Count = 0;
491       for (size_t i = 0, e = Length; i != e; ++i)
492         if (Data[i] == C)
493           ++Count;
494       return Count;
495     }
496 
    /// Return the number of non-overlapped occurrences of \p Str in
    /// the string.
    ///
    /// Declared here, defined out of line.
    size_t count(StringRef Str) const;
500 
501     /// Parse the current string as an integer of the specified radix.  If
502     /// \p Radix is specified as zero, this does radix autosensing using
503     /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
504     ///
505     /// If the string is invalid or if only a subset of the string is valid,
506     /// this returns true to signify the error.  The string is considered
507     /// erroneous if empty or if it overflows T.
508     template <typename T>
509     std::enable_if_t<std::numeric_limits<T>::is_signed, bool>
510     getAsInteger(unsigned Radix, T &Result) const {
511       long long LLVal;
512       if (getAsSignedInteger(*this, Radix, LLVal) ||
513             static_cast<T>(LLVal) != LLVal)
514         return true;
515       Result = LLVal;
516       return false;
517     }
518 
519     template <typename T>
520     std::enable_if_t<!std::numeric_limits<T>::is_signed, bool>
521     getAsInteger(unsigned Radix, T &Result) const {
522       unsigned long long ULLVal;
523       // The additional cast to unsigned long long is required to avoid the
524       // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type
525       // 'unsigned __int64' when instantiating getAsInteger with T = bool.
526       if (getAsUnsignedInteger(*this, Radix, ULLVal) ||
527           static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
528         return true;
529       Result = ULLVal;
530       return false;
531     }
532 
533     /// Parse the current string as an integer of the specified radix.  If
534     /// \p Radix is specified as zero, this does radix autosensing using
535     /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
536     ///
537     /// If the string does not begin with a number of the specified radix,
538     /// this returns true to signify the error. The string is considered
539     /// erroneous if empty or if it overflows T.
540     /// The portion of the string representing the discovered numeric value
541     /// is removed from the beginning of the string.
542     template <typename T>
543     std::enable_if_t<std::numeric_limits<T>::is_signed, bool>
544     consumeInteger(unsigned Radix, T &Result) {
545       long long LLVal;
546       if (consumeSignedInteger(*this, Radix, LLVal) ||
547           static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
548         return true;
549       Result = LLVal;
550       return false;
551     }
552 
553     template <typename T>
554     std::enable_if_t<!std::numeric_limits<T>::is_signed, bool>
555     consumeInteger(unsigned Radix, T &Result) {
556       unsigned long long ULLVal;
557       if (consumeUnsignedInteger(*this, Radix, ULLVal) ||
558           static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
559         return true;
560       Result = ULLVal;
561       return false;
562     }
563 
    /// Parse the current string as an integer of the specified \p Radix, or of
    /// an autosensed radix if the \p Radix given is 0.  The current value in
    /// \p Result is discarded, and the storage is changed to be wide enough to
    /// store the parsed integer.
    ///
    /// Declared here, defined out of line.
    ///
    /// \returns true if the string does not solely consist of a valid
    /// non-empty number in the appropriate base.
    ///
    /// APInt::fromString is superficially similar but assumes the
    /// string is well-formed in the given radix.
    bool getAsInteger(unsigned Radix, APInt &Result) const;

    /// Parse the current string as an IEEE double-precision floating
    /// point value.  The string must be a well-formed double.
    ///
    /// Declared here, defined out of line.
    ///
    /// If \p AllowInexact is false, the function will fail if the string
    /// cannot be represented exactly.  Otherwise, the function only fails
    /// in case of an overflow or underflow, or an invalid floating point
    /// representation.
    ///
    /// NOTE(review): presumably returns true on failure like the integer
    /// parsers -- confirm against the definition.
    bool getAsDouble(double &Result, bool AllowInexact = true) const;
584 
585     /// @}
586     /// @name String Operations
587     /// @{
588 
    /// Convert the given ASCII string to lowercase.
    ///
    /// Returns a new std::string. Declared here, defined out of line.
    LLVM_NODISCARD
    std::string lower() const;

    /// Convert the given ASCII string to uppercase.
    ///
    /// Returns a new std::string. Declared here, defined out of line.
    LLVM_NODISCARD
    std::string upper() const;
596 
597     /// @}
598     /// @name Substring Operations
599     /// @{
600 
601     /// Return a reference to the substring from [Start, Start + N).
602     ///
603     /// \param Start The index of the starting character in the substring; if
604     /// the index is npos or greater than the length of the string then the
605     /// empty substring will be returned.
606     ///
607     /// \param N The number of characters to included in the substring. If N
608     /// exceeds the number of characters remaining in the string, the string
609     /// suffix (starting with \p Start) will be returned.
610     LLVM_NODISCARD
611     StringRef substr(size_t Start, size_t N = npos) const {
612       Start = std::min(Start, Length);
613       return StringRef(Data + Start, std::min(N, Length - Start));
614     }
615 
616     /// Return a StringRef equal to 'this' but with only the first \p N
617     /// elements remaining.  If \p N is greater than the length of the
618     /// string, the entire string is returned.
619     LLVM_NODISCARD
620     StringRef take_front(size_t N = 1) const {
621       if (N >= size())
622         return *this;
623       return drop_back(size() - N);
624     }
625 
626     /// Return a StringRef equal to 'this' but with only the last \p N
627     /// elements remaining.  If \p N is greater than the length of the
628     /// string, the entire string is returned.
629     LLVM_NODISCARD
630     StringRef take_back(size_t N = 1) const {
631       if (N >= size())
632         return *this;
633       return drop_front(size() - N);
634     }
635 
636     /// Return the longest prefix of 'this' such that every character
637     /// in the prefix satisfies the given predicate.
638     LLVM_NODISCARD
639     StringRef take_while(function_ref<bool(char)> F) const {
640       return substr(0, find_if_not(F));
641     }
642 
643     /// Return the longest prefix of 'this' such that no character in
644     /// the prefix satisfies the given predicate.
645     LLVM_NODISCARD
646     StringRef take_until(function_ref<bool(char)> F) const {
647       return substr(0, find_if(F));
648     }
649 
650     /// Return a StringRef equal to 'this' but with the first \p N elements
651     /// dropped.
652     LLVM_NODISCARD
653     StringRef drop_front(size_t N = 1) const {
654       assert(size() >= N && "Dropping more elements than exist");
655       return substr(N);
656     }
657 
658     /// Return a StringRef equal to 'this' but with the last \p N elements
659     /// dropped.
660     LLVM_NODISCARD
661     StringRef drop_back(size_t N = 1) const {
662       assert(size() >= N && "Dropping more elements than exist");
663       return substr(0, size()-N);
664     }
665 
666     /// Return a StringRef equal to 'this', but with all characters satisfying
667     /// the given predicate dropped from the beginning of the string.
668     LLVM_NODISCARD
669     StringRef drop_while(function_ref<bool(char)> F) const {
670       return substr(find_if_not(F));
671     }
672 
673     /// Return a StringRef equal to 'this', but with all characters not
674     /// satisfying the given predicate dropped from the beginning of the string.
675     LLVM_NODISCARD
676     StringRef drop_until(function_ref<bool(char)> F) const {
677       return substr(find_if(F));
678     }
679 
680     /// Returns true if this StringRef has the given prefix and removes that
681     /// prefix.
682     bool consume_front(StringRef Prefix) {
683       if (!startswith(Prefix))
684         return false;
685 
686       *this = drop_front(Prefix.size());
687       return true;
688     }
689 
690     /// Returns true if this StringRef has the given prefix, ignoring case,
691     /// and removes that prefix.
692     bool consume_front_insensitive(StringRef Prefix) {
693       if (!startswith_insensitive(Prefix))
694         return false;
695 
696       *this = drop_front(Prefix.size());
697       return true;
698     }
699 
700     /// Returns true if this StringRef has the given suffix and removes that
701     /// suffix.
702     bool consume_back(StringRef Suffix) {
703       if (!endswith(Suffix))
704         return false;
705 
706       *this = drop_back(Suffix.size());
707       return true;
708     }
709 
710     /// Returns true if this StringRef has the given suffix, ignoring case,
711     /// and removes that suffix.
712     bool consume_back_insensitive(StringRef Suffix) {
713       if (!endswith_insensitive(Suffix))
714         return false;
715 
716       *this = drop_back(Suffix.size());
717       return true;
718     }
719 
720     /// Return a reference to the substring from [Start, End).
721     ///
722     /// \param Start The index of the starting character in the substring; if
723     /// the index is npos or greater than the length of the string then the
724     /// empty substring will be returned.
725     ///
726     /// \param End The index following the last character to include in the
727     /// substring. If this is npos or exceeds the number of characters
728     /// remaining in the string, the string suffix (starting with \p Start)
729     /// will be returned. If this is less than \p Start, an empty string will
730     /// be returned.
731     LLVM_NODISCARD
732     StringRef slice(size_t Start, size_t End) const {
733       Start = std::min(Start, Length);
734       End = std::min(std::max(Start, End), Length);
735       return StringRef(Data + Start, End - Start);
736     }
737 
738     /// Split into two substrings around the first occurrence of a separator
739     /// character.
740     ///
741     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
742     /// such that (*this == LHS + Separator + RHS) is true and RHS is
743     /// maximal. If \p Separator is not in the string, then the result is a
744     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
745     ///
746     /// \param Separator The character to split on.
747     /// \returns The split substrings.
748     LLVM_NODISCARD
749     std::pair<StringRef, StringRef> split(char Separator) const {
750       return split(StringRef(&Separator, 1));
751     }
752 
753     /// Split into two substrings around the first occurrence of a separator
754     /// string.
755     ///
756     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
757     /// such that (*this == LHS + Separator + RHS) is true and RHS is
758     /// maximal. If \p Separator is not in the string, then the result is a
759     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
760     ///
761     /// \param Separator - The string to split on.
762     /// \return - The split substrings.
763     LLVM_NODISCARD
764     std::pair<StringRef, StringRef> split(StringRef Separator) const {
765       size_t Idx = find(Separator);
766       if (Idx == npos)
767         return std::make_pair(*this, StringRef());
768       return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
769     }
770 
771     /// Split into two substrings around the last occurrence of a separator
772     /// string.
773     ///
774     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
775     /// such that (*this == LHS + Separator + RHS) is true and RHS is
776     /// minimal. If \p Separator is not in the string, then the result is a
777     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
778     ///
779     /// \param Separator - The string to split on.
780     /// \return - The split substrings.
781     LLVM_NODISCARD
782     std::pair<StringRef, StringRef> rsplit(StringRef Separator) const {
783       size_t Idx = rfind(Separator);
784       if (Idx == npos)
785         return std::make_pair(*this, StringRef());
786       return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
787     }
788 
    /// Split into substrings around the occurrences of a separator string.
    ///
    /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
    /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
    /// elements are added to A.
    /// If \p KeepEmpty is false, empty strings are not added to \p A. They
    /// still count when considering \p MaxSplit.
    /// A useful invariant is that
    /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
    ///
    /// Declared here, defined out of line.
    ///
    /// \param A - Where to put the substrings.
    /// \param Separator - The string to split on.
    /// \param MaxSplit - The maximum number of times the string is split.
    /// \param KeepEmpty - True if empty substring should be added.
    void split(SmallVectorImpl<StringRef> &A,
               StringRef Separator, int MaxSplit = -1,
               bool KeepEmpty = true) const;

    /// Split into substrings around the occurrences of a separator character.
    ///
    /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
    /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
    /// elements are added to A.
    /// If \p KeepEmpty is false, empty strings are not added to \p A. They
    /// still count when considering \p MaxSplit.
    /// A useful invariant is that
    /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
    ///
    /// Declared here, defined out of line.
    ///
    /// \param A - Where to put the substrings.
    /// \param Separator - The character to split on.
    /// \param MaxSplit - The maximum number of times the string is split.
    /// \param KeepEmpty - True if empty substring should be added.
    void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1,
               bool KeepEmpty = true) const;
823 
824     /// Split into two substrings around the last occurrence of a separator
825     /// character.
826     ///
827     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
828     /// such that (*this == LHS + Separator + RHS) is true and RHS is
829     /// minimal. If \p Separator is not in the string, then the result is a
830     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
831     ///
832     /// \param Separator - The character to split on.
833     /// \return - The split substrings.
834     LLVM_NODISCARD
835     std::pair<StringRef, StringRef> rsplit(char Separator) const {
836       return rsplit(StringRef(&Separator, 1));
837     }
838 
839     /// Return string with consecutive \p Char characters starting from the
840     /// the left removed.
841     LLVM_NODISCARD
842     StringRef ltrim(char Char) const {
843       return drop_front(std::min(Length, find_first_not_of(Char)));
844     }
845 
846     /// Return string with consecutive characters in \p Chars starting from
847     /// the left removed.
848     LLVM_NODISCARD
849     StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
850       return drop_front(std::min(Length, find_first_not_of(Chars)));
851     }
852 
853     /// Return string with consecutive \p Char characters starting from the
854     /// right removed.
855     LLVM_NODISCARD
856     StringRef rtrim(char Char) const {
857       return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1));
858     }
859 
860     /// Return string with consecutive characters in \p Chars starting from
861     /// the right removed.
862     LLVM_NODISCARD
863     StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
864       return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1));
865     }
866 
867     /// Return string with consecutive \p Char characters starting from the
868     /// left and right removed.
869     LLVM_NODISCARD
870     StringRef trim(char Char) const {
871       return ltrim(Char).rtrim(Char);
872     }
873 
874     /// Return string with consecutive characters in \p Chars starting from
875     /// the left and right removed.
876     LLVM_NODISCARD
877     StringRef trim(StringRef Chars = " \t\n\v\f\r") const {
878       return ltrim(Chars).rtrim(Chars);
879     }
880 
881     /// @}
882   };
883 
884   /// A wrapper around a string literal that serves as a proxy for constructing
885   /// global tables of StringRefs with the length computed at compile time.
886   /// In order to avoid the invocation of a global constructor, StringLiteral
887   /// should *only* be used in a constexpr context, as such:
888   ///
889   /// constexpr StringLiteral S("test");
890   ///
891   class StringLiteral : public StringRef {
892   private:
893     constexpr StringLiteral(const char *Str, size_t N) : StringRef(Str, N) {
894     }
895 
896   public:
897     template <size_t N>
898     constexpr StringLiteral(const char (&Str)[N])
899 #if defined(__clang__) && __has_attribute(enable_if)
900 #pragma clang diagnostic push
901 #pragma clang diagnostic ignored "-Wgcc-compat"
902         __attribute((enable_if(__builtin_strlen(Str) == N - 1,
903                                "invalid string literal")))
904 #pragma clang diagnostic pop
905 #endif
906         : StringRef(Str, N - 1) {
907     }
908 
909     // Explicit construction for strings like "foo\0bar".
910     template <size_t N>
911     static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) {
912       return StringLiteral(Str, N - 1);
913     }
914   };
915 
916   /// @name StringRef Comparison Operators
917   /// @{
918 
919   inline bool operator==(StringRef LHS, StringRef RHS) {
920     return LHS.equals(RHS);
921   }
922 
923   inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }
924 
925   inline bool operator<(StringRef LHS, StringRef RHS) {
926     return LHS.compare(RHS) == -1;
927   }
928 
929   inline bool operator<=(StringRef LHS, StringRef RHS) {
930     return LHS.compare(RHS) != 1;
931   }
932 
933   inline bool operator>(StringRef LHS, StringRef RHS) {
934     return LHS.compare(RHS) == 1;
935   }
936 
937   inline bool operator>=(StringRef LHS, StringRef RHS) {
938     return LHS.compare(RHS) != -1;
939   }
940 
941   inline std::string &operator+=(std::string &buffer, StringRef string) {
942     return buffer.append(string.data(), string.size());
943   }
944 
945   /// @}
946 
  /// Compute a hash_code for a StringRef.
  ///
  /// Only declared here (hash_code itself is merely forward-declared in this
  /// header); the definition is provided elsewhere.
  ///
  /// \param S - The string to hash.
  /// \return - The hash_code computed for \p S.
  LLVM_NODISCARD
  hash_code hash_value(StringRef S);
950 
951   // Provide DenseMapInfo for StringRefs.
952   template <> struct DenseMapInfo<StringRef> {
953     static inline StringRef getEmptyKey() {
954       return StringRef(
955           reinterpret_cast<const char *>(~static_cast<uintptr_t>(0)), 0);
956     }
957 
958     static inline StringRef getTombstoneKey() {
959       return StringRef(
960           reinterpret_cast<const char *>(~static_cast<uintptr_t>(1)), 0);
961     }
962 
963     static unsigned getHashValue(StringRef Val) {
964       assert(Val.data() != getEmptyKey().data() &&
965              "Cannot hash the empty key!");
966       assert(Val.data() != getTombstoneKey().data() &&
967              "Cannot hash the tombstone key!");
968       return (unsigned)(hash_value(Val));
969     }
970 
971     static bool isEqual(StringRef LHS, StringRef RHS) {
972       if (RHS.data() == getEmptyKey().data())
973         return LHS.data() == getEmptyKey().data();
974       if (RHS.data() == getTombstoneKey().data())
975         return LHS.data() == getTombstoneKey().data();
976       return LHS == RHS;
977     }
978   };
979 
980 } // end namespace llvm
981 
982 #endif // LLVM_ADT_STRINGREF_H
983