10b57cec5SDimitry Andric //===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #ifndef LLVM_ADT_STRINGREF_H
100b57cec5SDimitry Andric #define LLVM_ADT_STRINGREF_H
110b57cec5SDimitry Andric 
1204eeddc0SDimitry Andric #include "llvm/ADT/DenseMapInfo.h"
1304eeddc0SDimitry Andric #include "llvm/ADT/STLFunctionalExtras.h"
140b57cec5SDimitry Andric #include "llvm/ADT/iterator_range.h"
150b57cec5SDimitry Andric #include "llvm/Support/Compiler.h"
160b57cec5SDimitry Andric #include <algorithm>
170b57cec5SDimitry Andric #include <cassert>
180b57cec5SDimitry Andric #include <cstddef>
190b57cec5SDimitry Andric #include <cstring>
200b57cec5SDimitry Andric #include <limits>
210b57cec5SDimitry Andric #include <string>
225ffd83dbSDimitry Andric #include <string_view>
230b57cec5SDimitry Andric #include <type_traits>
240b57cec5SDimitry Andric #include <utility>
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric namespace llvm {
270b57cec5SDimitry Andric 
280b57cec5SDimitry Andric   class APInt;
290b57cec5SDimitry Andric   class hash_code;
300b57cec5SDimitry Andric   template <typename T> class SmallVectorImpl;
310b57cec5SDimitry Andric   class StringRef;
320b57cec5SDimitry Andric 
330b57cec5SDimitry Andric   /// Helper functions for StringRef::getAsInteger.
340b57cec5SDimitry Andric   bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
350b57cec5SDimitry Andric                             unsigned long long &Result);
360b57cec5SDimitry Andric 
370b57cec5SDimitry Andric   bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);
380b57cec5SDimitry Andric 
390b57cec5SDimitry Andric   bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
400b57cec5SDimitry Andric                               unsigned long long &Result);
410b57cec5SDimitry Andric   bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);
420b57cec5SDimitry Andric 
430b57cec5SDimitry Andric   /// StringRef - Represent a constant reference to a string, i.e. a character
440b57cec5SDimitry Andric   /// array and a length, which need not be null terminated.
450b57cec5SDimitry Andric   ///
460b57cec5SDimitry Andric   /// This class does not own the string data, it is expected to be used in
470b57cec5SDimitry Andric   /// situations where the character data resides in some other buffer, whose
480b57cec5SDimitry Andric   /// lifetime extends past that of the StringRef. For this reason, it is not in
490b57cec5SDimitry Andric   /// general safe to store a StringRef.
505ffd83dbSDimitry Andric   class LLVM_GSL_POINTER StringRef {
510b57cec5SDimitry Andric   public:
525ffd83dbSDimitry Andric     static constexpr size_t npos = ~size_t(0);
530b57cec5SDimitry Andric 
540b57cec5SDimitry Andric     using iterator = const char *;
550b57cec5SDimitry Andric     using const_iterator = const char *;
560b57cec5SDimitry Andric     using size_type = size_t;
570b57cec5SDimitry Andric 
580b57cec5SDimitry Andric   private:
590b57cec5SDimitry Andric     /// The start of the string, in an external buffer.
600b57cec5SDimitry Andric     const char *Data = nullptr;
610b57cec5SDimitry Andric 
620b57cec5SDimitry Andric     /// The length of the string.
630b57cec5SDimitry Andric     size_t Length = 0;
640b57cec5SDimitry Andric 
650b57cec5SDimitry Andric     // Workaround memcmp issue with null pointers (undefined behavior)
660b57cec5SDimitry Andric     // by providing a specialized version
/// Three-way byte comparison of two buffers of \p Length bytes.
///
/// A zero-length request returns 0 without calling memcmp, so memcmp is never
/// handed null pointers (which would be undefined behavior).
static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
  // Only touch the buffers when there is at least one byte to compare.
  return Length != 0 ? ::memcmp(Lhs, Rhs, Length) : 0;
}
710b57cec5SDimitry Andric 
720b57cec5SDimitry Andric   public:
730b57cec5SDimitry Andric     /// @name Constructors
740b57cec5SDimitry Andric     /// @{
750b57cec5SDimitry Andric 
760b57cec5SDimitry Andric     /// Construct an empty string ref.
770b57cec5SDimitry Andric     /*implicit*/ StringRef() = default;
780b57cec5SDimitry Andric 
790b57cec5SDimitry Andric     /// Disable conversion from nullptr.  This prevents things like
800b57cec5SDimitry Andric     /// if (S == nullptr)
810b57cec5SDimitry Andric     StringRef(std::nullptr_t) = delete;
820b57cec5SDimitry Andric 
830b57cec5SDimitry Andric     /// Construct a string ref from a cstring.
StringRef(const char * Str)848bcb0991SDimitry Andric     /*implicit*/ constexpr StringRef(const char *Str)
85bdd1243dSDimitry Andric         : Data(Str), Length(Str ?
86bdd1243dSDimitry Andric     // GCC 7 doesn't have constexpr char_traits. Fall back to __builtin_strlen.
87bdd1243dSDimitry Andric #if defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE < 8
88bdd1243dSDimitry Andric                                 __builtin_strlen(Str)
89bdd1243dSDimitry Andric #else
90bdd1243dSDimitry Andric                                 std::char_traits<char>::length(Str)
91bdd1243dSDimitry Andric #endif
92bdd1243dSDimitry Andric                                 : 0) {
93bdd1243dSDimitry Andric     }
940b57cec5SDimitry Andric 
950b57cec5SDimitry Andric     /// Construct a string ref from a pointer and length.
StringRef(const char * data,size_t length)960b57cec5SDimitry Andric     /*implicit*/ constexpr StringRef(const char *data, size_t length)
970b57cec5SDimitry Andric         : Data(data), Length(length) {}
980b57cec5SDimitry Andric 
990b57cec5SDimitry Andric     /// Construct a string ref from an std::string.
StringRef(const std::string & Str)1000b57cec5SDimitry Andric     /*implicit*/ StringRef(const std::string &Str)
1010b57cec5SDimitry Andric       : Data(Str.data()), Length(Str.length()) {}
1020b57cec5SDimitry Andric 
1035ffd83dbSDimitry Andric     /// Construct a string ref from an std::string_view.
StringRef(std::string_view Str)1045ffd83dbSDimitry Andric     /*implicit*/ constexpr StringRef(std::string_view Str)
1055ffd83dbSDimitry Andric         : Data(Str.data()), Length(Str.size()) {}
1065ffd83dbSDimitry Andric 
1070b57cec5SDimitry Andric     /// @}
1080b57cec5SDimitry Andric     /// @name Iterators
1090b57cec5SDimitry Andric     /// @{
1100b57cec5SDimitry Andric 
begin()1110b57cec5SDimitry Andric     iterator begin() const { return Data; }
1120b57cec5SDimitry Andric 
end()1130b57cec5SDimitry Andric     iterator end() const { return Data + Length; }
1140b57cec5SDimitry Andric 
bytes_begin()1150b57cec5SDimitry Andric     const unsigned char *bytes_begin() const {
1160b57cec5SDimitry Andric       return reinterpret_cast<const unsigned char *>(begin());
1170b57cec5SDimitry Andric     }
bytes_end()1180b57cec5SDimitry Andric     const unsigned char *bytes_end() const {
1190b57cec5SDimitry Andric       return reinterpret_cast<const unsigned char *>(end());
1200b57cec5SDimitry Andric     }
bytes()1210b57cec5SDimitry Andric     iterator_range<const unsigned char *> bytes() const {
1220b57cec5SDimitry Andric       return make_range(bytes_begin(), bytes_end());
1230b57cec5SDimitry Andric     }
1240b57cec5SDimitry Andric 
1250b57cec5SDimitry Andric     /// @}
1260b57cec5SDimitry Andric     /// @name String Operations
1270b57cec5SDimitry Andric     /// @{
1280b57cec5SDimitry Andric 
1290b57cec5SDimitry Andric     /// data - Get a pointer to the start of the string (which may not be null
1300b57cec5SDimitry Andric     /// terminated).
data()131297eecfbSDimitry Andric     [[nodiscard]] constexpr const char *data() const { return Data; }
1320b57cec5SDimitry Andric 
1330b57cec5SDimitry Andric     /// empty - Check if the string is empty.
empty()134bdd1243dSDimitry Andric     [[nodiscard]] constexpr bool empty() const { return Length == 0; }
1350b57cec5SDimitry Andric 
1360b57cec5SDimitry Andric     /// size - Get the string size.
size()137bdd1243dSDimitry Andric     [[nodiscard]] constexpr size_t size() const { return Length; }
1380b57cec5SDimitry Andric 
1390b57cec5SDimitry Andric     /// front - Get the first character in the string.
front()140bdd1243dSDimitry Andric     [[nodiscard]] char front() const {
1410b57cec5SDimitry Andric       assert(!empty());
1420b57cec5SDimitry Andric       return Data[0];
1430b57cec5SDimitry Andric     }
1440b57cec5SDimitry Andric 
1450b57cec5SDimitry Andric     /// back - Get the last character in the string.
back()146bdd1243dSDimitry Andric     [[nodiscard]] char back() const {
1470b57cec5SDimitry Andric       assert(!empty());
1480b57cec5SDimitry Andric       return Data[Length-1];
1490b57cec5SDimitry Andric     }
1500b57cec5SDimitry Andric 
1510b57cec5SDimitry Andric     // copy - Allocate copy in Allocator and return StringRef to it.
1520b57cec5SDimitry Andric     template <typename Allocator>
copy(Allocator & A)153bdd1243dSDimitry Andric     [[nodiscard]] StringRef copy(Allocator &A) const {
1540b57cec5SDimitry Andric       // Don't request a length 0 copy from the allocator.
1550b57cec5SDimitry Andric       if (empty())
1560b57cec5SDimitry Andric         return StringRef();
1570b57cec5SDimitry Andric       char *S = A.template Allocate<char>(Length);
1580b57cec5SDimitry Andric       std::copy(begin(), end(), S);
1590b57cec5SDimitry Andric       return StringRef(S, Length);
1600b57cec5SDimitry Andric     }
1610b57cec5SDimitry Andric 
1620b57cec5SDimitry Andric     /// equals - Check for string equality, this is more efficient than
1630b57cec5SDimitry Andric     /// compare() when the relative ordering of inequal strings isn't needed.
equals(StringRef RHS)164bdd1243dSDimitry Andric     [[nodiscard]] bool equals(StringRef RHS) const {
1650b57cec5SDimitry Andric       return (Length == RHS.Length &&
1660b57cec5SDimitry Andric               compareMemory(Data, RHS.Data, RHS.Length) == 0);
1670b57cec5SDimitry Andric     }
1680b57cec5SDimitry Andric 
169fe6060f1SDimitry Andric     /// Check for string equality, ignoring case.
equals_insensitive(StringRef RHS)170bdd1243dSDimitry Andric     [[nodiscard]] bool equals_insensitive(StringRef RHS) const {
171fe6060f1SDimitry Andric       return Length == RHS.Length && compare_insensitive(RHS) == 0;
1720b57cec5SDimitry Andric     }
1730b57cec5SDimitry Andric 
174bdd1243dSDimitry Andric     /// compare - Compare two strings; the result is negative, zero, or positive
175bdd1243dSDimitry Andric     /// if this string is lexicographically less than, equal to, or greater than
176bdd1243dSDimitry Andric     /// the \p RHS.
compare(StringRef RHS)177bdd1243dSDimitry Andric     [[nodiscard]] int compare(StringRef RHS) const {
1780b57cec5SDimitry Andric       // Check the prefix for a mismatch.
1790b57cec5SDimitry Andric       if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
1800b57cec5SDimitry Andric         return Res < 0 ? -1 : 1;
1810b57cec5SDimitry Andric 
1820b57cec5SDimitry Andric       // Otherwise the prefixes match, so we only need to check the lengths.
1830b57cec5SDimitry Andric       if (Length == RHS.Length)
1840b57cec5SDimitry Andric         return 0;
1850b57cec5SDimitry Andric       return Length < RHS.Length ? -1 : 1;
1860b57cec5SDimitry Andric     }
1870b57cec5SDimitry Andric 
188fe6060f1SDimitry Andric     /// Compare two strings, ignoring case.
189bdd1243dSDimitry Andric     [[nodiscard]] int compare_insensitive(StringRef RHS) const;
1900b57cec5SDimitry Andric 
1910b57cec5SDimitry Andric     /// compare_numeric - Compare two strings, treating sequences of digits as
1920b57cec5SDimitry Andric     /// numbers.
193bdd1243dSDimitry Andric     [[nodiscard]] int compare_numeric(StringRef RHS) const;
1940b57cec5SDimitry Andric 
1950b57cec5SDimitry Andric     /// Determine the edit distance between this string and another
1960b57cec5SDimitry Andric     /// string.
1970b57cec5SDimitry Andric     ///
1980b57cec5SDimitry Andric     /// \param Other the string to compare this string against.
1990b57cec5SDimitry Andric     ///
2000b57cec5SDimitry Andric     /// \param AllowReplacements whether to allow character
2010b57cec5SDimitry Andric     /// replacements (change one character into another) as a single
2020b57cec5SDimitry Andric     /// operation, rather than as two operations (an insertion and a
2030b57cec5SDimitry Andric     /// removal).
2040b57cec5SDimitry Andric     ///
2050b57cec5SDimitry Andric     /// \param MaxEditDistance If non-zero, the maximum edit distance that
2060b57cec5SDimitry Andric     /// this routine is allowed to compute. If the edit distance will exceed
2070b57cec5SDimitry Andric     /// that maximum, returns \c MaxEditDistance+1.
2080b57cec5SDimitry Andric     ///
2090b57cec5SDimitry Andric     /// \returns the minimum number of character insertions, removals,
2100b57cec5SDimitry Andric     /// or (if \p AllowReplacements is \c true) replacements needed to
2110b57cec5SDimitry Andric     /// transform one of the given strings into the other. If zero,
2120b57cec5SDimitry Andric     /// the strings are identical.
213bdd1243dSDimitry Andric     [[nodiscard]] unsigned edit_distance(StringRef Other,
214bdd1243dSDimitry Andric                                          bool AllowReplacements = true,
2150b57cec5SDimitry Andric                                          unsigned MaxEditDistance = 0) const;
2160b57cec5SDimitry Andric 
217bdd1243dSDimitry Andric     [[nodiscard]] unsigned
21881ad6265SDimitry Andric     edit_distance_insensitive(StringRef Other, bool AllowReplacements = true,
21981ad6265SDimitry Andric                               unsigned MaxEditDistance = 0) const;
22081ad6265SDimitry Andric 
2210b57cec5SDimitry Andric     /// str - Get the contents as an std::string.
str()222bdd1243dSDimitry Andric     [[nodiscard]] std::string str() const {
2230b57cec5SDimitry Andric       if (!Data) return std::string();
2240b57cec5SDimitry Andric       return std::string(Data, Length);
2250b57cec5SDimitry Andric     }
2260b57cec5SDimitry Andric 
2270b57cec5SDimitry Andric     /// @}
2280b57cec5SDimitry Andric     /// @name Operator Overloads
2290b57cec5SDimitry Andric     /// @{
2300b57cec5SDimitry Andric 
231bdd1243dSDimitry Andric     [[nodiscard]] char operator[](size_t Index) const {
2320b57cec5SDimitry Andric       assert(Index < Length && "Invalid index!");
2330b57cec5SDimitry Andric       return Data[Index];
2340b57cec5SDimitry Andric     }
2350b57cec5SDimitry Andric 
2360b57cec5SDimitry Andric     /// Disallow accidental assignment from a temporary std::string.
2370b57cec5SDimitry Andric     ///
2380b57cec5SDimitry Andric     /// The declaration here is extra complicated so that `stringRef = {}`
2390b57cec5SDimitry Andric     /// and `stringRef = "abc"` continue to select the move assignment operator.
2400b57cec5SDimitry Andric     template <typename T>
2415ffd83dbSDimitry Andric     std::enable_if_t<std::is_same<T, std::string>::value, StringRef> &
2420b57cec5SDimitry Andric     operator=(T &&Str) = delete;
2430b57cec5SDimitry Andric 
2440b57cec5SDimitry Andric     /// @}
2450b57cec5SDimitry Andric     /// @name Type Conversions
2460b57cec5SDimitry Andric     /// @{
2470b57cec5SDimitry Andric 
string_view()248297eecfbSDimitry Andric     constexpr operator std::string_view() const {
2495ffd83dbSDimitry Andric       return std::string_view(data(), size());
2500b57cec5SDimitry Andric     }
2510b57cec5SDimitry Andric 
2520b57cec5SDimitry Andric     /// @}
2530b57cec5SDimitry Andric     /// @name String Predicates
2540b57cec5SDimitry Andric     /// @{
2550b57cec5SDimitry Andric 
2560b57cec5SDimitry Andric     /// Check if this string starts with the given \p Prefix.
starts_with(StringRef Prefix)257bdd1243dSDimitry Andric     [[nodiscard]] bool starts_with(StringRef Prefix) const {
2580b57cec5SDimitry Andric       return Length >= Prefix.Length &&
2590b57cec5SDimitry Andric              compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
2600b57cec5SDimitry Andric     }
261cb14a3feSDimitry Andric     [[nodiscard]] LLVM_DEPRECATED(
262cb14a3feSDimitry Andric         "Use starts_with instead",
startswith(StringRef Prefix)263cb14a3feSDimitry Andric         "starts_with") bool startswith(StringRef Prefix) const {
264bdd1243dSDimitry Andric       return starts_with(Prefix);
265bdd1243dSDimitry Andric     }
2660b57cec5SDimitry Andric 
2670b57cec5SDimitry Andric     /// Check if this string starts with the given \p Prefix, ignoring case.
268bdd1243dSDimitry Andric     [[nodiscard]] bool starts_with_insensitive(StringRef Prefix) const;
2690b57cec5SDimitry Andric 
2700b57cec5SDimitry Andric     /// Check if this string ends with the given \p Suffix.
ends_with(StringRef Suffix)271bdd1243dSDimitry Andric     [[nodiscard]] bool ends_with(StringRef Suffix) const {
2720b57cec5SDimitry Andric       return Length >= Suffix.Length &&
273bdd1243dSDimitry Andric              compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) ==
274bdd1243dSDimitry Andric                  0;
275bdd1243dSDimitry Andric     }
276cb14a3feSDimitry Andric     [[nodiscard]] LLVM_DEPRECATED(
277cb14a3feSDimitry Andric         "Use ends_with instead",
endswith(StringRef Suffix)278cb14a3feSDimitry Andric         "ends_with") bool endswith(StringRef Suffix) const {
279bdd1243dSDimitry Andric       return ends_with(Suffix);
2800b57cec5SDimitry Andric     }
2810b57cec5SDimitry Andric 
2820b57cec5SDimitry Andric     /// Check if this string ends with the given \p Suffix, ignoring case.
283bdd1243dSDimitry Andric     [[nodiscard]] bool ends_with_insensitive(StringRef Suffix) const;
2840b57cec5SDimitry Andric 
2850b57cec5SDimitry Andric     /// @}
2860b57cec5SDimitry Andric     /// @name String Searching
2870b57cec5SDimitry Andric     /// @{
2880b57cec5SDimitry Andric 
2890b57cec5SDimitry Andric     /// Search for the first character \p C in the string.
2900b57cec5SDimitry Andric     ///
2910b57cec5SDimitry Andric     /// \returns The index of the first occurrence of \p C, or npos if not
2920b57cec5SDimitry Andric     /// found.
293bdd1243dSDimitry Andric     [[nodiscard]] size_t find(char C, size_t From = 0) const {
294bdd1243dSDimitry Andric       return std::string_view(*this).find(C, From);
2950b57cec5SDimitry Andric     }
2960b57cec5SDimitry Andric 
2970b57cec5SDimitry Andric     /// Search for the first character \p C in the string, ignoring case.
2980b57cec5SDimitry Andric     ///
2990b57cec5SDimitry Andric     /// \returns The index of the first occurrence of \p C, or npos if not
3000b57cec5SDimitry Andric     /// found.
301bdd1243dSDimitry Andric     [[nodiscard]] size_t find_insensitive(char C, size_t From = 0) const;
3020b57cec5SDimitry Andric 
3030b57cec5SDimitry Andric     /// Search for the first character satisfying the predicate \p F
3040b57cec5SDimitry Andric     ///
3050b57cec5SDimitry Andric     /// \returns The index of the first character satisfying \p F starting from
3060b57cec5SDimitry Andric     /// \p From, or npos if not found.
307bdd1243dSDimitry Andric     [[nodiscard]] size_t find_if(function_ref<bool(char)> F,
308bdd1243dSDimitry Andric                                  size_t From = 0) const {
3090b57cec5SDimitry Andric       StringRef S = drop_front(From);
3100b57cec5SDimitry Andric       while (!S.empty()) {
3110b57cec5SDimitry Andric         if (F(S.front()))
3120b57cec5SDimitry Andric           return size() - S.size();
3130b57cec5SDimitry Andric         S = S.drop_front();
3140b57cec5SDimitry Andric       }
3150b57cec5SDimitry Andric       return npos;
3160b57cec5SDimitry Andric     }
3170b57cec5SDimitry Andric 
3180b57cec5SDimitry Andric     /// Search for the first character not satisfying the predicate \p F
3190b57cec5SDimitry Andric     ///
3200b57cec5SDimitry Andric     /// \returns The index of the first character not satisfying \p F starting
3210b57cec5SDimitry Andric     /// from \p From, or npos if not found.
322bdd1243dSDimitry Andric     [[nodiscard]] size_t find_if_not(function_ref<bool(char)> F,
323bdd1243dSDimitry Andric                                      size_t From = 0) const {
3240b57cec5SDimitry Andric       return find_if([F](char c) { return !F(c); }, From);
3250b57cec5SDimitry Andric     }
3260b57cec5SDimitry Andric 
3270b57cec5SDimitry Andric     /// Search for the first string \p Str in the string.
3280b57cec5SDimitry Andric     ///
3290b57cec5SDimitry Andric     /// \returns The index of the first occurrence of \p Str, or npos if not
3300b57cec5SDimitry Andric     /// found.
331bdd1243dSDimitry Andric     [[nodiscard]] size_t find(StringRef Str, size_t From = 0) const;
3320b57cec5SDimitry Andric 
3330b57cec5SDimitry Andric     /// Search for the first string \p Str in the string, ignoring case.
3340b57cec5SDimitry Andric     ///
3350b57cec5SDimitry Andric     /// \returns The index of the first occurrence of \p Str, or npos if not
3360b57cec5SDimitry Andric     /// found.
337bdd1243dSDimitry Andric     [[nodiscard]] size_t find_insensitive(StringRef Str, size_t From = 0) const;
3380b57cec5SDimitry Andric 
3390b57cec5SDimitry Andric     /// Search for the last character \p C in the string.
3400b57cec5SDimitry Andric     ///
3410b57cec5SDimitry Andric     /// \returns The index of the last occurrence of \p C, or npos if not
3420b57cec5SDimitry Andric     /// found.
343bdd1243dSDimitry Andric     [[nodiscard]] size_t rfind(char C, size_t From = npos) const {
34406c3fb27SDimitry Andric       size_t I = std::min(From, Length);
34506c3fb27SDimitry Andric       while (I) {
34606c3fb27SDimitry Andric         --I;
34706c3fb27SDimitry Andric         if (Data[I] == C)
34806c3fb27SDimitry Andric           return I;
3490b57cec5SDimitry Andric       }
3500b57cec5SDimitry Andric       return npos;
3510b57cec5SDimitry Andric     }
3520b57cec5SDimitry Andric 
3530b57cec5SDimitry Andric     /// Search for the last character \p C in the string, ignoring case.
3540b57cec5SDimitry Andric     ///
3550b57cec5SDimitry Andric     /// \returns The index of the last occurrence of \p C, or npos if not
3560b57cec5SDimitry Andric     /// found.
357bdd1243dSDimitry Andric     [[nodiscard]] size_t rfind_insensitive(char C, size_t From = npos) const;
3580b57cec5SDimitry Andric 
3590b57cec5SDimitry Andric     /// Search for the last string \p Str in the string.
3600b57cec5SDimitry Andric     ///
3610b57cec5SDimitry Andric     /// \returns The index of the last occurrence of \p Str, or npos if not
3620b57cec5SDimitry Andric     /// found.
363bdd1243dSDimitry Andric     [[nodiscard]] size_t rfind(StringRef Str) const;
3640b57cec5SDimitry Andric 
3650b57cec5SDimitry Andric     /// Search for the last string \p Str in the string, ignoring case.
3660b57cec5SDimitry Andric     ///
3670b57cec5SDimitry Andric     /// \returns The index of the last occurrence of \p Str, or npos if not
3680b57cec5SDimitry Andric     /// found.
369bdd1243dSDimitry Andric     [[nodiscard]] size_t rfind_insensitive(StringRef Str) const;
3700b57cec5SDimitry Andric 
3710b57cec5SDimitry Andric     /// Find the first character in the string that is \p C, or npos if not
3720b57cec5SDimitry Andric     /// found. Same as find.
373bdd1243dSDimitry Andric     [[nodiscard]] size_t find_first_of(char C, size_t From = 0) const {
3740b57cec5SDimitry Andric       return find(C, From);
3750b57cec5SDimitry Andric     }
3760b57cec5SDimitry Andric 
3770b57cec5SDimitry Andric     /// Find the first character in the string that is in \p Chars, or npos if
3780b57cec5SDimitry Andric     /// not found.
3790b57cec5SDimitry Andric     ///
3800b57cec5SDimitry Andric     /// Complexity: O(size() + Chars.size())
381bdd1243dSDimitry Andric     [[nodiscard]] size_t find_first_of(StringRef Chars, size_t From = 0) const;
3820b57cec5SDimitry Andric 
3830b57cec5SDimitry Andric     /// Find the first character in the string that is not \p C or npos if not
3840b57cec5SDimitry Andric     /// found.
385bdd1243dSDimitry Andric     [[nodiscard]] size_t find_first_not_of(char C, size_t From = 0) const;
3860b57cec5SDimitry Andric 
3870b57cec5SDimitry Andric     /// Find the first character in the string that is not in the string
3880b57cec5SDimitry Andric     /// \p Chars, or npos if not found.
3890b57cec5SDimitry Andric     ///
3900b57cec5SDimitry Andric     /// Complexity: O(size() + Chars.size())
391bdd1243dSDimitry Andric     [[nodiscard]] size_t find_first_not_of(StringRef Chars,
392bdd1243dSDimitry Andric                                            size_t From = 0) const;
3930b57cec5SDimitry Andric 
3940b57cec5SDimitry Andric     /// Find the last character in the string that is \p C, or npos if not
3950b57cec5SDimitry Andric     /// found.
396bdd1243dSDimitry Andric     [[nodiscard]] size_t find_last_of(char C, size_t From = npos) const {
3970b57cec5SDimitry Andric       return rfind(C, From);
3980b57cec5SDimitry Andric     }
3990b57cec5SDimitry Andric 
    /// Find the last character in the string that is in \p Chars, or npos if
    /// not found.
4020b57cec5SDimitry Andric     ///
4030b57cec5SDimitry Andric     /// Complexity: O(size() + Chars.size())
404bdd1243dSDimitry Andric     [[nodiscard]] size_t find_last_of(StringRef Chars,
405bdd1243dSDimitry Andric                                       size_t From = npos) const;
4060b57cec5SDimitry Andric 
4070b57cec5SDimitry Andric     /// Find the last character in the string that is not \p C, or npos if not
4080b57cec5SDimitry Andric     /// found.
409bdd1243dSDimitry Andric     [[nodiscard]] size_t find_last_not_of(char C, size_t From = npos) const;
4100b57cec5SDimitry Andric 
4110b57cec5SDimitry Andric     /// Find the last character in the string that is not in \p Chars, or
4120b57cec5SDimitry Andric     /// npos if not found.
4130b57cec5SDimitry Andric     ///
4140b57cec5SDimitry Andric     /// Complexity: O(size() + Chars.size())
415bdd1243dSDimitry Andric     [[nodiscard]] size_t find_last_not_of(StringRef Chars,
416bdd1243dSDimitry Andric                                           size_t From = npos) const;
4170b57cec5SDimitry Andric 
4180b57cec5SDimitry Andric     /// Return true if the given string is a substring of *this, and false
4190b57cec5SDimitry Andric     /// otherwise.
contains(StringRef Other)420bdd1243dSDimitry Andric     [[nodiscard]] bool contains(StringRef Other) const {
421bdd1243dSDimitry Andric       return find(Other) != npos;
422bdd1243dSDimitry Andric     }
4230b57cec5SDimitry Andric 
4240b57cec5SDimitry Andric     /// Return true if the given character is contained in *this, and false
4250b57cec5SDimitry Andric     /// otherwise.
contains(char C)426bdd1243dSDimitry Andric     [[nodiscard]] bool contains(char C) const {
427bdd1243dSDimitry Andric       return find_first_of(C) != npos;
428bdd1243dSDimitry Andric     }
4290b57cec5SDimitry Andric 
    /// Return true if the given string is a substring of *this, ignoring
    /// case, and false otherwise.
contains_insensitive(StringRef Other)432bdd1243dSDimitry Andric     [[nodiscard]] bool contains_insensitive(StringRef Other) const {
433fe6060f1SDimitry Andric       return find_insensitive(Other) != npos;
4340b57cec5SDimitry Andric     }
4350b57cec5SDimitry Andric 
    /// Return true if the given character is contained in *this, ignoring
    /// case, and false otherwise.
contains_insensitive(char C)438bdd1243dSDimitry Andric     [[nodiscard]] bool contains_insensitive(char C) const {
439fe6060f1SDimitry Andric       return find_insensitive(C) != npos;
440fe6060f1SDimitry Andric     }
4410b57cec5SDimitry Andric 
4420b57cec5SDimitry Andric     /// @}
4430b57cec5SDimitry Andric     /// @name Helpful Algorithms
4440b57cec5SDimitry Andric     /// @{
4450b57cec5SDimitry Andric 
4460b57cec5SDimitry Andric     /// Return the number of occurrences of \p C in the string.
count(char C)447bdd1243dSDimitry Andric     [[nodiscard]] size_t count(char C) const {
4480b57cec5SDimitry Andric       size_t Count = 0;
44906c3fb27SDimitry Andric       for (size_t I = 0; I != Length; ++I)
45006c3fb27SDimitry Andric         if (Data[I] == C)
4510b57cec5SDimitry Andric           ++Count;
4520b57cec5SDimitry Andric       return Count;
4530b57cec5SDimitry Andric     }
4540b57cec5SDimitry Andric 
4550b57cec5SDimitry Andric     /// Return the number of non-overlapped occurrences of \p Str in
4560b57cec5SDimitry Andric     /// the string.
4570b57cec5SDimitry Andric     size_t count(StringRef Str) const;
4580b57cec5SDimitry Andric 
4590b57cec5SDimitry Andric     /// Parse the current string as an integer of the specified radix.  If
4600b57cec5SDimitry Andric     /// \p Radix is specified as zero, this does radix autosensing using
4610b57cec5SDimitry Andric     /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
4620b57cec5SDimitry Andric     ///
4630b57cec5SDimitry Andric     /// If the string is invalid or if only a subset of the string is valid,
4640b57cec5SDimitry Andric     /// this returns true to signify the error.  The string is considered
4650b57cec5SDimitry Andric     /// erroneous if empty or if it overflows T.
getAsInteger(unsigned Radix,T & Result)466bdd1243dSDimitry Andric     template <typename T> bool getAsInteger(unsigned Radix, T &Result) const {
467bdd1243dSDimitry Andric       if constexpr (std::numeric_limits<T>::is_signed) {
4680b57cec5SDimitry Andric         long long LLVal;
4690b57cec5SDimitry Andric         if (getAsSignedInteger(*this, Radix, LLVal) ||
4700b57cec5SDimitry Andric             static_cast<T>(LLVal) != LLVal)
4710b57cec5SDimitry Andric           return true;
4720b57cec5SDimitry Andric         Result = LLVal;
473bdd1243dSDimitry Andric       } else {
4740b57cec5SDimitry Andric         unsigned long long ULLVal;
4750b57cec5SDimitry Andric         // The additional cast to unsigned long long is required to avoid the
4760b57cec5SDimitry Andric         // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type
4770b57cec5SDimitry Andric         // 'unsigned __int64' when instantiating getAsInteger with T = bool.
4780b57cec5SDimitry Andric         if (getAsUnsignedInteger(*this, Radix, ULLVal) ||
4790b57cec5SDimitry Andric             static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
4800b57cec5SDimitry Andric           return true;
4810b57cec5SDimitry Andric         Result = ULLVal;
482bdd1243dSDimitry Andric       }
4830b57cec5SDimitry Andric       return false;
4840b57cec5SDimitry Andric     }
4850b57cec5SDimitry Andric 
4860b57cec5SDimitry Andric     /// Parse the current string as an integer of the specified radix.  If
4870b57cec5SDimitry Andric     /// \p Radix is specified as zero, this does radix autosensing using
4880b57cec5SDimitry Andric     /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
4890b57cec5SDimitry Andric     ///
4900b57cec5SDimitry Andric     /// If the string does not begin with a number of the specified radix,
4910b57cec5SDimitry Andric     /// this returns true to signify the error. The string is considered
4920b57cec5SDimitry Andric     /// erroneous if empty or if it overflows T.
4930b57cec5SDimitry Andric     /// The portion of the string representing the discovered numeric value
4940b57cec5SDimitry Andric     /// is removed from the beginning of the string.
consumeInteger(unsigned Radix,T & Result)495bdd1243dSDimitry Andric     template <typename T> bool consumeInteger(unsigned Radix, T &Result) {
496bdd1243dSDimitry Andric       if constexpr (std::numeric_limits<T>::is_signed) {
4970b57cec5SDimitry Andric         long long LLVal;
4980b57cec5SDimitry Andric         if (consumeSignedInteger(*this, Radix, LLVal) ||
4990b57cec5SDimitry Andric             static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
5000b57cec5SDimitry Andric           return true;
5010b57cec5SDimitry Andric         Result = LLVal;
502bdd1243dSDimitry Andric       } else {
5030b57cec5SDimitry Andric         unsigned long long ULLVal;
5040b57cec5SDimitry Andric         if (consumeUnsignedInteger(*this, Radix, ULLVal) ||
5050b57cec5SDimitry Andric             static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
5060b57cec5SDimitry Andric           return true;
5070b57cec5SDimitry Andric         Result = ULLVal;
508bdd1243dSDimitry Andric       }
5090b57cec5SDimitry Andric       return false;
5100b57cec5SDimitry Andric     }
5110b57cec5SDimitry Andric 
5120b57cec5SDimitry Andric     /// Parse the current string as an integer of the specified \p Radix, or of
5130b57cec5SDimitry Andric     /// an autosensed radix if the \p Radix given is 0.  The current value in
5140b57cec5SDimitry Andric     /// \p Result is discarded, and the storage is changed to be wide enough to
5150b57cec5SDimitry Andric     /// store the parsed integer.
5160b57cec5SDimitry Andric     ///
5170b57cec5SDimitry Andric     /// \returns true if the string does not solely consist of a valid
5180b57cec5SDimitry Andric     /// non-empty number in the appropriate base.
5190b57cec5SDimitry Andric     ///
5200b57cec5SDimitry Andric     /// APInt::fromString is superficially similar but assumes the
5210b57cec5SDimitry Andric     /// string is well-formed in the given radix.
5220b57cec5SDimitry Andric     bool getAsInteger(unsigned Radix, APInt &Result) const;
5230b57cec5SDimitry Andric 
52406c3fb27SDimitry Andric     /// Parse the current string as an integer of the specified \p Radix.  If
52506c3fb27SDimitry Andric     /// \p Radix is specified as zero, this does radix autosensing using
52606c3fb27SDimitry Andric     /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
52706c3fb27SDimitry Andric     ///
52806c3fb27SDimitry Andric     /// If the string does not begin with a number of the specified radix,
52906c3fb27SDimitry Andric     /// this returns true to signify the error. The string is considered
53006c3fb27SDimitry Andric     /// erroneous if empty.
53106c3fb27SDimitry Andric     /// The portion of the string representing the discovered numeric value
53206c3fb27SDimitry Andric     /// is removed from the beginning of the string.
53306c3fb27SDimitry Andric     bool consumeInteger(unsigned Radix, APInt &Result);
53406c3fb27SDimitry Andric 
5350b57cec5SDimitry Andric     /// Parse the current string as an IEEE double-precision floating
5360b57cec5SDimitry Andric     /// point value.  The string must be a well-formed double.
5370b57cec5SDimitry Andric     ///
5380b57cec5SDimitry Andric     /// If \p AllowInexact is false, the function will fail if the string
5390b57cec5SDimitry Andric     /// cannot be represented exactly.  Otherwise, the function only fails
540480093f4SDimitry Andric     /// in case of an overflow or underflow, or an invalid floating point
541480093f4SDimitry Andric     /// representation.
5420b57cec5SDimitry Andric     bool getAsDouble(double &Result, bool AllowInexact = true) const;
5430b57cec5SDimitry Andric 
5440b57cec5SDimitry Andric     /// @}
5450b57cec5SDimitry Andric     /// @name String Operations
5460b57cec5SDimitry Andric     /// @{
5470b57cec5SDimitry Andric 
/// Convert the given ASCII string to lowercase.
549bdd1243dSDimitry Andric     [[nodiscard]] std::string lower() const;
5500b57cec5SDimitry Andric 
5510b57cec5SDimitry Andric     /// Convert the given ASCII string to uppercase.
552bdd1243dSDimitry Andric     [[nodiscard]] std::string upper() const;
5530b57cec5SDimitry Andric 
5540b57cec5SDimitry Andric     /// @}
5550b57cec5SDimitry Andric     /// @name Substring Operations
5560b57cec5SDimitry Andric     /// @{
5570b57cec5SDimitry Andric 
5580b57cec5SDimitry Andric     /// Return a reference to the substring from [Start, Start + N).
5590b57cec5SDimitry Andric     ///
5600b57cec5SDimitry Andric     /// \param Start The index of the starting character in the substring; if
5610b57cec5SDimitry Andric     /// the index is npos or greater than the length of the string then the
5620b57cec5SDimitry Andric     /// empty substring will be returned.
5630b57cec5SDimitry Andric     ///
5640b57cec5SDimitry Andric     /// \param N The number of characters to included in the substring. If N
5650b57cec5SDimitry Andric     /// exceeds the number of characters remaining in the string, the string
5660b57cec5SDimitry Andric     /// suffix (starting with \p Start) will be returned.
567bdd1243dSDimitry Andric     [[nodiscard]] constexpr StringRef substr(size_t Start,
568bdd1243dSDimitry Andric                                              size_t N = npos) const {
5690b57cec5SDimitry Andric       Start = std::min(Start, Length);
5700b57cec5SDimitry Andric       return StringRef(Data + Start, std::min(N, Length - Start));
5710b57cec5SDimitry Andric     }
5720b57cec5SDimitry Andric 
5730b57cec5SDimitry Andric     /// Return a StringRef equal to 'this' but with only the first \p N
5740b57cec5SDimitry Andric     /// elements remaining.  If \p N is greater than the length of the
5750b57cec5SDimitry Andric     /// string, the entire string is returned.
576bdd1243dSDimitry Andric     [[nodiscard]] StringRef take_front(size_t N = 1) const {
5770b57cec5SDimitry Andric       if (N >= size())
5780b57cec5SDimitry Andric         return *this;
5790b57cec5SDimitry Andric       return drop_back(size() - N);
5800b57cec5SDimitry Andric     }
5810b57cec5SDimitry Andric 
5820b57cec5SDimitry Andric     /// Return a StringRef equal to 'this' but with only the last \p N
5830b57cec5SDimitry Andric     /// elements remaining.  If \p N is greater than the length of the
5840b57cec5SDimitry Andric     /// string, the entire string is returned.
585bdd1243dSDimitry Andric     [[nodiscard]] StringRef take_back(size_t N = 1) const {
5860b57cec5SDimitry Andric       if (N >= size())
5870b57cec5SDimitry Andric         return *this;
5880b57cec5SDimitry Andric       return drop_front(size() - N);
5890b57cec5SDimitry Andric     }
5900b57cec5SDimitry Andric 
5910b57cec5SDimitry Andric     /// Return the longest prefix of 'this' such that every character
5920b57cec5SDimitry Andric     /// in the prefix satisfies the given predicate.
take_while(function_ref<bool (char)> F)593bdd1243dSDimitry Andric     [[nodiscard]] StringRef take_while(function_ref<bool(char)> F) const {
5940b57cec5SDimitry Andric       return substr(0, find_if_not(F));
5950b57cec5SDimitry Andric     }
5960b57cec5SDimitry Andric 
5970b57cec5SDimitry Andric     /// Return the longest prefix of 'this' such that no character in
5980b57cec5SDimitry Andric     /// the prefix satisfies the given predicate.
take_until(function_ref<bool (char)> F)599bdd1243dSDimitry Andric     [[nodiscard]] StringRef take_until(function_ref<bool(char)> F) const {
6000b57cec5SDimitry Andric       return substr(0, find_if(F));
6010b57cec5SDimitry Andric     }
6020b57cec5SDimitry Andric 
6030b57cec5SDimitry Andric     /// Return a StringRef equal to 'this' but with the first \p N elements
6040b57cec5SDimitry Andric     /// dropped.
605bdd1243dSDimitry Andric     [[nodiscard]] StringRef drop_front(size_t N = 1) const {
6060b57cec5SDimitry Andric       assert(size() >= N && "Dropping more elements than exist");
6070b57cec5SDimitry Andric       return substr(N);
6080b57cec5SDimitry Andric     }
6090b57cec5SDimitry Andric 
6100b57cec5SDimitry Andric     /// Return a StringRef equal to 'this' but with the last \p N elements
6110b57cec5SDimitry Andric     /// dropped.
612bdd1243dSDimitry Andric     [[nodiscard]] StringRef drop_back(size_t N = 1) const {
6130b57cec5SDimitry Andric       assert(size() >= N && "Dropping more elements than exist");
6140b57cec5SDimitry Andric       return substr(0, size()-N);
6150b57cec5SDimitry Andric     }
6160b57cec5SDimitry Andric 
6170b57cec5SDimitry Andric     /// Return a StringRef equal to 'this', but with all characters satisfying
6180b57cec5SDimitry Andric     /// the given predicate dropped from the beginning of the string.
drop_while(function_ref<bool (char)> F)619bdd1243dSDimitry Andric     [[nodiscard]] StringRef drop_while(function_ref<bool(char)> F) const {
6200b57cec5SDimitry Andric       return substr(find_if_not(F));
6210b57cec5SDimitry Andric     }
6220b57cec5SDimitry Andric 
6230b57cec5SDimitry Andric     /// Return a StringRef equal to 'this', but with all characters not
6240b57cec5SDimitry Andric     /// satisfying the given predicate dropped from the beginning of the string.
drop_until(function_ref<bool (char)> F)625bdd1243dSDimitry Andric     [[nodiscard]] StringRef drop_until(function_ref<bool(char)> F) const {
6260b57cec5SDimitry Andric       return substr(find_if(F));
6270b57cec5SDimitry Andric     }
6280b57cec5SDimitry Andric 
6290b57cec5SDimitry Andric     /// Returns true if this StringRef has the given prefix and removes that
6300b57cec5SDimitry Andric     /// prefix.
consume_front(StringRef Prefix)6310b57cec5SDimitry Andric     bool consume_front(StringRef Prefix) {
632bdd1243dSDimitry Andric       if (!starts_with(Prefix))
6330b57cec5SDimitry Andric         return false;
6340b57cec5SDimitry Andric 
63506c3fb27SDimitry Andric       *this = substr(Prefix.size());
6360b57cec5SDimitry Andric       return true;
6370b57cec5SDimitry Andric     }
6380b57cec5SDimitry Andric 
639fe6060f1SDimitry Andric     /// Returns true if this StringRef has the given prefix, ignoring case,
640fe6060f1SDimitry Andric     /// and removes that prefix.
consume_front_insensitive(StringRef Prefix)641fe6060f1SDimitry Andric     bool consume_front_insensitive(StringRef Prefix) {
64206c3fb27SDimitry Andric       if (!starts_with_insensitive(Prefix))
643fe6060f1SDimitry Andric         return false;
644fe6060f1SDimitry Andric 
64506c3fb27SDimitry Andric       *this = substr(Prefix.size());
646fe6060f1SDimitry Andric       return true;
647fe6060f1SDimitry Andric     }
648fe6060f1SDimitry Andric 
6490b57cec5SDimitry Andric     /// Returns true if this StringRef has the given suffix and removes that
6500b57cec5SDimitry Andric     /// suffix.
consume_back(StringRef Suffix)6510b57cec5SDimitry Andric     bool consume_back(StringRef Suffix) {
652bdd1243dSDimitry Andric       if (!ends_with(Suffix))
6530b57cec5SDimitry Andric         return false;
6540b57cec5SDimitry Andric 
65506c3fb27SDimitry Andric       *this = substr(0, size() - Suffix.size());
6560b57cec5SDimitry Andric       return true;
6570b57cec5SDimitry Andric     }
6580b57cec5SDimitry Andric 
659fe6060f1SDimitry Andric     /// Returns true if this StringRef has the given suffix, ignoring case,
660fe6060f1SDimitry Andric     /// and removes that suffix.
consume_back_insensitive(StringRef Suffix)661fe6060f1SDimitry Andric     bool consume_back_insensitive(StringRef Suffix) {
66206c3fb27SDimitry Andric       if (!ends_with_insensitive(Suffix))
663fe6060f1SDimitry Andric         return false;
664fe6060f1SDimitry Andric 
66506c3fb27SDimitry Andric       *this = substr(0, size() - Suffix.size());
666fe6060f1SDimitry Andric       return true;
667fe6060f1SDimitry Andric     }
668fe6060f1SDimitry Andric 
6690b57cec5SDimitry Andric     /// Return a reference to the substring from [Start, End).
6700b57cec5SDimitry Andric     ///
6710b57cec5SDimitry Andric     /// \param Start The index of the starting character in the substring; if
6720b57cec5SDimitry Andric     /// the index is npos or greater than the length of the string then the
6730b57cec5SDimitry Andric     /// empty substring will be returned.
6740b57cec5SDimitry Andric     ///
6750b57cec5SDimitry Andric     /// \param End The index following the last character to include in the
6760b57cec5SDimitry Andric     /// substring. If this is npos or exceeds the number of characters
6770b57cec5SDimitry Andric     /// remaining in the string, the string suffix (starting with \p Start)
6780b57cec5SDimitry Andric     /// will be returned. If this is less than \p Start, an empty string will
6790b57cec5SDimitry Andric     /// be returned.
slice(size_t Start,size_t End)680bdd1243dSDimitry Andric     [[nodiscard]] StringRef slice(size_t Start, size_t End) const {
6810b57cec5SDimitry Andric       Start = std::min(Start, Length);
68206c3fb27SDimitry Andric       End = std::clamp(End, Start, Length);
6830b57cec5SDimitry Andric       return StringRef(Data + Start, End - Start);
6840b57cec5SDimitry Andric     }
6850b57cec5SDimitry Andric 
6860b57cec5SDimitry Andric     /// Split into two substrings around the first occurrence of a separator
6870b57cec5SDimitry Andric     /// character.
6880b57cec5SDimitry Andric     ///
6890b57cec5SDimitry Andric     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
6900b57cec5SDimitry Andric     /// such that (*this == LHS + Separator + RHS) is true and RHS is
6910b57cec5SDimitry Andric     /// maximal. If \p Separator is not in the string, then the result is a
6920b57cec5SDimitry Andric     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
6930b57cec5SDimitry Andric     ///
6940b57cec5SDimitry Andric     /// \param Separator The character to split on.
6950b57cec5SDimitry Andric     /// \returns The split substrings.
split(char Separator)696bdd1243dSDimitry Andric     [[nodiscard]] std::pair<StringRef, StringRef> split(char Separator) const {
6970b57cec5SDimitry Andric       return split(StringRef(&Separator, 1));
6980b57cec5SDimitry Andric     }
6990b57cec5SDimitry Andric 
7000b57cec5SDimitry Andric     /// Split into two substrings around the first occurrence of a separator
7010b57cec5SDimitry Andric     /// string.
7020b57cec5SDimitry Andric     ///
7030b57cec5SDimitry Andric     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
7040b57cec5SDimitry Andric     /// such that (*this == LHS + Separator + RHS) is true and RHS is
7050b57cec5SDimitry Andric     /// maximal. If \p Separator is not in the string, then the result is a
7060b57cec5SDimitry Andric     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
7070b57cec5SDimitry Andric     ///
7080b57cec5SDimitry Andric     /// \param Separator - The string to split on.
7090b57cec5SDimitry Andric     /// \return - The split substrings.
710bdd1243dSDimitry Andric     [[nodiscard]] std::pair<StringRef, StringRef>
split(StringRef Separator)711bdd1243dSDimitry Andric     split(StringRef Separator) const {
7120b57cec5SDimitry Andric       size_t Idx = find(Separator);
7130b57cec5SDimitry Andric       if (Idx == npos)
7140b57cec5SDimitry Andric         return std::make_pair(*this, StringRef());
7150b57cec5SDimitry Andric       return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
7160b57cec5SDimitry Andric     }
7170b57cec5SDimitry Andric 
7180b57cec5SDimitry Andric     /// Split into two substrings around the last occurrence of a separator
7190b57cec5SDimitry Andric     /// string.
7200b57cec5SDimitry Andric     ///
7210b57cec5SDimitry Andric     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
7220b57cec5SDimitry Andric     /// such that (*this == LHS + Separator + RHS) is true and RHS is
7230b57cec5SDimitry Andric     /// minimal. If \p Separator is not in the string, then the result is a
7240b57cec5SDimitry Andric     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
7250b57cec5SDimitry Andric     ///
7260b57cec5SDimitry Andric     /// \param Separator - The string to split on.
7270b57cec5SDimitry Andric     /// \return - The split substrings.
728bdd1243dSDimitry Andric     [[nodiscard]] std::pair<StringRef, StringRef>
rsplit(StringRef Separator)729bdd1243dSDimitry Andric     rsplit(StringRef Separator) const {
7300b57cec5SDimitry Andric       size_t Idx = rfind(Separator);
7310b57cec5SDimitry Andric       if (Idx == npos)
7320b57cec5SDimitry Andric         return std::make_pair(*this, StringRef());
7330b57cec5SDimitry Andric       return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
7340b57cec5SDimitry Andric     }
7350b57cec5SDimitry Andric 
7360b57cec5SDimitry Andric     /// Split into substrings around the occurrences of a separator string.
7370b57cec5SDimitry Andric     ///
7380b57cec5SDimitry Andric     /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
7390b57cec5SDimitry Andric     /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
7400b57cec5SDimitry Andric     /// elements are added to A.
/// If \p KeepEmpty is false, empty strings are not added to \p A. They
/// still count when considering \p MaxSplit.
/// A useful invariant is that
/// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true.
7450b57cec5SDimitry Andric     ///
7460b57cec5SDimitry Andric     /// \param A - Where to put the substrings.
7470b57cec5SDimitry Andric     /// \param Separator - The string to split on.
7480b57cec5SDimitry Andric     /// \param MaxSplit - The maximum number of times the string is split.
7490b57cec5SDimitry Andric     /// \param KeepEmpty - True if empty substring should be added.
7500b57cec5SDimitry Andric     void split(SmallVectorImpl<StringRef> &A,
7510b57cec5SDimitry Andric                StringRef Separator, int MaxSplit = -1,
7520b57cec5SDimitry Andric                bool KeepEmpty = true) const;
7530b57cec5SDimitry Andric 
7540b57cec5SDimitry Andric     /// Split into substrings around the occurrences of a separator character.
7550b57cec5SDimitry Andric     ///
7560b57cec5SDimitry Andric     /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
7570b57cec5SDimitry Andric     /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
7580b57cec5SDimitry Andric     /// elements are added to A.
/// If \p KeepEmpty is false, empty strings are not added to \p A. They
/// still count when considering \p MaxSplit.
/// A useful invariant is that
/// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true.
7630b57cec5SDimitry Andric     ///
7640b57cec5SDimitry Andric     /// \param A - Where to put the substrings.
7650b57cec5SDimitry Andric     /// \param Separator - The string to split on.
7660b57cec5SDimitry Andric     /// \param MaxSplit - The maximum number of times the string is split.
7670b57cec5SDimitry Andric     /// \param KeepEmpty - True if empty substring should be added.
7680b57cec5SDimitry Andric     void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1,
7690b57cec5SDimitry Andric                bool KeepEmpty = true) const;
7700b57cec5SDimitry Andric 
7710b57cec5SDimitry Andric     /// Split into two substrings around the last occurrence of a separator
7720b57cec5SDimitry Andric     /// character.
7730b57cec5SDimitry Andric     ///
7740b57cec5SDimitry Andric     /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
7750b57cec5SDimitry Andric     /// such that (*this == LHS + Separator + RHS) is true and RHS is
7760b57cec5SDimitry Andric     /// minimal. If \p Separator is not in the string, then the result is a
7770b57cec5SDimitry Andric     /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
7780b57cec5SDimitry Andric     ///
7790b57cec5SDimitry Andric     /// \param Separator - The character to split on.
7800b57cec5SDimitry Andric     /// \return - The split substrings.
rsplit(char Separator)781bdd1243dSDimitry Andric     [[nodiscard]] std::pair<StringRef, StringRef> rsplit(char Separator) const {
7820b57cec5SDimitry Andric       return rsplit(StringRef(&Separator, 1));
7830b57cec5SDimitry Andric     }
7840b57cec5SDimitry Andric 
7850b57cec5SDimitry Andric     /// Return string with consecutive \p Char characters starting from the
7860b57cec5SDimitry Andric     /// the left removed.
ltrim(char Char)787bdd1243dSDimitry Andric     [[nodiscard]] StringRef ltrim(char Char) const {
7880b57cec5SDimitry Andric       return drop_front(std::min(Length, find_first_not_of(Char)));
7890b57cec5SDimitry Andric     }
7900b57cec5SDimitry Andric 
7910b57cec5SDimitry Andric     /// Return string with consecutive characters in \p Chars starting from
7920b57cec5SDimitry Andric     /// the left removed.
793bdd1243dSDimitry Andric     [[nodiscard]] StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
7940b57cec5SDimitry Andric       return drop_front(std::min(Length, find_first_not_of(Chars)));
7950b57cec5SDimitry Andric     }
7960b57cec5SDimitry Andric 
7970b57cec5SDimitry Andric     /// Return string with consecutive \p Char characters starting from the
7980b57cec5SDimitry Andric     /// right removed.
rtrim(char Char)799bdd1243dSDimitry Andric     [[nodiscard]] StringRef rtrim(char Char) const {
8000b57cec5SDimitry Andric       return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1));
8010b57cec5SDimitry Andric     }
8020b57cec5SDimitry Andric 
8030b57cec5SDimitry Andric     /// Return string with consecutive characters in \p Chars starting from
8040b57cec5SDimitry Andric     /// the right removed.
805bdd1243dSDimitry Andric     [[nodiscard]] StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
8060b57cec5SDimitry Andric       return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1));
8070b57cec5SDimitry Andric     }
8080b57cec5SDimitry Andric 
8090b57cec5SDimitry Andric     /// Return string with consecutive \p Char characters starting from the
8100b57cec5SDimitry Andric     /// left and right removed.
trim(char Char)811bdd1243dSDimitry Andric     [[nodiscard]] StringRef trim(char Char) const {
8120b57cec5SDimitry Andric       return ltrim(Char).rtrim(Char);
8130b57cec5SDimitry Andric     }
8140b57cec5SDimitry Andric 
8150b57cec5SDimitry Andric     /// Return string with consecutive characters in \p Chars starting from
8160b57cec5SDimitry Andric     /// the left and right removed.
817bdd1243dSDimitry Andric     [[nodiscard]] StringRef trim(StringRef Chars = " \t\n\v\f\r") const {
8180b57cec5SDimitry Andric       return ltrim(Chars).rtrim(Chars);
8190b57cec5SDimitry Andric     }
8200b57cec5SDimitry Andric 
82104eeddc0SDimitry Andric     /// Detect the line ending style of the string.
82204eeddc0SDimitry Andric     ///
82304eeddc0SDimitry Andric     /// If the string contains a line ending, return the line ending character
82404eeddc0SDimitry Andric     /// sequence that is detected. Otherwise return '\n' for unix line endings.
82504eeddc0SDimitry Andric     ///
82604eeddc0SDimitry Andric     /// \return - The line ending character sequence.
detectEOL()827bdd1243dSDimitry Andric     [[nodiscard]] StringRef detectEOL() const {
82804eeddc0SDimitry Andric       size_t Pos = find('\r');
82904eeddc0SDimitry Andric       if (Pos == npos) {
83004eeddc0SDimitry Andric         // If there is no carriage return, assume unix
83104eeddc0SDimitry Andric         return "\n";
83204eeddc0SDimitry Andric       }
83304eeddc0SDimitry Andric       if (Pos + 1 < Length && Data[Pos + 1] == '\n')
83404eeddc0SDimitry Andric         return "\r\n"; // Windows
83504eeddc0SDimitry Andric       if (Pos > 0 && Data[Pos - 1] == '\n')
83604eeddc0SDimitry Andric         return "\n\r"; // You monster!
83704eeddc0SDimitry Andric       return "\r";     // Classic Mac
83804eeddc0SDimitry Andric     }
8390b57cec5SDimitry Andric     /// @}
8400b57cec5SDimitry Andric   };
8410b57cec5SDimitry Andric 
8420b57cec5SDimitry Andric   /// A wrapper around a string literal that serves as a proxy for constructing
8430b57cec5SDimitry Andric   /// global tables of StringRefs with the length computed at compile time.
8440b57cec5SDimitry Andric   /// In order to avoid the invocation of a global constructor, StringLiteral
8450b57cec5SDimitry Andric   /// should *only* be used in a constexpr context, as such:
8460b57cec5SDimitry Andric   ///
8470b57cec5SDimitry Andric   /// constexpr StringLiteral S("test");
8480b57cec5SDimitry Andric   ///
8490b57cec5SDimitry Andric   class StringLiteral : public StringRef {
8500b57cec5SDimitry Andric   private:
StringLiteral(const char * Str,size_t N)8510b57cec5SDimitry Andric     constexpr StringLiteral(const char *Str, size_t N) : StringRef(Str, N) {
8520b57cec5SDimitry Andric     }
8530b57cec5SDimitry Andric 
8540b57cec5SDimitry Andric   public:
8550b57cec5SDimitry Andric     template <size_t N>
StringLiteral(const char (& Str)[N])8560b57cec5SDimitry Andric     constexpr StringLiteral(const char (&Str)[N])
8570b57cec5SDimitry Andric #if defined(__clang__) && __has_attribute(enable_if)
8580b57cec5SDimitry Andric #pragma clang diagnostic push
8590b57cec5SDimitry Andric #pragma clang diagnostic ignored "-Wgcc-compat"
8600b57cec5SDimitry Andric         __attribute((enable_if(__builtin_strlen(Str) == N - 1,
8610b57cec5SDimitry Andric                                "invalid string literal")))
8620b57cec5SDimitry Andric #pragma clang diagnostic pop
8630b57cec5SDimitry Andric #endif
8640b57cec5SDimitry Andric         : StringRef(Str, N - 1) {
8650b57cec5SDimitry Andric     }
8660b57cec5SDimitry Andric 
8670b57cec5SDimitry Andric     // Explicit construction for strings like "foo\0bar".
8680b57cec5SDimitry Andric     template <size_t N>
withInnerNUL(const char (& Str)[N])8690b57cec5SDimitry Andric     static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) {
8700b57cec5SDimitry Andric       return StringLiteral(Str, N - 1);
8710b57cec5SDimitry Andric     }
8720b57cec5SDimitry Andric   };
8730b57cec5SDimitry Andric 
8740b57cec5SDimitry Andric   /// @name StringRef Comparison Operators
8750b57cec5SDimitry Andric   /// @{
8760b57cec5SDimitry Andric 
8770b57cec5SDimitry Andric   inline bool operator==(StringRef LHS, StringRef RHS) {
8780b57cec5SDimitry Andric     return LHS.equals(RHS);
8790b57cec5SDimitry Andric   }
8800b57cec5SDimitry Andric 
8810b57cec5SDimitry Andric   inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }
8820b57cec5SDimitry Andric 
8830b57cec5SDimitry Andric   inline bool operator<(StringRef LHS, StringRef RHS) {
884bdd1243dSDimitry Andric     return LHS.compare(RHS) < 0;
8850b57cec5SDimitry Andric   }
8860b57cec5SDimitry Andric 
8870b57cec5SDimitry Andric   inline bool operator<=(StringRef LHS, StringRef RHS) {
888bdd1243dSDimitry Andric     return LHS.compare(RHS) <= 0;
8890b57cec5SDimitry Andric   }
8900b57cec5SDimitry Andric 
8910b57cec5SDimitry Andric   inline bool operator>(StringRef LHS, StringRef RHS) {
892bdd1243dSDimitry Andric     return LHS.compare(RHS) > 0;
8930b57cec5SDimitry Andric   }
8940b57cec5SDimitry Andric 
8950b57cec5SDimitry Andric   inline bool operator>=(StringRef LHS, StringRef RHS) {
896bdd1243dSDimitry Andric     return LHS.compare(RHS) >= 0;
8970b57cec5SDimitry Andric   }
8980b57cec5SDimitry Andric 
8990b57cec5SDimitry Andric   inline std::string &operator+=(std::string &buffer, StringRef string) {
9000b57cec5SDimitry Andric     return buffer.append(string.data(), string.size());
9010b57cec5SDimitry Andric   }
9020b57cec5SDimitry Andric 
9030b57cec5SDimitry Andric   /// @}
9040b57cec5SDimitry Andric 
9050b57cec5SDimitry Andric   /// Compute a hash_code for a StringRef.
906bdd1243dSDimitry Andric   [[nodiscard]] hash_code hash_value(StringRef S);
9070b57cec5SDimitry Andric 
908fe6060f1SDimitry Andric   // Provide DenseMapInfo for StringRefs.
909349cc55cSDimitry Andric   template <> struct DenseMapInfo<StringRef, void> {
910fe6060f1SDimitry Andric     static inline StringRef getEmptyKey() {
911fe6060f1SDimitry Andric       return StringRef(
912fe6060f1SDimitry Andric           reinterpret_cast<const char *>(~static_cast<uintptr_t>(0)), 0);
913fe6060f1SDimitry Andric     }
914fe6060f1SDimitry Andric 
915fe6060f1SDimitry Andric     static inline StringRef getTombstoneKey() {
916fe6060f1SDimitry Andric       return StringRef(
917fe6060f1SDimitry Andric           reinterpret_cast<const char *>(~static_cast<uintptr_t>(1)), 0);
918fe6060f1SDimitry Andric     }
919fe6060f1SDimitry Andric 
92004eeddc0SDimitry Andric     static unsigned getHashValue(StringRef Val);
921fe6060f1SDimitry Andric 
922fe6060f1SDimitry Andric     static bool isEqual(StringRef LHS, StringRef RHS) {
923fe6060f1SDimitry Andric       if (RHS.data() == getEmptyKey().data())
924fe6060f1SDimitry Andric         return LHS.data() == getEmptyKey().data();
925fe6060f1SDimitry Andric       if (RHS.data() == getTombstoneKey().data())
926fe6060f1SDimitry Andric         return LHS.data() == getTombstoneKey().data();
927fe6060f1SDimitry Andric       return LHS == RHS;
928fe6060f1SDimitry Andric     }
929fe6060f1SDimitry Andric   };
930fe6060f1SDimitry Andric 
9310b57cec5SDimitry Andric } // end namespace llvm
9320b57cec5SDimitry Andric 
9330b57cec5SDimitry Andric #endif // LLVM_ADT_STRINGREF_H
934