10b57cec5SDimitry Andric //===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #ifndef LLVM_ADT_STRINGREF_H 100b57cec5SDimitry Andric #define LLVM_ADT_STRINGREF_H 110b57cec5SDimitry Andric 1204eeddc0SDimitry Andric #include "llvm/ADT/DenseMapInfo.h" 1304eeddc0SDimitry Andric #include "llvm/ADT/STLFunctionalExtras.h" 140b57cec5SDimitry Andric #include "llvm/ADT/iterator_range.h" 150b57cec5SDimitry Andric #include "llvm/Support/Compiler.h" 160b57cec5SDimitry Andric #include <algorithm> 170b57cec5SDimitry Andric #include <cassert> 180b57cec5SDimitry Andric #include <cstddef> 190b57cec5SDimitry Andric #include <cstring> 200b57cec5SDimitry Andric #include <limits> 210b57cec5SDimitry Andric #include <string> 225ffd83dbSDimitry Andric #include <string_view> 230b57cec5SDimitry Andric #include <type_traits> 240b57cec5SDimitry Andric #include <utility> 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric namespace llvm { 270b57cec5SDimitry Andric 280b57cec5SDimitry Andric class APInt; 290b57cec5SDimitry Andric class hash_code; 300b57cec5SDimitry Andric template <typename T> class SmallVectorImpl; 310b57cec5SDimitry Andric class StringRef; 320b57cec5SDimitry Andric 330b57cec5SDimitry Andric /// Helper functions for StringRef::getAsInteger. 
340b57cec5SDimitry Andric bool getAsUnsignedInteger(StringRef Str, unsigned Radix, 350b57cec5SDimitry Andric unsigned long long &Result); 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result); 380b57cec5SDimitry Andric 390b57cec5SDimitry Andric bool consumeUnsignedInteger(StringRef &Str, unsigned Radix, 400b57cec5SDimitry Andric unsigned long long &Result); 410b57cec5SDimitry Andric bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result); 420b57cec5SDimitry Andric 430b57cec5SDimitry Andric /// StringRef - Represent a constant reference to a string, i.e. a character 440b57cec5SDimitry Andric /// array and a length, which need not be null terminated. 450b57cec5SDimitry Andric /// 460b57cec5SDimitry Andric /// This class does not own the string data, it is expected to be used in 470b57cec5SDimitry Andric /// situations where the character data resides in some other buffer, whose 480b57cec5SDimitry Andric /// lifetime extends past that of the StringRef. For this reason, it is not in 490b57cec5SDimitry Andric /// general safe to store a StringRef. 505ffd83dbSDimitry Andric class LLVM_GSL_POINTER StringRef { 510b57cec5SDimitry Andric public: 525ffd83dbSDimitry Andric static constexpr size_t npos = ~size_t(0); 530b57cec5SDimitry Andric 540b57cec5SDimitry Andric using iterator = const char *; 550b57cec5SDimitry Andric using const_iterator = const char *; 560b57cec5SDimitry Andric using size_type = size_t; 570b57cec5SDimitry Andric 580b57cec5SDimitry Andric private: 590b57cec5SDimitry Andric /// The start of the string, in an external buffer. 600b57cec5SDimitry Andric const char *Data = nullptr; 610b57cec5SDimitry Andric 620b57cec5SDimitry Andric /// The length of the string. 
630b57cec5SDimitry Andric size_t Length = 0; 640b57cec5SDimitry Andric 650b57cec5SDimitry Andric // Workaround memcmp issue with null pointers (undefined behavior) 660b57cec5SDimitry Andric // by providing a specialized version compareMemory(const char * Lhs,const char * Rhs,size_t Length)670b57cec5SDimitry Andric static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) { 680b57cec5SDimitry Andric if (Length == 0) { return 0; } 690b57cec5SDimitry Andric return ::memcmp(Lhs,Rhs,Length); 700b57cec5SDimitry Andric } 710b57cec5SDimitry Andric 720b57cec5SDimitry Andric public: 730b57cec5SDimitry Andric /// @name Constructors 740b57cec5SDimitry Andric /// @{ 750b57cec5SDimitry Andric 760b57cec5SDimitry Andric /// Construct an empty string ref. 770b57cec5SDimitry Andric /*implicit*/ StringRef() = default; 780b57cec5SDimitry Andric 790b57cec5SDimitry Andric /// Disable conversion from nullptr. This prevents things like 800b57cec5SDimitry Andric /// if (S == nullptr) 810b57cec5SDimitry Andric StringRef(std::nullptr_t) = delete; 820b57cec5SDimitry Andric 830b57cec5SDimitry Andric /// Construct a string ref from a cstring. StringRef(const char * Str)848bcb0991SDimitry Andric /*implicit*/ constexpr StringRef(const char *Str) 85bdd1243dSDimitry Andric : Data(Str), Length(Str ? 86bdd1243dSDimitry Andric // GCC 7 doesn't have constexpr char_traits. Fall back to __builtin_strlen. 87bdd1243dSDimitry Andric #if defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE < 8 88bdd1243dSDimitry Andric __builtin_strlen(Str) 89bdd1243dSDimitry Andric #else 90bdd1243dSDimitry Andric std::char_traits<char>::length(Str) 91bdd1243dSDimitry Andric #endif 92bdd1243dSDimitry Andric : 0) { 93bdd1243dSDimitry Andric } 940b57cec5SDimitry Andric 950b57cec5SDimitry Andric /// Construct a string ref from a pointer and length. 
StringRef(const char * data,size_t length)960b57cec5SDimitry Andric /*implicit*/ constexpr StringRef(const char *data, size_t length) 970b57cec5SDimitry Andric : Data(data), Length(length) {} 980b57cec5SDimitry Andric 990b57cec5SDimitry Andric /// Construct a string ref from an std::string. StringRef(const std::string & Str)1000b57cec5SDimitry Andric /*implicit*/ StringRef(const std::string &Str) 1010b57cec5SDimitry Andric : Data(Str.data()), Length(Str.length()) {} 1020b57cec5SDimitry Andric 1035ffd83dbSDimitry Andric /// Construct a string ref from an std::string_view. StringRef(std::string_view Str)1045ffd83dbSDimitry Andric /*implicit*/ constexpr StringRef(std::string_view Str) 1055ffd83dbSDimitry Andric : Data(Str.data()), Length(Str.size()) {} 1065ffd83dbSDimitry Andric 1070b57cec5SDimitry Andric /// @} 1080b57cec5SDimitry Andric /// @name Iterators 1090b57cec5SDimitry Andric /// @{ 1100b57cec5SDimitry Andric begin()1110b57cec5SDimitry Andric iterator begin() const { return Data; } 1120b57cec5SDimitry Andric end()1130b57cec5SDimitry Andric iterator end() const { return Data + Length; } 1140b57cec5SDimitry Andric bytes_begin()1150b57cec5SDimitry Andric const unsigned char *bytes_begin() const { 1160b57cec5SDimitry Andric return reinterpret_cast<const unsigned char *>(begin()); 1170b57cec5SDimitry Andric } bytes_end()1180b57cec5SDimitry Andric const unsigned char *bytes_end() const { 1190b57cec5SDimitry Andric return reinterpret_cast<const unsigned char *>(end()); 1200b57cec5SDimitry Andric } bytes()1210b57cec5SDimitry Andric iterator_range<const unsigned char *> bytes() const { 1220b57cec5SDimitry Andric return make_range(bytes_begin(), bytes_end()); 1230b57cec5SDimitry Andric } 1240b57cec5SDimitry Andric 1250b57cec5SDimitry Andric /// @} 1260b57cec5SDimitry Andric /// @name String Operations 1270b57cec5SDimitry Andric /// @{ 1280b57cec5SDimitry Andric 1290b57cec5SDimitry Andric /// data - Get a pointer to the start of the string (which may not be null 
1300b57cec5SDimitry Andric /// terminated). data()131297eecfbSDimitry Andric [[nodiscard]] constexpr const char *data() const { return Data; } 1320b57cec5SDimitry Andric 1330b57cec5SDimitry Andric /// empty - Check if the string is empty. empty()134bdd1243dSDimitry Andric [[nodiscard]] constexpr bool empty() const { return Length == 0; } 1350b57cec5SDimitry Andric 1360b57cec5SDimitry Andric /// size - Get the string size. size()137bdd1243dSDimitry Andric [[nodiscard]] constexpr size_t size() const { return Length; } 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric /// front - Get the first character in the string. front()140bdd1243dSDimitry Andric [[nodiscard]] char front() const { 1410b57cec5SDimitry Andric assert(!empty()); 1420b57cec5SDimitry Andric return Data[0]; 1430b57cec5SDimitry Andric } 1440b57cec5SDimitry Andric 1450b57cec5SDimitry Andric /// back - Get the last character in the string. back()146bdd1243dSDimitry Andric [[nodiscard]] char back() const { 1470b57cec5SDimitry Andric assert(!empty()); 1480b57cec5SDimitry Andric return Data[Length-1]; 1490b57cec5SDimitry Andric } 1500b57cec5SDimitry Andric 1510b57cec5SDimitry Andric // copy - Allocate copy in Allocator and return StringRef to it. 1520b57cec5SDimitry Andric template <typename Allocator> copy(Allocator & A)153bdd1243dSDimitry Andric [[nodiscard]] StringRef copy(Allocator &A) const { 1540b57cec5SDimitry Andric // Don't request a length 0 copy from the allocator. 1550b57cec5SDimitry Andric if (empty()) 1560b57cec5SDimitry Andric return StringRef(); 1570b57cec5SDimitry Andric char *S = A.template Allocate<char>(Length); 1580b57cec5SDimitry Andric std::copy(begin(), end(), S); 1590b57cec5SDimitry Andric return StringRef(S, Length); 1600b57cec5SDimitry Andric } 1610b57cec5SDimitry Andric 1620b57cec5SDimitry Andric /// equals - Check for string equality, this is more efficient than 1630b57cec5SDimitry Andric /// compare() when the relative ordering of inequal strings isn't needed. 
equals(StringRef RHS)164bdd1243dSDimitry Andric [[nodiscard]] bool equals(StringRef RHS) const { 1650b57cec5SDimitry Andric return (Length == RHS.Length && 1660b57cec5SDimitry Andric compareMemory(Data, RHS.Data, RHS.Length) == 0); 1670b57cec5SDimitry Andric } 1680b57cec5SDimitry Andric 169fe6060f1SDimitry Andric /// Check for string equality, ignoring case. equals_insensitive(StringRef RHS)170bdd1243dSDimitry Andric [[nodiscard]] bool equals_insensitive(StringRef RHS) const { 171fe6060f1SDimitry Andric return Length == RHS.Length && compare_insensitive(RHS) == 0; 1720b57cec5SDimitry Andric } 1730b57cec5SDimitry Andric 174bdd1243dSDimitry Andric /// compare - Compare two strings; the result is negative, zero, or positive 175bdd1243dSDimitry Andric /// if this string is lexicographically less than, equal to, or greater than 176bdd1243dSDimitry Andric /// the \p RHS. compare(StringRef RHS)177bdd1243dSDimitry Andric [[nodiscard]] int compare(StringRef RHS) const { 1780b57cec5SDimitry Andric // Check the prefix for a mismatch. 1790b57cec5SDimitry Andric if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length))) 1800b57cec5SDimitry Andric return Res < 0 ? -1 : 1; 1810b57cec5SDimitry Andric 1820b57cec5SDimitry Andric // Otherwise the prefixes match, so we only need to check the lengths. 1830b57cec5SDimitry Andric if (Length == RHS.Length) 1840b57cec5SDimitry Andric return 0; 1850b57cec5SDimitry Andric return Length < RHS.Length ? -1 : 1; 1860b57cec5SDimitry Andric } 1870b57cec5SDimitry Andric 188fe6060f1SDimitry Andric /// Compare two strings, ignoring case. 189bdd1243dSDimitry Andric [[nodiscard]] int compare_insensitive(StringRef RHS) const; 1900b57cec5SDimitry Andric 1910b57cec5SDimitry Andric /// compare_numeric - Compare two strings, treating sequences of digits as 1920b57cec5SDimitry Andric /// numbers. 
193bdd1243dSDimitry Andric [[nodiscard]] int compare_numeric(StringRef RHS) const; 1940b57cec5SDimitry Andric 1950b57cec5SDimitry Andric /// Determine the edit distance between this string and another 1960b57cec5SDimitry Andric /// string. 1970b57cec5SDimitry Andric /// 1980b57cec5SDimitry Andric /// \param Other the string to compare this string against. 1990b57cec5SDimitry Andric /// 2000b57cec5SDimitry Andric /// \param AllowReplacements whether to allow character 2010b57cec5SDimitry Andric /// replacements (change one character into another) as a single 2020b57cec5SDimitry Andric /// operation, rather than as two operations (an insertion and a 2030b57cec5SDimitry Andric /// removal). 2040b57cec5SDimitry Andric /// 2050b57cec5SDimitry Andric /// \param MaxEditDistance If non-zero, the maximum edit distance that 2060b57cec5SDimitry Andric /// this routine is allowed to compute. If the edit distance will exceed 2070b57cec5SDimitry Andric /// that maximum, returns \c MaxEditDistance+1. 2080b57cec5SDimitry Andric /// 2090b57cec5SDimitry Andric /// \returns the minimum number of character insertions, removals, 2100b57cec5SDimitry Andric /// or (if \p AllowReplacements is \c true) replacements needed to 2110b57cec5SDimitry Andric /// transform one of the given strings into the other. If zero, 2120b57cec5SDimitry Andric /// the strings are identical. 213bdd1243dSDimitry Andric [[nodiscard]] unsigned edit_distance(StringRef Other, 214bdd1243dSDimitry Andric bool AllowReplacements = true, 2150b57cec5SDimitry Andric unsigned MaxEditDistance = 0) const; 2160b57cec5SDimitry Andric 217bdd1243dSDimitry Andric [[nodiscard]] unsigned 21881ad6265SDimitry Andric edit_distance_insensitive(StringRef Other, bool AllowReplacements = true, 21981ad6265SDimitry Andric unsigned MaxEditDistance = 0) const; 22081ad6265SDimitry Andric 2210b57cec5SDimitry Andric /// str - Get the contents as an std::string. 
str()222bdd1243dSDimitry Andric [[nodiscard]] std::string str() const { 2230b57cec5SDimitry Andric if (!Data) return std::string(); 2240b57cec5SDimitry Andric return std::string(Data, Length); 2250b57cec5SDimitry Andric } 2260b57cec5SDimitry Andric 2270b57cec5SDimitry Andric /// @} 2280b57cec5SDimitry Andric /// @name Operator Overloads 2290b57cec5SDimitry Andric /// @{ 2300b57cec5SDimitry Andric 231bdd1243dSDimitry Andric [[nodiscard]] char operator[](size_t Index) const { 2320b57cec5SDimitry Andric assert(Index < Length && "Invalid index!"); 2330b57cec5SDimitry Andric return Data[Index]; 2340b57cec5SDimitry Andric } 2350b57cec5SDimitry Andric 2360b57cec5SDimitry Andric /// Disallow accidental assignment from a temporary std::string. 2370b57cec5SDimitry Andric /// 2380b57cec5SDimitry Andric /// The declaration here is extra complicated so that `stringRef = {}` 2390b57cec5SDimitry Andric /// and `stringRef = "abc"` continue to select the move assignment operator. 2400b57cec5SDimitry Andric template <typename T> 2415ffd83dbSDimitry Andric std::enable_if_t<std::is_same<T, std::string>::value, StringRef> & 2420b57cec5SDimitry Andric operator=(T &&Str) = delete; 2430b57cec5SDimitry Andric 2440b57cec5SDimitry Andric /// @} 2450b57cec5SDimitry Andric /// @name Type Conversions 2460b57cec5SDimitry Andric /// @{ 2470b57cec5SDimitry Andric string_view()248297eecfbSDimitry Andric constexpr operator std::string_view() const { 2495ffd83dbSDimitry Andric return std::string_view(data(), size()); 2500b57cec5SDimitry Andric } 2510b57cec5SDimitry Andric 2520b57cec5SDimitry Andric /// @} 2530b57cec5SDimitry Andric /// @name String Predicates 2540b57cec5SDimitry Andric /// @{ 2550b57cec5SDimitry Andric 2560b57cec5SDimitry Andric /// Check if this string starts with the given \p Prefix. 
starts_with(StringRef Prefix)257bdd1243dSDimitry Andric [[nodiscard]] bool starts_with(StringRef Prefix) const { 2580b57cec5SDimitry Andric return Length >= Prefix.Length && 2590b57cec5SDimitry Andric compareMemory(Data, Prefix.Data, Prefix.Length) == 0; 2600b57cec5SDimitry Andric } 261cb14a3feSDimitry Andric [[nodiscard]] LLVM_DEPRECATED( 262cb14a3feSDimitry Andric "Use starts_with instead", startswith(StringRef Prefix)263cb14a3feSDimitry Andric "starts_with") bool startswith(StringRef Prefix) const { 264bdd1243dSDimitry Andric return starts_with(Prefix); 265bdd1243dSDimitry Andric } 2660b57cec5SDimitry Andric 2670b57cec5SDimitry Andric /// Check if this string starts with the given \p Prefix, ignoring case. 268bdd1243dSDimitry Andric [[nodiscard]] bool starts_with_insensitive(StringRef Prefix) const; 2690b57cec5SDimitry Andric 2700b57cec5SDimitry Andric /// Check if this string ends with the given \p Suffix. ends_with(StringRef Suffix)271bdd1243dSDimitry Andric [[nodiscard]] bool ends_with(StringRef Suffix) const { 2720b57cec5SDimitry Andric return Length >= Suffix.Length && 273bdd1243dSDimitry Andric compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 274bdd1243dSDimitry Andric 0; 275bdd1243dSDimitry Andric } 276cb14a3feSDimitry Andric [[nodiscard]] LLVM_DEPRECATED( 277cb14a3feSDimitry Andric "Use ends_with instead", endswith(StringRef Suffix)278cb14a3feSDimitry Andric "ends_with") bool endswith(StringRef Suffix) const { 279bdd1243dSDimitry Andric return ends_with(Suffix); 2800b57cec5SDimitry Andric } 2810b57cec5SDimitry Andric 2820b57cec5SDimitry Andric /// Check if this string ends with the given \p Suffix, ignoring case. 
283bdd1243dSDimitry Andric [[nodiscard]] bool ends_with_insensitive(StringRef Suffix) const; 2840b57cec5SDimitry Andric 2850b57cec5SDimitry Andric /// @} 2860b57cec5SDimitry Andric /// @name String Searching 2870b57cec5SDimitry Andric /// @{ 2880b57cec5SDimitry Andric 2890b57cec5SDimitry Andric /// Search for the first character \p C in the string. 2900b57cec5SDimitry Andric /// 2910b57cec5SDimitry Andric /// \returns The index of the first occurrence of \p C, or npos if not 2920b57cec5SDimitry Andric /// found. 293bdd1243dSDimitry Andric [[nodiscard]] size_t find(char C, size_t From = 0) const { 294bdd1243dSDimitry Andric return std::string_view(*this).find(C, From); 2950b57cec5SDimitry Andric } 2960b57cec5SDimitry Andric 2970b57cec5SDimitry Andric /// Search for the first character \p C in the string, ignoring case. 2980b57cec5SDimitry Andric /// 2990b57cec5SDimitry Andric /// \returns The index of the first occurrence of \p C, or npos if not 3000b57cec5SDimitry Andric /// found. 301bdd1243dSDimitry Andric [[nodiscard]] size_t find_insensitive(char C, size_t From = 0) const; 3020b57cec5SDimitry Andric 3030b57cec5SDimitry Andric /// Search for the first character satisfying the predicate \p F 3040b57cec5SDimitry Andric /// 3050b57cec5SDimitry Andric /// \returns The index of the first character satisfying \p F starting from 3060b57cec5SDimitry Andric /// \p From, or npos if not found. 
307bdd1243dSDimitry Andric [[nodiscard]] size_t find_if(function_ref<bool(char)> F, 308bdd1243dSDimitry Andric size_t From = 0) const { 3090b57cec5SDimitry Andric StringRef S = drop_front(From); 3100b57cec5SDimitry Andric while (!S.empty()) { 3110b57cec5SDimitry Andric if (F(S.front())) 3120b57cec5SDimitry Andric return size() - S.size(); 3130b57cec5SDimitry Andric S = S.drop_front(); 3140b57cec5SDimitry Andric } 3150b57cec5SDimitry Andric return npos; 3160b57cec5SDimitry Andric } 3170b57cec5SDimitry Andric 3180b57cec5SDimitry Andric /// Search for the first character not satisfying the predicate \p F 3190b57cec5SDimitry Andric /// 3200b57cec5SDimitry Andric /// \returns The index of the first character not satisfying \p F starting 3210b57cec5SDimitry Andric /// from \p From, or npos if not found. 322bdd1243dSDimitry Andric [[nodiscard]] size_t find_if_not(function_ref<bool(char)> F, 323bdd1243dSDimitry Andric size_t From = 0) const { 3240b57cec5SDimitry Andric return find_if([F](char c) { return !F(c); }, From); 3250b57cec5SDimitry Andric } 3260b57cec5SDimitry Andric 3270b57cec5SDimitry Andric /// Search for the first string \p Str in the string. 3280b57cec5SDimitry Andric /// 3290b57cec5SDimitry Andric /// \returns The index of the first occurrence of \p Str, or npos if not 3300b57cec5SDimitry Andric /// found. 331bdd1243dSDimitry Andric [[nodiscard]] size_t find(StringRef Str, size_t From = 0) const; 3320b57cec5SDimitry Andric 3330b57cec5SDimitry Andric /// Search for the first string \p Str in the string, ignoring case. 3340b57cec5SDimitry Andric /// 3350b57cec5SDimitry Andric /// \returns The index of the first occurrence of \p Str, or npos if not 3360b57cec5SDimitry Andric /// found. 337bdd1243dSDimitry Andric [[nodiscard]] size_t find_insensitive(StringRef Str, size_t From = 0) const; 3380b57cec5SDimitry Andric 3390b57cec5SDimitry Andric /// Search for the last character \p C in the string. 
3400b57cec5SDimitry Andric /// 3410b57cec5SDimitry Andric /// \returns The index of the last occurrence of \p C, or npos if not 3420b57cec5SDimitry Andric /// found. 343bdd1243dSDimitry Andric [[nodiscard]] size_t rfind(char C, size_t From = npos) const { 34406c3fb27SDimitry Andric size_t I = std::min(From, Length); 34506c3fb27SDimitry Andric while (I) { 34606c3fb27SDimitry Andric --I; 34706c3fb27SDimitry Andric if (Data[I] == C) 34806c3fb27SDimitry Andric return I; 3490b57cec5SDimitry Andric } 3500b57cec5SDimitry Andric return npos; 3510b57cec5SDimitry Andric } 3520b57cec5SDimitry Andric 3530b57cec5SDimitry Andric /// Search for the last character \p C in the string, ignoring case. 3540b57cec5SDimitry Andric /// 3550b57cec5SDimitry Andric /// \returns The index of the last occurrence of \p C, or npos if not 3560b57cec5SDimitry Andric /// found. 357bdd1243dSDimitry Andric [[nodiscard]] size_t rfind_insensitive(char C, size_t From = npos) const; 3580b57cec5SDimitry Andric 3590b57cec5SDimitry Andric /// Search for the last string \p Str in the string. 3600b57cec5SDimitry Andric /// 3610b57cec5SDimitry Andric /// \returns The index of the last occurrence of \p Str, or npos if not 3620b57cec5SDimitry Andric /// found. 363bdd1243dSDimitry Andric [[nodiscard]] size_t rfind(StringRef Str) const; 3640b57cec5SDimitry Andric 3650b57cec5SDimitry Andric /// Search for the last string \p Str in the string, ignoring case. 3660b57cec5SDimitry Andric /// 3670b57cec5SDimitry Andric /// \returns The index of the last occurrence of \p Str, or npos if not 3680b57cec5SDimitry Andric /// found. 369bdd1243dSDimitry Andric [[nodiscard]] size_t rfind_insensitive(StringRef Str) const; 3700b57cec5SDimitry Andric 3710b57cec5SDimitry Andric /// Find the first character in the string that is \p C, or npos if not 3720b57cec5SDimitry Andric /// found. Same as find. 
373bdd1243dSDimitry Andric [[nodiscard]] size_t find_first_of(char C, size_t From = 0) const { 3740b57cec5SDimitry Andric return find(C, From); 3750b57cec5SDimitry Andric } 3760b57cec5SDimitry Andric 3770b57cec5SDimitry Andric /// Find the first character in the string that is in \p Chars, or npos if 3780b57cec5SDimitry Andric /// not found. 3790b57cec5SDimitry Andric /// 3800b57cec5SDimitry Andric /// Complexity: O(size() + Chars.size()) 381bdd1243dSDimitry Andric [[nodiscard]] size_t find_first_of(StringRef Chars, size_t From = 0) const; 3820b57cec5SDimitry Andric 3830b57cec5SDimitry Andric /// Find the first character in the string that is not \p C or npos if not 3840b57cec5SDimitry Andric /// found. 385bdd1243dSDimitry Andric [[nodiscard]] size_t find_first_not_of(char C, size_t From = 0) const; 3860b57cec5SDimitry Andric 3870b57cec5SDimitry Andric /// Find the first character in the string that is not in the string 3880b57cec5SDimitry Andric /// \p Chars, or npos if not found. 3890b57cec5SDimitry Andric /// 3900b57cec5SDimitry Andric /// Complexity: O(size() + Chars.size()) 391bdd1243dSDimitry Andric [[nodiscard]] size_t find_first_not_of(StringRef Chars, 392bdd1243dSDimitry Andric size_t From = 0) const; 3930b57cec5SDimitry Andric 3940b57cec5SDimitry Andric /// Find the last character in the string that is \p C, or npos if not 3950b57cec5SDimitry Andric /// found. 396bdd1243dSDimitry Andric [[nodiscard]] size_t find_last_of(char C, size_t From = npos) const { 3970b57cec5SDimitry Andric return rfind(C, From); 3980b57cec5SDimitry Andric } 3990b57cec5SDimitry Andric 4000b57cec5SDimitry Andric /// Find the last character in the string that is in \p C, or npos if not 4010b57cec5SDimitry Andric /// found. 
4020b57cec5SDimitry Andric /// 4030b57cec5SDimitry Andric /// Complexity: O(size() + Chars.size()) 404bdd1243dSDimitry Andric [[nodiscard]] size_t find_last_of(StringRef Chars, 405bdd1243dSDimitry Andric size_t From = npos) const; 4060b57cec5SDimitry Andric 4070b57cec5SDimitry Andric /// Find the last character in the string that is not \p C, or npos if not 4080b57cec5SDimitry Andric /// found. 409bdd1243dSDimitry Andric [[nodiscard]] size_t find_last_not_of(char C, size_t From = npos) const; 4100b57cec5SDimitry Andric 4110b57cec5SDimitry Andric /// Find the last character in the string that is not in \p Chars, or 4120b57cec5SDimitry Andric /// npos if not found. 4130b57cec5SDimitry Andric /// 4140b57cec5SDimitry Andric /// Complexity: O(size() + Chars.size()) 415bdd1243dSDimitry Andric [[nodiscard]] size_t find_last_not_of(StringRef Chars, 416bdd1243dSDimitry Andric size_t From = npos) const; 4170b57cec5SDimitry Andric 4180b57cec5SDimitry Andric /// Return true if the given string is a substring of *this, and false 4190b57cec5SDimitry Andric /// otherwise. contains(StringRef Other)420bdd1243dSDimitry Andric [[nodiscard]] bool contains(StringRef Other) const { 421bdd1243dSDimitry Andric return find(Other) != npos; 422bdd1243dSDimitry Andric } 4230b57cec5SDimitry Andric 4240b57cec5SDimitry Andric /// Return true if the given character is contained in *this, and false 4250b57cec5SDimitry Andric /// otherwise. contains(char C)426bdd1243dSDimitry Andric [[nodiscard]] bool contains(char C) const { 427bdd1243dSDimitry Andric return find_first_of(C) != npos; 428bdd1243dSDimitry Andric } 4290b57cec5SDimitry Andric 4300b57cec5SDimitry Andric /// Return true if the given string is a substring of *this, and false 4310b57cec5SDimitry Andric /// otherwise. 
contains_insensitive(StringRef Other)432bdd1243dSDimitry Andric [[nodiscard]] bool contains_insensitive(StringRef Other) const { 433fe6060f1SDimitry Andric return find_insensitive(Other) != npos; 4340b57cec5SDimitry Andric } 4350b57cec5SDimitry Andric 4360b57cec5SDimitry Andric /// Return true if the given character is contained in *this, and false 4370b57cec5SDimitry Andric /// otherwise. contains_insensitive(char C)438bdd1243dSDimitry Andric [[nodiscard]] bool contains_insensitive(char C) const { 439fe6060f1SDimitry Andric return find_insensitive(C) != npos; 440fe6060f1SDimitry Andric } 4410b57cec5SDimitry Andric 4420b57cec5SDimitry Andric /// @} 4430b57cec5SDimitry Andric /// @name Helpful Algorithms 4440b57cec5SDimitry Andric /// @{ 4450b57cec5SDimitry Andric 4460b57cec5SDimitry Andric /// Return the number of occurrences of \p C in the string. count(char C)447bdd1243dSDimitry Andric [[nodiscard]] size_t count(char C) const { 4480b57cec5SDimitry Andric size_t Count = 0; 44906c3fb27SDimitry Andric for (size_t I = 0; I != Length; ++I) 45006c3fb27SDimitry Andric if (Data[I] == C) 4510b57cec5SDimitry Andric ++Count; 4520b57cec5SDimitry Andric return Count; 4530b57cec5SDimitry Andric } 4540b57cec5SDimitry Andric 4550b57cec5SDimitry Andric /// Return the number of non-overlapped occurrences of \p Str in 4560b57cec5SDimitry Andric /// the string. 4570b57cec5SDimitry Andric size_t count(StringRef Str) const; 4580b57cec5SDimitry Andric 4590b57cec5SDimitry Andric /// Parse the current string as an integer of the specified radix. If 4600b57cec5SDimitry Andric /// \p Radix is specified as zero, this does radix autosensing using 4610b57cec5SDimitry Andric /// extended C rules: 0 is octal, 0x is hex, 0b is binary. 4620b57cec5SDimitry Andric /// 4630b57cec5SDimitry Andric /// If the string is invalid or if only a subset of the string is valid, 4640b57cec5SDimitry Andric /// this returns true to signify the error. 
The string is considered 4650b57cec5SDimitry Andric /// erroneous if empty or if it overflows T. getAsInteger(unsigned Radix,T & Result)466bdd1243dSDimitry Andric template <typename T> bool getAsInteger(unsigned Radix, T &Result) const { 467bdd1243dSDimitry Andric if constexpr (std::numeric_limits<T>::is_signed) { 4680b57cec5SDimitry Andric long long LLVal; 4690b57cec5SDimitry Andric if (getAsSignedInteger(*this, Radix, LLVal) || 4700b57cec5SDimitry Andric static_cast<T>(LLVal) != LLVal) 4710b57cec5SDimitry Andric return true; 4720b57cec5SDimitry Andric Result = LLVal; 473bdd1243dSDimitry Andric } else { 4740b57cec5SDimitry Andric unsigned long long ULLVal; 4750b57cec5SDimitry Andric // The additional cast to unsigned long long is required to avoid the 4760b57cec5SDimitry Andric // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type 4770b57cec5SDimitry Andric // 'unsigned __int64' when instantiating getAsInteger with T = bool. 4780b57cec5SDimitry Andric if (getAsUnsignedInteger(*this, Radix, ULLVal) || 4790b57cec5SDimitry Andric static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal) 4800b57cec5SDimitry Andric return true; 4810b57cec5SDimitry Andric Result = ULLVal; 482bdd1243dSDimitry Andric } 4830b57cec5SDimitry Andric return false; 4840b57cec5SDimitry Andric } 4850b57cec5SDimitry Andric 4860b57cec5SDimitry Andric /// Parse the current string as an integer of the specified radix. If 4870b57cec5SDimitry Andric /// \p Radix is specified as zero, this does radix autosensing using 4880b57cec5SDimitry Andric /// extended C rules: 0 is octal, 0x is hex, 0b is binary. 4890b57cec5SDimitry Andric /// 4900b57cec5SDimitry Andric /// If the string does not begin with a number of the specified radix, 4910b57cec5SDimitry Andric /// this returns true to signify the error. The string is considered 4920b57cec5SDimitry Andric /// erroneous if empty or if it overflows T. 
4930b57cec5SDimitry Andric /// The portion of the string representing the discovered numeric value 4940b57cec5SDimitry Andric /// is removed from the beginning of the string. consumeInteger(unsigned Radix,T & Result)495bdd1243dSDimitry Andric template <typename T> bool consumeInteger(unsigned Radix, T &Result) { 496bdd1243dSDimitry Andric if constexpr (std::numeric_limits<T>::is_signed) { 4970b57cec5SDimitry Andric long long LLVal; 4980b57cec5SDimitry Andric if (consumeSignedInteger(*this, Radix, LLVal) || 4990b57cec5SDimitry Andric static_cast<long long>(static_cast<T>(LLVal)) != LLVal) 5000b57cec5SDimitry Andric return true; 5010b57cec5SDimitry Andric Result = LLVal; 502bdd1243dSDimitry Andric } else { 5030b57cec5SDimitry Andric unsigned long long ULLVal; 5040b57cec5SDimitry Andric if (consumeUnsignedInteger(*this, Radix, ULLVal) || 5050b57cec5SDimitry Andric static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal) 5060b57cec5SDimitry Andric return true; 5070b57cec5SDimitry Andric Result = ULLVal; 508bdd1243dSDimitry Andric } 5090b57cec5SDimitry Andric return false; 5100b57cec5SDimitry Andric } 5110b57cec5SDimitry Andric 5120b57cec5SDimitry Andric /// Parse the current string as an integer of the specified \p Radix, or of 5130b57cec5SDimitry Andric /// an autosensed radix if the \p Radix given is 0. The current value in 5140b57cec5SDimitry Andric /// \p Result is discarded, and the storage is changed to be wide enough to 5150b57cec5SDimitry Andric /// store the parsed integer. 5160b57cec5SDimitry Andric /// 5170b57cec5SDimitry Andric /// \returns true if the string does not solely consist of a valid 5180b57cec5SDimitry Andric /// non-empty number in the appropriate base. 5190b57cec5SDimitry Andric /// 5200b57cec5SDimitry Andric /// APInt::fromString is superficially similar but assumes the 5210b57cec5SDimitry Andric /// string is well-formed in the given radix. 
5220b57cec5SDimitry Andric bool getAsInteger(unsigned Radix, APInt &Result) const; 5230b57cec5SDimitry Andric 52406c3fb27SDimitry Andric /// Parse the current string as an integer of the specified \p Radix. If 52506c3fb27SDimitry Andric /// \p Radix is specified as zero, this does radix autosensing using 52606c3fb27SDimitry Andric /// extended C rules: 0 is octal, 0x is hex, 0b is binary. 52706c3fb27SDimitry Andric /// 52806c3fb27SDimitry Andric /// If the string does not begin with a number of the specified radix, 52906c3fb27SDimitry Andric /// this returns true to signify the error. The string is considered 53006c3fb27SDimitry Andric /// erroneous if empty. 53106c3fb27SDimitry Andric /// The portion of the string representing the discovered numeric value 53206c3fb27SDimitry Andric /// is removed from the beginning of the string. 53306c3fb27SDimitry Andric bool consumeInteger(unsigned Radix, APInt &Result); 53406c3fb27SDimitry Andric 5350b57cec5SDimitry Andric /// Parse the current string as an IEEE double-precision floating 5360b57cec5SDimitry Andric /// point value. The string must be a well-formed double. 5370b57cec5SDimitry Andric /// 5380b57cec5SDimitry Andric /// If \p AllowInexact is false, the function will fail if the string 5390b57cec5SDimitry Andric /// cannot be represented exactly. Otherwise, the function only fails 540480093f4SDimitry Andric /// in case of an overflow or underflow, or an invalid floating point 541480093f4SDimitry Andric /// representation. 5420b57cec5SDimitry Andric bool getAsDouble(double &Result, bool AllowInexact = true) const; 5430b57cec5SDimitry Andric 5440b57cec5SDimitry Andric /// @} 5450b57cec5SDimitry Andric /// @name String Operations 5460b57cec5SDimitry Andric /// @{ 5470b57cec5SDimitry Andric 5480b57cec5SDimitry Andric // Convert the given ASCII string to lowercase. 
549bdd1243dSDimitry Andric [[nodiscard]] std::string lower() const; 5500b57cec5SDimitry Andric 5510b57cec5SDimitry Andric /// Convert the given ASCII string to uppercase. 552bdd1243dSDimitry Andric [[nodiscard]] std::string upper() const; 5530b57cec5SDimitry Andric 5540b57cec5SDimitry Andric /// @} 5550b57cec5SDimitry Andric /// @name Substring Operations 5560b57cec5SDimitry Andric /// @{ 5570b57cec5SDimitry Andric 5580b57cec5SDimitry Andric /// Return a reference to the substring from [Start, Start + N). 5590b57cec5SDimitry Andric /// 5600b57cec5SDimitry Andric /// \param Start The index of the starting character in the substring; if 5610b57cec5SDimitry Andric /// the index is npos or greater than the length of the string then the 5620b57cec5SDimitry Andric /// empty substring will be returned. 5630b57cec5SDimitry Andric /// 5640b57cec5SDimitry Andric /// \param N The number of characters to included in the substring. If N 5650b57cec5SDimitry Andric /// exceeds the number of characters remaining in the string, the string 5660b57cec5SDimitry Andric /// suffix (starting with \p Start) will be returned. 567bdd1243dSDimitry Andric [[nodiscard]] constexpr StringRef substr(size_t Start, 568bdd1243dSDimitry Andric size_t N = npos) const { 5690b57cec5SDimitry Andric Start = std::min(Start, Length); 5700b57cec5SDimitry Andric return StringRef(Data + Start, std::min(N, Length - Start)); 5710b57cec5SDimitry Andric } 5720b57cec5SDimitry Andric 5730b57cec5SDimitry Andric /// Return a StringRef equal to 'this' but with only the first \p N 5740b57cec5SDimitry Andric /// elements remaining. If \p N is greater than the length of the 5750b57cec5SDimitry Andric /// string, the entire string is returned. 
576bdd1243dSDimitry Andric [[nodiscard]] StringRef take_front(size_t N = 1) const { 5770b57cec5SDimitry Andric if (N >= size()) 5780b57cec5SDimitry Andric return *this; 5790b57cec5SDimitry Andric return drop_back(size() - N); 5800b57cec5SDimitry Andric } 5810b57cec5SDimitry Andric 5820b57cec5SDimitry Andric /// Return a StringRef equal to 'this' but with only the last \p N 5830b57cec5SDimitry Andric /// elements remaining. If \p N is greater than the length of the 5840b57cec5SDimitry Andric /// string, the entire string is returned. 585bdd1243dSDimitry Andric [[nodiscard]] StringRef take_back(size_t N = 1) const { 5860b57cec5SDimitry Andric if (N >= size()) 5870b57cec5SDimitry Andric return *this; 5880b57cec5SDimitry Andric return drop_front(size() - N); 5890b57cec5SDimitry Andric } 5900b57cec5SDimitry Andric 5910b57cec5SDimitry Andric /// Return the longest prefix of 'this' such that every character 5920b57cec5SDimitry Andric /// in the prefix satisfies the given predicate. take_while(function_ref<bool (char)> F)593bdd1243dSDimitry Andric [[nodiscard]] StringRef take_while(function_ref<bool(char)> F) const { 5940b57cec5SDimitry Andric return substr(0, find_if_not(F)); 5950b57cec5SDimitry Andric } 5960b57cec5SDimitry Andric 5970b57cec5SDimitry Andric /// Return the longest prefix of 'this' such that no character in 5980b57cec5SDimitry Andric /// the prefix satisfies the given predicate. take_until(function_ref<bool (char)> F)599bdd1243dSDimitry Andric [[nodiscard]] StringRef take_until(function_ref<bool(char)> F) const { 6000b57cec5SDimitry Andric return substr(0, find_if(F)); 6010b57cec5SDimitry Andric } 6020b57cec5SDimitry Andric 6030b57cec5SDimitry Andric /// Return a StringRef equal to 'this' but with the first \p N elements 6040b57cec5SDimitry Andric /// dropped. 
605bdd1243dSDimitry Andric [[nodiscard]] StringRef drop_front(size_t N = 1) const { 6060b57cec5SDimitry Andric assert(size() >= N && "Dropping more elements than exist"); 6070b57cec5SDimitry Andric return substr(N); 6080b57cec5SDimitry Andric } 6090b57cec5SDimitry Andric 6100b57cec5SDimitry Andric /// Return a StringRef equal to 'this' but with the last \p N elements 6110b57cec5SDimitry Andric /// dropped. 612bdd1243dSDimitry Andric [[nodiscard]] StringRef drop_back(size_t N = 1) const { 6130b57cec5SDimitry Andric assert(size() >= N && "Dropping more elements than exist"); 6140b57cec5SDimitry Andric return substr(0, size()-N); 6150b57cec5SDimitry Andric } 6160b57cec5SDimitry Andric 6170b57cec5SDimitry Andric /// Return a StringRef equal to 'this', but with all characters satisfying 6180b57cec5SDimitry Andric /// the given predicate dropped from the beginning of the string. drop_while(function_ref<bool (char)> F)619bdd1243dSDimitry Andric [[nodiscard]] StringRef drop_while(function_ref<bool(char)> F) const { 6200b57cec5SDimitry Andric return substr(find_if_not(F)); 6210b57cec5SDimitry Andric } 6220b57cec5SDimitry Andric 6230b57cec5SDimitry Andric /// Return a StringRef equal to 'this', but with all characters not 6240b57cec5SDimitry Andric /// satisfying the given predicate dropped from the beginning of the string. drop_until(function_ref<bool (char)> F)625bdd1243dSDimitry Andric [[nodiscard]] StringRef drop_until(function_ref<bool(char)> F) const { 6260b57cec5SDimitry Andric return substr(find_if(F)); 6270b57cec5SDimitry Andric } 6280b57cec5SDimitry Andric 6290b57cec5SDimitry Andric /// Returns true if this StringRef has the given prefix and removes that 6300b57cec5SDimitry Andric /// prefix. 
consume_front(StringRef Prefix)6310b57cec5SDimitry Andric bool consume_front(StringRef Prefix) { 632bdd1243dSDimitry Andric if (!starts_with(Prefix)) 6330b57cec5SDimitry Andric return false; 6340b57cec5SDimitry Andric 63506c3fb27SDimitry Andric *this = substr(Prefix.size()); 6360b57cec5SDimitry Andric return true; 6370b57cec5SDimitry Andric } 6380b57cec5SDimitry Andric 639fe6060f1SDimitry Andric /// Returns true if this StringRef has the given prefix, ignoring case, 640fe6060f1SDimitry Andric /// and removes that prefix. consume_front_insensitive(StringRef Prefix)641fe6060f1SDimitry Andric bool consume_front_insensitive(StringRef Prefix) { 64206c3fb27SDimitry Andric if (!starts_with_insensitive(Prefix)) 643fe6060f1SDimitry Andric return false; 644fe6060f1SDimitry Andric 64506c3fb27SDimitry Andric *this = substr(Prefix.size()); 646fe6060f1SDimitry Andric return true; 647fe6060f1SDimitry Andric } 648fe6060f1SDimitry Andric 6490b57cec5SDimitry Andric /// Returns true if this StringRef has the given suffix and removes that 6500b57cec5SDimitry Andric /// suffix. consume_back(StringRef Suffix)6510b57cec5SDimitry Andric bool consume_back(StringRef Suffix) { 652bdd1243dSDimitry Andric if (!ends_with(Suffix)) 6530b57cec5SDimitry Andric return false; 6540b57cec5SDimitry Andric 65506c3fb27SDimitry Andric *this = substr(0, size() - Suffix.size()); 6560b57cec5SDimitry Andric return true; 6570b57cec5SDimitry Andric } 6580b57cec5SDimitry Andric 659fe6060f1SDimitry Andric /// Returns true if this StringRef has the given suffix, ignoring case, 660fe6060f1SDimitry Andric /// and removes that suffix. 
consume_back_insensitive(StringRef Suffix)661fe6060f1SDimitry Andric bool consume_back_insensitive(StringRef Suffix) { 66206c3fb27SDimitry Andric if (!ends_with_insensitive(Suffix)) 663fe6060f1SDimitry Andric return false; 664fe6060f1SDimitry Andric 66506c3fb27SDimitry Andric *this = substr(0, size() - Suffix.size()); 666fe6060f1SDimitry Andric return true; 667fe6060f1SDimitry Andric } 668fe6060f1SDimitry Andric 6690b57cec5SDimitry Andric /// Return a reference to the substring from [Start, End). 6700b57cec5SDimitry Andric /// 6710b57cec5SDimitry Andric /// \param Start The index of the starting character in the substring; if 6720b57cec5SDimitry Andric /// the index is npos or greater than the length of the string then the 6730b57cec5SDimitry Andric /// empty substring will be returned. 6740b57cec5SDimitry Andric /// 6750b57cec5SDimitry Andric /// \param End The index following the last character to include in the 6760b57cec5SDimitry Andric /// substring. If this is npos or exceeds the number of characters 6770b57cec5SDimitry Andric /// remaining in the string, the string suffix (starting with \p Start) 6780b57cec5SDimitry Andric /// will be returned. If this is less than \p Start, an empty string will 6790b57cec5SDimitry Andric /// be returned. slice(size_t Start,size_t End)680bdd1243dSDimitry Andric [[nodiscard]] StringRef slice(size_t Start, size_t End) const { 6810b57cec5SDimitry Andric Start = std::min(Start, Length); 68206c3fb27SDimitry Andric End = std::clamp(End, Start, Length); 6830b57cec5SDimitry Andric return StringRef(Data + Start, End - Start); 6840b57cec5SDimitry Andric } 6850b57cec5SDimitry Andric 6860b57cec5SDimitry Andric /// Split into two substrings around the first occurrence of a separator 6870b57cec5SDimitry Andric /// character. 
6880b57cec5SDimitry Andric /// 6890b57cec5SDimitry Andric /// If \p Separator is in the string, then the result is a pair (LHS, RHS) 6900b57cec5SDimitry Andric /// such that (*this == LHS + Separator + RHS) is true and RHS is 6910b57cec5SDimitry Andric /// maximal. If \p Separator is not in the string, then the result is a 6920b57cec5SDimitry Andric /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). 6930b57cec5SDimitry Andric /// 6940b57cec5SDimitry Andric /// \param Separator The character to split on. 6950b57cec5SDimitry Andric /// \returns The split substrings. split(char Separator)696bdd1243dSDimitry Andric [[nodiscard]] std::pair<StringRef, StringRef> split(char Separator) const { 6970b57cec5SDimitry Andric return split(StringRef(&Separator, 1)); 6980b57cec5SDimitry Andric } 6990b57cec5SDimitry Andric 7000b57cec5SDimitry Andric /// Split into two substrings around the first occurrence of a separator 7010b57cec5SDimitry Andric /// string. 7020b57cec5SDimitry Andric /// 7030b57cec5SDimitry Andric /// If \p Separator is in the string, then the result is a pair (LHS, RHS) 7040b57cec5SDimitry Andric /// such that (*this == LHS + Separator + RHS) is true and RHS is 7050b57cec5SDimitry Andric /// maximal. If \p Separator is not in the string, then the result is a 7060b57cec5SDimitry Andric /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). 7070b57cec5SDimitry Andric /// 7080b57cec5SDimitry Andric /// \param Separator - The string to split on. 7090b57cec5SDimitry Andric /// \return - The split substrings. 
710bdd1243dSDimitry Andric [[nodiscard]] std::pair<StringRef, StringRef> split(StringRef Separator)711bdd1243dSDimitry Andric split(StringRef Separator) const { 7120b57cec5SDimitry Andric size_t Idx = find(Separator); 7130b57cec5SDimitry Andric if (Idx == npos) 7140b57cec5SDimitry Andric return std::make_pair(*this, StringRef()); 7150b57cec5SDimitry Andric return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos)); 7160b57cec5SDimitry Andric } 7170b57cec5SDimitry Andric 7180b57cec5SDimitry Andric /// Split into two substrings around the last occurrence of a separator 7190b57cec5SDimitry Andric /// string. 7200b57cec5SDimitry Andric /// 7210b57cec5SDimitry Andric /// If \p Separator is in the string, then the result is a pair (LHS, RHS) 7220b57cec5SDimitry Andric /// such that (*this == LHS + Separator + RHS) is true and RHS is 7230b57cec5SDimitry Andric /// minimal. If \p Separator is not in the string, then the result is a 7240b57cec5SDimitry Andric /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). 7250b57cec5SDimitry Andric /// 7260b57cec5SDimitry Andric /// \param Separator - The string to split on. 7270b57cec5SDimitry Andric /// \return - The split substrings. 728bdd1243dSDimitry Andric [[nodiscard]] std::pair<StringRef, StringRef> rsplit(StringRef Separator)729bdd1243dSDimitry Andric rsplit(StringRef Separator) const { 7300b57cec5SDimitry Andric size_t Idx = rfind(Separator); 7310b57cec5SDimitry Andric if (Idx == npos) 7320b57cec5SDimitry Andric return std::make_pair(*this, StringRef()); 7330b57cec5SDimitry Andric return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos)); 7340b57cec5SDimitry Andric } 7350b57cec5SDimitry Andric 7360b57cec5SDimitry Andric /// Split into substrings around the occurrences of a separator string. 7370b57cec5SDimitry Andric /// 7380b57cec5SDimitry Andric /// Each substring is stored in \p A. 
If \p MaxSplit is >= 0, at most 7390b57cec5SDimitry Andric /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1 7400b57cec5SDimitry Andric /// elements are added to A. 7410b57cec5SDimitry Andric /// If \p KeepEmpty is false, empty strings are not added to \p A. They 7420b57cec5SDimitry Andric /// still count when considering \p MaxSplit 7430b57cec5SDimitry Andric /// An useful invariant is that 7440b57cec5SDimitry Andric /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true 7450b57cec5SDimitry Andric /// 7460b57cec5SDimitry Andric /// \param A - Where to put the substrings. 7470b57cec5SDimitry Andric /// \param Separator - The string to split on. 7480b57cec5SDimitry Andric /// \param MaxSplit - The maximum number of times the string is split. 7490b57cec5SDimitry Andric /// \param KeepEmpty - True if empty substring should be added. 7500b57cec5SDimitry Andric void split(SmallVectorImpl<StringRef> &A, 7510b57cec5SDimitry Andric StringRef Separator, int MaxSplit = -1, 7520b57cec5SDimitry Andric bool KeepEmpty = true) const; 7530b57cec5SDimitry Andric 7540b57cec5SDimitry Andric /// Split into substrings around the occurrences of a separator character. 7550b57cec5SDimitry Andric /// 7560b57cec5SDimitry Andric /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most 7570b57cec5SDimitry Andric /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1 7580b57cec5SDimitry Andric /// elements are added to A. 7590b57cec5SDimitry Andric /// If \p KeepEmpty is false, empty strings are not added to \p A. They 7600b57cec5SDimitry Andric /// still count when considering \p MaxSplit 7610b57cec5SDimitry Andric /// An useful invariant is that 7620b57cec5SDimitry Andric /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true 7630b57cec5SDimitry Andric /// 7640b57cec5SDimitry Andric /// \param A - Where to put the substrings. 7650b57cec5SDimitry Andric /// \param Separator - The string to split on. 
7660b57cec5SDimitry Andric /// \param MaxSplit - The maximum number of times the string is split. 7670b57cec5SDimitry Andric /// \param KeepEmpty - True if empty substring should be added. 7680b57cec5SDimitry Andric void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1, 7690b57cec5SDimitry Andric bool KeepEmpty = true) const; 7700b57cec5SDimitry Andric 7710b57cec5SDimitry Andric /// Split into two substrings around the last occurrence of a separator 7720b57cec5SDimitry Andric /// character. 7730b57cec5SDimitry Andric /// 7740b57cec5SDimitry Andric /// If \p Separator is in the string, then the result is a pair (LHS, RHS) 7750b57cec5SDimitry Andric /// such that (*this == LHS + Separator + RHS) is true and RHS is 7760b57cec5SDimitry Andric /// minimal. If \p Separator is not in the string, then the result is a 7770b57cec5SDimitry Andric /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). 7780b57cec5SDimitry Andric /// 7790b57cec5SDimitry Andric /// \param Separator - The character to split on. 7800b57cec5SDimitry Andric /// \return - The split substrings. rsplit(char Separator)781bdd1243dSDimitry Andric [[nodiscard]] std::pair<StringRef, StringRef> rsplit(char Separator) const { 7820b57cec5SDimitry Andric return rsplit(StringRef(&Separator, 1)); 7830b57cec5SDimitry Andric } 7840b57cec5SDimitry Andric 7850b57cec5SDimitry Andric /// Return string with consecutive \p Char characters starting from the 7860b57cec5SDimitry Andric /// the left removed. ltrim(char Char)787bdd1243dSDimitry Andric [[nodiscard]] StringRef ltrim(char Char) const { 7880b57cec5SDimitry Andric return drop_front(std::min(Length, find_first_not_of(Char))); 7890b57cec5SDimitry Andric } 7900b57cec5SDimitry Andric 7910b57cec5SDimitry Andric /// Return string with consecutive characters in \p Chars starting from 7920b57cec5SDimitry Andric /// the left removed. 
793bdd1243dSDimitry Andric [[nodiscard]] StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const { 7940b57cec5SDimitry Andric return drop_front(std::min(Length, find_first_not_of(Chars))); 7950b57cec5SDimitry Andric } 7960b57cec5SDimitry Andric 7970b57cec5SDimitry Andric /// Return string with consecutive \p Char characters starting from the 7980b57cec5SDimitry Andric /// right removed. rtrim(char Char)799bdd1243dSDimitry Andric [[nodiscard]] StringRef rtrim(char Char) const { 8000b57cec5SDimitry Andric return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1)); 8010b57cec5SDimitry Andric } 8020b57cec5SDimitry Andric 8030b57cec5SDimitry Andric /// Return string with consecutive characters in \p Chars starting from 8040b57cec5SDimitry Andric /// the right removed. 805bdd1243dSDimitry Andric [[nodiscard]] StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const { 8060b57cec5SDimitry Andric return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1)); 8070b57cec5SDimitry Andric } 8080b57cec5SDimitry Andric 8090b57cec5SDimitry Andric /// Return string with consecutive \p Char characters starting from the 8100b57cec5SDimitry Andric /// left and right removed. trim(char Char)811bdd1243dSDimitry Andric [[nodiscard]] StringRef trim(char Char) const { 8120b57cec5SDimitry Andric return ltrim(Char).rtrim(Char); 8130b57cec5SDimitry Andric } 8140b57cec5SDimitry Andric 8150b57cec5SDimitry Andric /// Return string with consecutive characters in \p Chars starting from 8160b57cec5SDimitry Andric /// the left and right removed. 817bdd1243dSDimitry Andric [[nodiscard]] StringRef trim(StringRef Chars = " \t\n\v\f\r") const { 8180b57cec5SDimitry Andric return ltrim(Chars).rtrim(Chars); 8190b57cec5SDimitry Andric } 8200b57cec5SDimitry Andric 82104eeddc0SDimitry Andric /// Detect the line ending style of the string. 
82204eeddc0SDimitry Andric /// 82304eeddc0SDimitry Andric /// If the string contains a line ending, return the line ending character 82404eeddc0SDimitry Andric /// sequence that is detected. Otherwise return '\n' for unix line endings. 82504eeddc0SDimitry Andric /// 82604eeddc0SDimitry Andric /// \return - The line ending character sequence. detectEOL()827bdd1243dSDimitry Andric [[nodiscard]] StringRef detectEOL() const { 82804eeddc0SDimitry Andric size_t Pos = find('\r'); 82904eeddc0SDimitry Andric if (Pos == npos) { 83004eeddc0SDimitry Andric // If there is no carriage return, assume unix 83104eeddc0SDimitry Andric return "\n"; 83204eeddc0SDimitry Andric } 83304eeddc0SDimitry Andric if (Pos + 1 < Length && Data[Pos + 1] == '\n') 83404eeddc0SDimitry Andric return "\r\n"; // Windows 83504eeddc0SDimitry Andric if (Pos > 0 && Data[Pos - 1] == '\n') 83604eeddc0SDimitry Andric return "\n\r"; // You monster! 83704eeddc0SDimitry Andric return "\r"; // Classic Mac 83804eeddc0SDimitry Andric } 8390b57cec5SDimitry Andric /// @} 8400b57cec5SDimitry Andric }; 8410b57cec5SDimitry Andric 8420b57cec5SDimitry Andric /// A wrapper around a string literal that serves as a proxy for constructing 8430b57cec5SDimitry Andric /// global tables of StringRefs with the length computed at compile time. 
/// In order to avoid the invocation of a global constructor, StringLiteral
/// should *only* be used in a constexpr context, as such:
///
///   constexpr StringLiteral S("test");
///
class StringLiteral : public StringRef {
private:
  /// Construct from a pointer and an explicit length.  Private; within this
  /// class it is only used by withInnerNUL().
  constexpr StringLiteral(const char *Str, size_t N) : StringRef(Str, N) {
  }

public:
  /// Construct from a character array of \p N elements.  The resulting
  /// length is N - 1, i.e. the last array element is not part of the string.
  /// Under clang with enable_if support, this overload is only enabled when
  /// __builtin_strlen(Str) == N - 1.
  template <size_t N>
  constexpr StringLiteral(const char (&Str)[N])
#if defined(__clang__) && __has_attribute(enable_if)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wgcc-compat"
      __attribute((enable_if(__builtin_strlen(Str) == N - 1,
                             "invalid string literal")))
#pragma clang diagnostic pop
#endif
      : StringRef(Str, N - 1) {
  }

  // Explicit construction for strings like "foo\0bar".
  // Uses the full array length minus one, without the strlen check applied
  // by the public constructor.
  template <size_t N>
  static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) {
    return StringLiteral(Str, N - 1);
  }
};

/// @name StringRef Comparison Operators
/// @{

/// Equality, as defined by StringRef::equals().
inline bool operator==(StringRef LHS, StringRef RHS) {
  return LHS.equals(RHS);
}

/// Inequality; the negation of operator==.
inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }

/// Ordering operators; each compares the result of StringRef::compare()
/// against zero.
inline bool operator<(StringRef LHS, StringRef RHS) {
  return LHS.compare(RHS) < 0;
}

inline bool operator<=(StringRef LHS, StringRef RHS) {
  return LHS.compare(RHS) <= 0;
}

inline bool operator>(StringRef LHS, StringRef RHS) {
  return LHS.compare(RHS) > 0;
}

inline bool operator>=(StringRef LHS, StringRef RHS) {
  return LHS.compare(RHS) >= 0;
}

/// Append the characters of \p string to \p buffer and return \p buffer.
inline std::string &operator+=(std::string &buffer, StringRef string) {
  return buffer.append(string.data(), string.size());
}

/// @}

/// Compute a hash_code for a StringRef.
906bdd1243dSDimitry Andric [[nodiscard]] hash_code hash_value(StringRef S); 9070b57cec5SDimitry Andric 908fe6060f1SDimitry Andric // Provide DenseMapInfo for StringRefs. 909349cc55cSDimitry Andric template <> struct DenseMapInfo<StringRef, void> { 910fe6060f1SDimitry Andric static inline StringRef getEmptyKey() { 911fe6060f1SDimitry Andric return StringRef( 912fe6060f1SDimitry Andric reinterpret_cast<const char *>(~static_cast<uintptr_t>(0)), 0); 913fe6060f1SDimitry Andric } 914fe6060f1SDimitry Andric 915fe6060f1SDimitry Andric static inline StringRef getTombstoneKey() { 916fe6060f1SDimitry Andric return StringRef( 917fe6060f1SDimitry Andric reinterpret_cast<const char *>(~static_cast<uintptr_t>(1)), 0); 918fe6060f1SDimitry Andric } 919fe6060f1SDimitry Andric 92004eeddc0SDimitry Andric static unsigned getHashValue(StringRef Val); 921fe6060f1SDimitry Andric 922fe6060f1SDimitry Andric static bool isEqual(StringRef LHS, StringRef RHS) { 923fe6060f1SDimitry Andric if (RHS.data() == getEmptyKey().data()) 924fe6060f1SDimitry Andric return LHS.data() == getEmptyKey().data(); 925fe6060f1SDimitry Andric if (RHS.data() == getTombstoneKey().data()) 926fe6060f1SDimitry Andric return LHS.data() == getTombstoneKey().data(); 927fe6060f1SDimitry Andric return LHS == RHS; 928fe6060f1SDimitry Andric } 929fe6060f1SDimitry Andric }; 930fe6060f1SDimitry Andric 9310b57cec5SDimitry Andric } // end namespace llvm 9320b57cec5SDimitry Andric 9330b57cec5SDimitry Andric #endif // LLVM_ADT_STRINGREF_H 934