1 //===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_ADT_STRINGREF_H 10 #define LLVM_ADT_STRINGREF_H 11 12 #include "llvm/ADT/DenseMapInfo.h" 13 #include "llvm/ADT/STLFunctionalExtras.h" 14 #include "llvm/ADT/iterator_range.h" 15 #include "llvm/Support/Compiler.h" 16 #include <algorithm> 17 #include <cassert> 18 #include <cstddef> 19 #include <cstring> 20 #include <limits> 21 #include <string> 22 #include <string_view> 23 #include <type_traits> 24 #include <utility> 25 26 namespace llvm { 27 28 class APInt; 29 class hash_code; 30 template <typename T> class SmallVectorImpl; 31 class StringRef; 32 33 /// Helper functions for StringRef::getAsInteger. 34 bool getAsUnsignedInteger(StringRef Str, unsigned Radix, 35 unsigned long long &Result); 36 37 bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result); 38 39 bool consumeUnsignedInteger(StringRef &Str, unsigned Radix, 40 unsigned long long &Result); 41 bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result); 42 43 /// StringRef - Represent a constant reference to a string, i.e. a character 44 /// array and a length, which need not be null terminated. 45 /// 46 /// This class does not own the string data, it is expected to be used in 47 /// situations where the character data resides in some other buffer, whose 48 /// lifetime extends past that of the StringRef. For this reason, it is not in 49 /// general safe to store a StringRef. 50 class LLVM_GSL_POINTER StringRef { 51 public: 52 static constexpr size_t npos = ~size_t(0); 53 54 using iterator = const char *; 55 using const_iterator = const char *; 56 using size_type = size_t; 57 58 private: 59 /// The start of the string, in an external buffer. 60 const char *Data = nullptr; 61 62 /// The length of the string. 63 size_t Length = 0; 64 65 // Workaround memcmp issue with null pointers (undefined behavior) 66 // by providing a specialized version compareMemory(const char * Lhs,const char * Rhs,size_t Length)67 static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) { 68 if (Length == 0) { return 0; } 69 return ::memcmp(Lhs,Rhs,Length); 70 } 71 72 public: 73 /// @name Constructors 74 /// @{ 75 76 /// Construct an empty string ref. 77 /*implicit*/ StringRef() = default; 78 79 /// Disable conversion from nullptr. This prevents things like 80 /// if (S == nullptr) 81 StringRef(std::nullptr_t) = delete; 82 83 /// Construct a string ref from a cstring. StringRef(const char * Str)84 /*implicit*/ constexpr StringRef(const char *Str) 85 : Data(Str), Length(Str ? 86 // GCC 7 doesn't have constexpr char_traits. Fall back to __builtin_strlen. 87 #if defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE < 8 88 __builtin_strlen(Str) 89 #else 90 std::char_traits<char>::length(Str) 91 #endif 92 : 0) { 93 } 94 95 /// Construct a string ref from a pointer and length. StringRef(const char * data,size_t length)96 /*implicit*/ constexpr StringRef(const char *data, size_t length) 97 : Data(data), Length(length) {} 98 99 /// Construct a string ref from an std::string. StringRef(const std::string & Str)100 /*implicit*/ StringRef(const std::string &Str) 101 : Data(Str.data()), Length(Str.length()) {} 102 103 /// Construct a string ref from an std::string_view. StringRef(std::string_view Str)104 /*implicit*/ constexpr StringRef(std::string_view Str) 105 : Data(Str.data()), Length(Str.size()) {} 106 107 /// @} 108 /// @name Iterators 109 /// @{ 110 begin()111 iterator begin() const { return Data; } 112 end()113 iterator end() const { return Data + Length; } 114 bytes_begin()115 const unsigned char *bytes_begin() const { 116 return reinterpret_cast<const unsigned char *>(begin()); 117 } bytes_end()118 const unsigned char *bytes_end() const { 119 return reinterpret_cast<const unsigned char *>(end()); 120 } bytes()121 iterator_range<const unsigned char *> bytes() const { 122 return make_range(bytes_begin(), bytes_end()); 123 } 124 125 /// @} 126 /// @name String Operations 127 /// @{ 128 129 /// data - Get a pointer to the start of the string (which may not be null 130 /// terminated). data()131 [[nodiscard]] constexpr const char *data() const { return Data; } 132 133 /// empty - Check if the string is empty. empty()134 [[nodiscard]] constexpr bool empty() const { return Length == 0; } 135 136 /// size - Get the string size. size()137 [[nodiscard]] constexpr size_t size() const { return Length; } 138 139 /// front - Get the first character in the string. front()140 [[nodiscard]] char front() const { 141 assert(!empty()); 142 return Data[0]; 143 } 144 145 /// back - Get the last character in the string. back()146 [[nodiscard]] char back() const { 147 assert(!empty()); 148 return Data[Length-1]; 149 } 150 151 // copy - Allocate copy in Allocator and return StringRef to it. 152 template <typename Allocator> copy(Allocator & A)153 [[nodiscard]] StringRef copy(Allocator &A) const { 154 // Don't request a length 0 copy from the allocator. 155 if (empty()) 156 return StringRef(); 157 char *S = A.template Allocate<char>(Length); 158 std::copy(begin(), end(), S); 159 return StringRef(S, Length); 160 } 161 162 /// equals - Check for string equality, this is more efficient than 163 /// compare() when the relative ordering of inequal strings isn't needed. equals(StringRef RHS)164 [[nodiscard]] bool equals(StringRef RHS) const { 165 return (Length == RHS.Length && 166 compareMemory(Data, RHS.Data, RHS.Length) == 0); 167 } 168 169 /// Check for string equality, ignoring case. equals_insensitive(StringRef RHS)170 [[nodiscard]] bool equals_insensitive(StringRef RHS) const { 171 return Length == RHS.Length && compare_insensitive(RHS) == 0; 172 } 173 174 /// compare - Compare two strings; the result is negative, zero, or positive 175 /// if this string is lexicographically less than, equal to, or greater than 176 /// the \p RHS. compare(StringRef RHS)177 [[nodiscard]] int compare(StringRef RHS) const { 178 // Check the prefix for a mismatch. 179 if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length))) 180 return Res < 0 ? -1 : 1; 181 182 // Otherwise the prefixes match, so we only need to check the lengths. 183 if (Length == RHS.Length) 184 return 0; 185 return Length < RHS.Length ? -1 : 1; 186 } 187 188 /// Compare two strings, ignoring case. 189 [[nodiscard]] int compare_insensitive(StringRef RHS) const; 190 191 /// compare_numeric - Compare two strings, treating sequences of digits as 192 /// numbers. 193 [[nodiscard]] int compare_numeric(StringRef RHS) const; 194 195 /// Determine the edit distance between this string and another 196 /// string. 197 /// 198 /// \param Other the string to compare this string against. 199 /// 200 /// \param AllowReplacements whether to allow character 201 /// replacements (change one character into another) as a single 202 /// operation, rather than as two operations (an insertion and a 203 /// removal). 204 /// 205 /// \param MaxEditDistance If non-zero, the maximum edit distance that 206 /// this routine is allowed to compute. If the edit distance will exceed 207 /// that maximum, returns \c MaxEditDistance+1. 208 /// 209 /// \returns the minimum number of character insertions, removals, 210 /// or (if \p AllowReplacements is \c true) replacements needed to 211 /// transform one of the given strings into the other. If zero, 212 /// the strings are identical. 213 [[nodiscard]] unsigned edit_distance(StringRef Other, 214 bool AllowReplacements = true, 215 unsigned MaxEditDistance = 0) const; 216 217 [[nodiscard]] unsigned 218 edit_distance_insensitive(StringRef Other, bool AllowReplacements = true, 219 unsigned MaxEditDistance = 0) const; 220 221 /// str - Get the contents as an std::string. str()222 [[nodiscard]] std::string str() const { 223 if (!Data) return std::string(); 224 return std::string(Data, Length); 225 } 226 227 /// @} 228 /// @name Operator Overloads 229 /// @{ 230 231 [[nodiscard]] char operator[](size_t Index) const { 232 assert(Index < Length && "Invalid index!"); 233 return Data[Index]; 234 } 235 236 /// Disallow accidental assignment from a temporary std::string. 237 /// 238 /// The declaration here is extra complicated so that `stringRef = {}` 239 /// and `stringRef = "abc"` continue to select the move assignment operator. 240 template <typename T> 241 std::enable_if_t<std::is_same<T, std::string>::value, StringRef> & 242 operator=(T &&Str) = delete; 243 244 /// @} 245 /// @name Type Conversions 246 /// @{ 247 string_view()248 constexpr operator std::string_view() const { 249 return std::string_view(data(), size()); 250 } 251 252 /// @} 253 /// @name String Predicates 254 /// @{ 255 256 /// Check if this string starts with the given \p Prefix. starts_with(StringRef Prefix)257 [[nodiscard]] bool starts_with(StringRef Prefix) const { 258 return Length >= Prefix.Length && 259 compareMemory(Data, Prefix.Data, Prefix.Length) == 0; 260 } 261 [[nodiscard]] LLVM_DEPRECATED( 262 "Use starts_with instead", startswith(StringRef Prefix)263 "starts_with") bool startswith(StringRef Prefix) const { 264 return starts_with(Prefix); 265 } 266 267 /// Check if this string starts with the given \p Prefix, ignoring case. 268 [[nodiscard]] bool starts_with_insensitive(StringRef Prefix) const; 269 270 /// Check if this string ends with the given \p Suffix. ends_with(StringRef Suffix)271 [[nodiscard]] bool ends_with(StringRef Suffix) const { 272 return Length >= Suffix.Length && 273 compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 274 0; 275 } 276 [[nodiscard]] LLVM_DEPRECATED( 277 "Use ends_with instead", endswith(StringRef Suffix)278 "ends_with") bool endswith(StringRef Suffix) const { 279 return ends_with(Suffix); 280 } 281 282 /// Check if this string ends with the given \p Suffix, ignoring case. 283 [[nodiscard]] bool ends_with_insensitive(StringRef Suffix) const; 284 285 /// @} 286 /// @name String Searching 287 /// @{ 288 289 /// Search for the first character \p C in the string. 290 /// 291 /// \returns The index of the first occurrence of \p C, or npos if not 292 /// found. 293 [[nodiscard]] size_t find(char C, size_t From = 0) const { 294 return std::string_view(*this).find(C, From); 295 } 296 297 /// Search for the first character \p C in the string, ignoring case. 298 /// 299 /// \returns The index of the first occurrence of \p C, or npos if not 300 /// found. 301 [[nodiscard]] size_t find_insensitive(char C, size_t From = 0) const; 302 303 /// Search for the first character satisfying the predicate \p F 304 /// 305 /// \returns The index of the first character satisfying \p F starting from 306 /// \p From, or npos if not found. 307 [[nodiscard]] size_t find_if(function_ref<bool(char)> F, 308 size_t From = 0) const { 309 StringRef S = drop_front(From); 310 while (!S.empty()) { 311 if (F(S.front())) 312 return size() - S.size(); 313 S = S.drop_front(); 314 } 315 return npos; 316 } 317 318 /// Search for the first character not satisfying the predicate \p F 319 /// 320 /// \returns The index of the first character not satisfying \p F starting 321 /// from \p From, or npos if not found. 322 [[nodiscard]] size_t find_if_not(function_ref<bool(char)> F, 323 size_t From = 0) const { 324 return find_if([F](char c) { return !F(c); }, From); 325 } 326 327 /// Search for the first string \p Str in the string. 328 /// 329 /// \returns The index of the first occurrence of \p Str, or npos if not 330 /// found. 331 [[nodiscard]] size_t find(StringRef Str, size_t From = 0) const; 332 333 /// Search for the first string \p Str in the string, ignoring case. 334 /// 335 /// \returns The index of the first occurrence of \p Str, or npos if not 336 /// found. 337 [[nodiscard]] size_t find_insensitive(StringRef Str, size_t From = 0) const; 338 339 /// Search for the last character \p C in the string. 340 /// 341 /// \returns The index of the last occurrence of \p C, or npos if not 342 /// found. 343 [[nodiscard]] size_t rfind(char C, size_t From = npos) const { 344 size_t I = std::min(From, Length); 345 while (I) { 346 --I; 347 if (Data[I] == C) 348 return I; 349 } 350 return npos; 351 } 352 353 /// Search for the last character \p C in the string, ignoring case. 354 /// 355 /// \returns The index of the last occurrence of \p C, or npos if not 356 /// found. 357 [[nodiscard]] size_t rfind_insensitive(char C, size_t From = npos) const; 358 359 /// Search for the last string \p Str in the string. 360 /// 361 /// \returns The index of the last occurrence of \p Str, or npos if not 362 /// found. 363 [[nodiscard]] size_t rfind(StringRef Str) const; 364 365 /// Search for the last string \p Str in the string, ignoring case. 366 /// 367 /// \returns The index of the last occurrence of \p Str, or npos if not 368 /// found. 369 [[nodiscard]] size_t rfind_insensitive(StringRef Str) const; 370 371 /// Find the first character in the string that is \p C, or npos if not 372 /// found. Same as find. 373 [[nodiscard]] size_t find_first_of(char C, size_t From = 0) const { 374 return find(C, From); 375 } 376 377 /// Find the first character in the string that is in \p Chars, or npos if 378 /// not found. 379 /// 380 /// Complexity: O(size() + Chars.size()) 381 [[nodiscard]] size_t find_first_of(StringRef Chars, size_t From = 0) const; 382 383 /// Find the first character in the string that is not \p C or npos if not 384 /// found. 385 [[nodiscard]] size_t find_first_not_of(char C, size_t From = 0) const; 386 387 /// Find the first character in the string that is not in the string 388 /// \p Chars, or npos if not found. 389 /// 390 /// Complexity: O(size() + Chars.size()) 391 [[nodiscard]] size_t find_first_not_of(StringRef Chars, 392 size_t From = 0) const; 393 394 /// Find the last character in the string that is \p C, or npos if not 395 /// found. 396 [[nodiscard]] size_t find_last_of(char C, size_t From = npos) const { 397 return rfind(C, From); 398 } 399 400 /// Find the last character in the string that is in \p C, or npos if not 401 /// found. 402 /// 403 /// Complexity: O(size() + Chars.size()) 404 [[nodiscard]] size_t find_last_of(StringRef Chars, 405 size_t From = npos) const; 406 407 /// Find the last character in the string that is not \p C, or npos if not 408 /// found. 409 [[nodiscard]] size_t find_last_not_of(char C, size_t From = npos) const; 410 411 /// Find the last character in the string that is not in \p Chars, or 412 /// npos if not found. 413 /// 414 /// Complexity: O(size() + Chars.size()) 415 [[nodiscard]] size_t find_last_not_of(StringRef Chars, 416 size_t From = npos) const; 417 418 /// Return true if the given string is a substring of *this, and false 419 /// otherwise. contains(StringRef Other)420 [[nodiscard]] bool contains(StringRef Other) const { 421 return find(Other) != npos; 422 } 423 424 /// Return true if the given character is contained in *this, and false 425 /// otherwise. contains(char C)426 [[nodiscard]] bool contains(char C) const { 427 return find_first_of(C) != npos; 428 } 429 430 /// Return true if the given string is a substring of *this, and false 431 /// otherwise. contains_insensitive(StringRef Other)432 [[nodiscard]] bool contains_insensitive(StringRef Other) const { 433 return find_insensitive(Other) != npos; 434 } 435 436 /// Return true if the given character is contained in *this, and false 437 /// otherwise. contains_insensitive(char C)438 [[nodiscard]] bool contains_insensitive(char C) const { 439 return find_insensitive(C) != npos; 440 } 441 442 /// @} 443 /// @name Helpful Algorithms 444 /// @{ 445 446 /// Return the number of occurrences of \p C in the string. count(char C)447 [[nodiscard]] size_t count(char C) const { 448 size_t Count = 0; 449 for (size_t I = 0; I != Length; ++I) 450 if (Data[I] == C) 451 ++Count; 452 return Count; 453 } 454 455 /// Return the number of non-overlapped occurrences of \p Str in 456 /// the string. 457 size_t count(StringRef Str) const; 458 459 /// Parse the current string as an integer of the specified radix. If 460 /// \p Radix is specified as zero, this does radix autosensing using 461 /// extended C rules: 0 is octal, 0x is hex, 0b is binary. 462 /// 463 /// If the string is invalid or if only a subset of the string is valid, 464 /// this returns true to signify the error. The string is considered 465 /// erroneous if empty or if it overflows T. getAsInteger(unsigned Radix,T & Result)466 template <typename T> bool getAsInteger(unsigned Radix, T &Result) const { 467 if constexpr (std::numeric_limits<T>::is_signed) { 468 long long LLVal; 469 if (getAsSignedInteger(*this, Radix, LLVal) || 470 static_cast<T>(LLVal) != LLVal) 471 return true; 472 Result = LLVal; 473 } else { 474 unsigned long long ULLVal; 475 // The additional cast to unsigned long long is required to avoid the 476 // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type 477 // 'unsigned __int64' when instantiating getAsInteger with T = bool. 478 if (getAsUnsignedInteger(*this, Radix, ULLVal) || 479 static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal) 480 return true; 481 Result = ULLVal; 482 } 483 return false; 484 } 485 486 /// Parse the current string as an integer of the specified radix. If 487 /// \p Radix is specified as zero, this does radix autosensing using 488 /// extended C rules: 0 is octal, 0x is hex, 0b is binary. 489 /// 490 /// If the string does not begin with a number of the specified radix, 491 /// this returns true to signify the error. The string is considered 492 /// erroneous if empty or if it overflows T. 493 /// The portion of the string representing the discovered numeric value 494 /// is removed from the beginning of the string. consumeInteger(unsigned Radix,T & Result)495 template <typename T> bool consumeInteger(unsigned Radix, T &Result) { 496 if constexpr (std::numeric_limits<T>::is_signed) { 497 long long LLVal; 498 if (consumeSignedInteger(*this, Radix, LLVal) || 499 static_cast<long long>(static_cast<T>(LLVal)) != LLVal) 500 return true; 501 Result = LLVal; 502 } else { 503 unsigned long long ULLVal; 504 if (consumeUnsignedInteger(*this, Radix, ULLVal) || 505 static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal) 506 return true; 507 Result = ULLVal; 508 } 509 return false; 510 } 511 512 /// Parse the current string as an integer of the specified \p Radix, or of 513 /// an autosensed radix if the \p Radix given is 0. The current value in 514 /// \p Result is discarded, and the storage is changed to be wide enough to 515 /// store the parsed integer. 516 /// 517 /// \returns true if the string does not solely consist of a valid 518 /// non-empty number in the appropriate base. 519 /// 520 /// APInt::fromString is superficially similar but assumes the 521 /// string is well-formed in the given radix. 522 bool getAsInteger(unsigned Radix, APInt &Result) const; 523 524 /// Parse the current string as an integer of the specified \p Radix. If 525 /// \p Radix is specified as zero, this does radix autosensing using 526 /// extended C rules: 0 is octal, 0x is hex, 0b is binary. 527 /// 528 /// If the string does not begin with a number of the specified radix, 529 /// this returns true to signify the error. The string is considered 530 /// erroneous if empty. 531 /// The portion of the string representing the discovered numeric value 532 /// is removed from the beginning of the string. 533 bool consumeInteger(unsigned Radix, APInt &Result); 534 535 /// Parse the current string as an IEEE double-precision floating 536 /// point value. The string must be a well-formed double. 537 /// 538 /// If \p AllowInexact is false, the function will fail if the string 539 /// cannot be represented exactly. Otherwise, the function only fails 540 /// in case of an overflow or underflow, or an invalid floating point 541 /// representation. 542 bool getAsDouble(double &Result, bool AllowInexact = true) const; 543 544 /// @} 545 /// @name String Operations 546 /// @{ 547 548 // Convert the given ASCII string to lowercase. 549 [[nodiscard]] std::string lower() const; 550 551 /// Convert the given ASCII string to uppercase. 552 [[nodiscard]] std::string upper() const; 553 554 /// @} 555 /// @name Substring Operations 556 /// @{ 557 558 /// Return a reference to the substring from [Start, Start + N). 559 /// 560 /// \param Start The index of the starting character in the substring; if 561 /// the index is npos or greater than the length of the string then the 562 /// empty substring will be returned. 563 /// 564 /// \param N The number of characters to included in the substring. If N 565 /// exceeds the number of characters remaining in the string, the string 566 /// suffix (starting with \p Start) will be returned. 567 [[nodiscard]] constexpr StringRef substr(size_t Start, 568 size_t N = npos) const { 569 Start = std::min(Start, Length); 570 return StringRef(Data + Start, std::min(N, Length - Start)); 571 } 572 573 /// Return a StringRef equal to 'this' but with only the first \p N 574 /// elements remaining. If \p N is greater than the length of the 575 /// string, the entire string is returned. 576 [[nodiscard]] StringRef take_front(size_t N = 1) const { 577 if (N >= size()) 578 return *this; 579 return drop_back(size() - N); 580 } 581 582 /// Return a StringRef equal to 'this' but with only the last \p N 583 /// elements remaining. If \p N is greater than the length of the 584 /// string, the entire string is returned. 585 [[nodiscard]] StringRef take_back(size_t N = 1) const { 586 if (N >= size()) 587 return *this; 588 return drop_front(size() - N); 589 } 590 591 /// Return the longest prefix of 'this' such that every character 592 /// in the prefix satisfies the given predicate. take_while(function_ref<bool (char)> F)593 [[nodiscard]] StringRef take_while(function_ref<bool(char)> F) const { 594 return substr(0, find_if_not(F)); 595 } 596 597 /// Return the longest prefix of 'this' such that no character in 598 /// the prefix satisfies the given predicate. take_until(function_ref<bool (char)> F)599 [[nodiscard]] StringRef take_until(function_ref<bool(char)> F) const { 600 return substr(0, find_if(F)); 601 } 602 603 /// Return a StringRef equal to 'this' but with the first \p N elements 604 /// dropped. 605 [[nodiscard]] StringRef drop_front(size_t N = 1) const { 606 assert(size() >= N && "Dropping more elements than exist"); 607 return substr(N); 608 } 609 610 /// Return a StringRef equal to 'this' but with the last \p N elements 611 /// dropped. 612 [[nodiscard]] StringRef drop_back(size_t N = 1) const { 613 assert(size() >= N && "Dropping more elements than exist"); 614 return substr(0, size()-N); 615 } 616 617 /// Return a StringRef equal to 'this', but with all characters satisfying 618 /// the given predicate dropped from the beginning of the string. drop_while(function_ref<bool (char)> F)619 [[nodiscard]] StringRef drop_while(function_ref<bool(char)> F) const { 620 return substr(find_if_not(F)); 621 } 622 623 /// Return a StringRef equal to 'this', but with all characters not 624 /// satisfying the given predicate dropped from the beginning of the string. drop_until(function_ref<bool (char)> F)625 [[nodiscard]] StringRef drop_until(function_ref<bool(char)> F) const { 626 return substr(find_if(F)); 627 } 628 629 /// Returns true if this StringRef has the given prefix and removes that 630 /// prefix. consume_front(StringRef Prefix)631 bool consume_front(StringRef Prefix) { 632 if (!starts_with(Prefix)) 633 return false; 634 635 *this = substr(Prefix.size()); 636 return true; 637 } 638 639 /// Returns true if this StringRef has the given prefix, ignoring case, 640 /// and removes that prefix. consume_front_insensitive(StringRef Prefix)641 bool consume_front_insensitive(StringRef Prefix) { 642 if (!starts_with_insensitive(Prefix)) 643 return false; 644 645 *this = substr(Prefix.size()); 646 return true; 647 } 648 649 /// Returns true if this StringRef has the given suffix and removes that 650 /// suffix. consume_back(StringRef Suffix)651 bool consume_back(StringRef Suffix) { 652 if (!ends_with(Suffix)) 653 return false; 654 655 *this = substr(0, size() - Suffix.size()); 656 return true; 657 } 658 659 /// Returns true if this StringRef has the given suffix, ignoring case, 660 /// and removes that suffix. consume_back_insensitive(StringRef Suffix)661 bool consume_back_insensitive(StringRef Suffix) { 662 if (!ends_with_insensitive(Suffix)) 663 return false; 664 665 *this = substr(0, size() - Suffix.size()); 666 return true; 667 } 668 669 /// Return a reference to the substring from [Start, End). 670 /// 671 /// \param Start The index of the starting character in the substring; if 672 /// the index is npos or greater than the length of the string then the 673 /// empty substring will be returned. 674 /// 675 /// \param End The index following the last character to include in the 676 /// substring. If this is npos or exceeds the number of characters 677 /// remaining in the string, the string suffix (starting with \p Start) 678 /// will be returned. If this is less than \p Start, an empty string will 679 /// be returned. slice(size_t Start,size_t End)680 [[nodiscard]] StringRef slice(size_t Start, size_t End) const { 681 Start = std::min(Start, Length); 682 End = std::clamp(End, Start, Length); 683 return StringRef(Data + Start, End - Start); 684 } 685 686 /// Split into two substrings around the first occurrence of a separator 687 /// character. 688 /// 689 /// If \p Separator is in the string, then the result is a pair (LHS, RHS) 690 /// such that (*this == LHS + Separator + RHS) is true and RHS is 691 /// maximal. If \p Separator is not in the string, then the result is a 692 /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). 693 /// 694 /// \param Separator The character to split on. 695 /// \returns The split substrings. split(char Separator)696 [[nodiscard]] std::pair<StringRef, StringRef> split(char Separator) const { 697 return split(StringRef(&Separator, 1)); 698 } 699 700 /// Split into two substrings around the first occurrence of a separator 701 /// string. 702 /// 703 /// If \p Separator is in the string, then the result is a pair (LHS, RHS) 704 /// such that (*this == LHS + Separator + RHS) is true and RHS is 705 /// maximal. If \p Separator is not in the string, then the result is a 706 /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). 707 /// 708 /// \param Separator - The string to split on. 709 /// \return - The split substrings. 710 [[nodiscard]] std::pair<StringRef, StringRef> split(StringRef Separator)711 split(StringRef Separator) const { 712 size_t Idx = find(Separator); 713 if (Idx == npos) 714 return std::make_pair(*this, StringRef()); 715 return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos)); 716 } 717 718 /// Split into two substrings around the last occurrence of a separator 719 /// string. 720 /// 721 /// If \p Separator is in the string, then the result is a pair (LHS, RHS) 722 /// such that (*this == LHS + Separator + RHS) is true and RHS is 723 /// minimal. If \p Separator is not in the string, then the result is a 724 /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). 725 /// 726 /// \param Separator - The string to split on. 727 /// \return - The split substrings. 728 [[nodiscard]] std::pair<StringRef, StringRef> rsplit(StringRef Separator)729 rsplit(StringRef Separator) const { 730 size_t Idx = rfind(Separator); 731 if (Idx == npos) 732 return std::make_pair(*this, StringRef()); 733 return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos)); 734 } 735 736 /// Split into substrings around the occurrences of a separator string. 737 /// 738 /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most 739 /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1 740 /// elements are added to A. 741 /// If \p KeepEmpty is false, empty strings are not added to \p A. They 742 /// still count when considering \p MaxSplit 743 /// An useful invariant is that 744 /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true 745 /// 746 /// \param A - Where to put the substrings. 747 /// \param Separator - The string to split on. 748 /// \param MaxSplit - The maximum number of times the string is split. 749 /// \param KeepEmpty - True if empty substring should be added. 750 void split(SmallVectorImpl<StringRef> &A, 751 StringRef Separator, int MaxSplit = -1, 752 bool KeepEmpty = true) const; 753 754 /// Split into substrings around the occurrences of a separator character. 755 /// 756 /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most 757 /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1 758 /// elements are added to A. 759 /// If \p KeepEmpty is false, empty strings are not added to \p A. They 760 /// still count when considering \p MaxSplit 761 /// An useful invariant is that 762 /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true 763 /// 764 /// \param A - Where to put the substrings. 765 /// \param Separator - The string to split on. 766 /// \param MaxSplit - The maximum number of times the string is split. 767 /// \param KeepEmpty - True if empty substring should be added. 768 void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1, 769 bool KeepEmpty = true) const; 770 771 /// Split into two substrings around the last occurrence of a separator 772 /// character. 773 /// 774 /// If \p Separator is in the string, then the result is a pair (LHS, RHS) 775 /// such that (*this == LHS + Separator + RHS) is true and RHS is 776 /// minimal. If \p Separator is not in the string, then the result is a 777 /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). 778 /// 779 /// \param Separator - The character to split on. 780 /// \return - The split substrings. rsplit(char Separator)781 [[nodiscard]] std::pair<StringRef, StringRef> rsplit(char Separator) const { 782 return rsplit(StringRef(&Separator, 1)); 783 } 784 785 /// Return string with consecutive \p Char characters starting from the 786 /// the left removed. ltrim(char Char)787 [[nodiscard]] StringRef ltrim(char Char) const { 788 return drop_front(std::min(Length, find_first_not_of(Char))); 789 } 790 791 /// Return string with consecutive characters in \p Chars starting from 792 /// the left removed. 793 [[nodiscard]] StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const { 794 return drop_front(std::min(Length, find_first_not_of(Chars))); 795 } 796 797 /// Return string with consecutive \p Char characters starting from the 798 /// right removed. rtrim(char Char)799 [[nodiscard]] StringRef rtrim(char Char) const { 800 return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1)); 801 } 802 803 /// Return string with consecutive characters in \p Chars starting from 804 /// the right removed. 805 [[nodiscard]] StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const { 806 return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1)); 807 } 808 809 /// Return string with consecutive \p Char characters starting from the 810 /// left and right removed. trim(char Char)811 [[nodiscard]] StringRef trim(char Char) const { 812 return ltrim(Char).rtrim(Char); 813 } 814 815 /// Return string with consecutive characters in \p Chars starting from 816 /// the left and right removed. 817 [[nodiscard]] StringRef trim(StringRef Chars = " \t\n\v\f\r") const { 818 return ltrim(Chars).rtrim(Chars); 819 } 820 821 /// Detect the line ending style of the string. 822 /// 823 /// If the string contains a line ending, return the line ending character 824 /// sequence that is detected. Otherwise return '\n' for unix line endings. 825 /// 826 /// \return - The line ending character sequence. detectEOL()827 [[nodiscard]] StringRef detectEOL() const { 828 size_t Pos = find('\r'); 829 if (Pos == npos) { 830 // If there is no carriage return, assume unix 831 return "\n"; 832 } 833 if (Pos + 1 < Length && Data[Pos + 1] == '\n') 834 return "\r\n"; // Windows 835 if (Pos > 0 && Data[Pos - 1] == '\n') 836 return "\n\r"; // You monster! 837 return "\r"; // Classic Mac 838 } 839 /// @} 840 }; 841 842 /// A wrapper around a string literal that serves as a proxy for constructing 843 /// global tables of StringRefs with the length computed at compile time. 844 /// In order to avoid the invocation of a global constructor, StringLiteral 845 /// should *only* be used in a constexpr context, as such: 846 /// 847 /// constexpr StringLiteral S("test"); 848 /// 849 class StringLiteral : public StringRef { 850 private: StringLiteral(const char * Str,size_t N)851 constexpr StringLiteral(const char *Str, size_t N) : StringRef(Str, N) { 852 } 853 854 public: 855 template <size_t N> StringLiteral(const char (& Str)[N])856 constexpr StringLiteral(const char (&Str)[N]) 857 #if defined(__clang__) && __has_attribute(enable_if) 858 #pragma clang diagnostic push 859 #pragma clang diagnostic ignored "-Wgcc-compat" 860 __attribute((enable_if(__builtin_strlen(Str) == N - 1, 861 "invalid string literal"))) 862 #pragma clang diagnostic pop 863 #endif 864 : StringRef(Str, N - 1) { 865 } 866 867 // Explicit construction for strings like "foo\0bar". 868 template <size_t N> withInnerNUL(const char (& Str)[N])869 static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) { 870 return StringLiteral(Str, N - 1); 871 } 872 }; 873 874 /// @name StringRef Comparison Operators 875 /// @{ 876 877 inline bool operator==(StringRef LHS, StringRef RHS) { 878 return LHS.equals(RHS); 879 } 880 881 inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); } 882 883 inline bool operator<(StringRef LHS, StringRef RHS) { 884 return LHS.compare(RHS) < 0; 885 } 886 887 inline bool operator<=(StringRef LHS, StringRef RHS) { 888 return LHS.compare(RHS) <= 0; 889 } 890 891 inline bool operator>(StringRef LHS, StringRef RHS) { 892 return LHS.compare(RHS) > 0; 893 } 894 895 inline bool operator>=(StringRef LHS, StringRef RHS) { 896 return LHS.compare(RHS) >= 0; 897 } 898 899 inline std::string &operator+=(std::string &buffer, StringRef string) { 900 return buffer.append(string.data(), string.size()); 901 } 902 903 /// @} 904 905 /// Compute a hash_code for a StringRef. 906 [[nodiscard]] hash_code hash_value(StringRef S); 907 908 // Provide DenseMapInfo for StringRefs. 909 template <> struct DenseMapInfo<StringRef, void> { 910 static inline StringRef getEmptyKey() { 911 return StringRef( 912 reinterpret_cast<const char *>(~static_cast<uintptr_t>(0)), 0); 913 } 914 915 static inline StringRef getTombstoneKey() { 916 return StringRef( 917 reinterpret_cast<const char *>(~static_cast<uintptr_t>(1)), 0); 918 } 919 920 static unsigned getHashValue(StringRef Val); 921 922 static bool isEqual(StringRef LHS, StringRef RHS) { 923 if (RHS.data() == getEmptyKey().data()) 924 return LHS.data() == getEmptyKey().data(); 925 if (RHS.data() == getTombstoneKey().data()) 926 return LHS.data() == getTombstoneKey().data(); 927 return LHS == RHS; 928 } 929 }; 930 931 } // end namespace llvm 932 933 #endif // LLVM_ADT_STRINGREF_H 934