1 //===-- ConstString.h -------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLDB_UTILITY_CONSTSTRING_H 10 #define LLDB_UTILITY_CONSTSTRING_H 11 12 #include "llvm/ADT/DenseMapInfo.h" 13 #include "llvm/ADT/StringRef.h" 14 #include "llvm/Support/FormatVariadic.h" 15 #include "llvm/Support/YAMLTraits.h" 16 17 #include <stddef.h> 18 19 namespace lldb_private { 20 class Stream; 21 } 22 namespace llvm { 23 class raw_ostream; 24 } 25 26 namespace lldb_private { 27 28 /// \class ConstString ConstString.h "lldb/Utility/ConstString.h" 29 /// A uniqued constant string class. 30 /// 31 /// Provides an efficient way to store strings as uniqued strings. After the 32 /// strings are uniqued, finding strings that are equal to one another is very 33 /// fast as just the pointers need to be compared. It also allows for many 34 /// common strings from many different sources to be shared to keep the memory 35 /// footprint low. 36 /// 37 /// No reference counting is done on strings that are added to the string 38 /// pool, once strings are added they are in the string pool for the life of 39 /// the program. 40 class ConstString { 41 public: 42 /// Default constructor 43 /// 44 /// Initializes the string to an empty string. 45 ConstString() : m_string(nullptr) {} 46 47 /// Copy constructor 48 /// 49 /// Copies the string value in \a rhs into this object. 50 /// 51 /// \param[in] rhs 52 /// Another string object to copy. 53 ConstString(const ConstString &rhs) : m_string(rhs.m_string) {} 54 55 explicit ConstString(const llvm::StringRef &s); 56 57 /// Construct with C String value 58 /// 59 /// Constructs this object with a C string by looking to see if the 60 /// C string already exists in the global string pool. If it doesn't 61 /// exist, it is added to the string pool. 62 /// 63 /// \param[in] cstr 64 /// A NULL terminated C string to add to the string pool. 65 explicit ConstString(const char *cstr); 66 67 /// Construct with C String value with max length 68 /// 69 /// Constructs this object with a C string with a length. If \a max_cstr_len 70 /// is greater than the actual length of the string, the string length will 71 /// be truncated. This allows substrings to be created without the need to 72 /// NULL terminate the string as it is passed into this function. 73 /// 74 /// \param[in] cstr 75 /// A pointer to the first character in the C string. The C 76 /// string can be NULL terminated in a buffer that contains 77 /// more characters than the length of the string, or the 78 /// string can be part of another string and a new substring 79 /// can be created. 80 /// 81 /// \param[in] max_cstr_len 82 /// The max length of \a cstr. If the string length of \a cstr 83 /// is less than \a max_cstr_len, then the string will be 84 /// truncated. If the string length of \a cstr is greater than 85 /// \a max_cstr_len, then only max_cstr_len bytes will be used 86 /// from \a cstr. 87 explicit ConstString(const char *cstr, size_t max_cstr_len); 88 89 /// Destructor 90 /// 91 /// Since constant string values are currently not reference counted, there 92 /// isn't much to do here. 93 ~ConstString() = default; 94 95 /// C string equality binary predicate function object for ConstString 96 /// objects. 97 struct StringIsEqual { 98 /// C equality test. 99 /// 100 /// Two C strings are equal when they are contained in ConstString objects 101 /// when their pointer values are equal to each other. 102 /// 103 /// \return 104 /// Returns \b true if the C string in \a lhs is equal to 105 /// the C string value in \a rhs, \b false otherwise. 106 bool operator()(const char *lhs, const char *rhs) const { 107 return lhs == rhs; 108 } 109 }; 110 111 /// Convert to bool operator. 112 /// 113 /// This allows code to check a ConstString object to see if it contains a 114 /// valid string using code such as: 115 /// 116 /// \code 117 /// ConstString str(...); 118 /// if (str) 119 /// { ... 120 /// \endcode 121 /// 122 /// \return 123 /// /b True this object contains a valid non-empty C string, \b 124 /// false otherwise. 125 explicit operator bool() const { return !IsEmpty(); } 126 127 /// Assignment operator 128 /// 129 /// Assigns the string in this object with the value from \a rhs. 130 /// 131 /// \param[in] rhs 132 /// Another string object to copy into this object. 133 /// 134 /// \return 135 /// A const reference to this object. 136 ConstString operator=(ConstString rhs) { 137 m_string = rhs.m_string; 138 return *this; 139 } 140 141 /// Equal to operator 142 /// 143 /// Returns true if this string is equal to the string in \a rhs. This 144 /// operation is very fast as it results in a pointer comparison since all 145 /// strings are in a uniqued in a global string pool. 146 /// 147 /// \param[in] rhs 148 /// Another string object to compare this object to. 149 /// 150 /// \return 151 /// true if this object is equal to \a rhs. 152 /// false if this object is not equal to \a rhs. 153 bool operator==(ConstString rhs) const { 154 // We can do a pointer compare to compare these strings since they must 155 // come from the same pool in order to be equal. 156 return m_string == rhs.m_string; 157 } 158 159 /// Equal to operator against a non-ConstString value. 160 /// 161 /// Returns true if this string is equal to the string in \a rhs. This 162 /// overload is usually slower than comparing against a ConstString value. 163 /// However, if the rhs string not already a ConstString and it is impractical 164 /// to turn it into a non-temporary variable, then this overload is faster. 165 /// 166 /// \param[in] rhs 167 /// Another string object to compare this object to. 168 /// 169 /// \return 170 /// \b true if this object is equal to \a rhs. 171 /// \b false if this object is not equal to \a rhs. 172 bool operator==(const char *rhs) const { 173 // ConstString differentiates between empty strings and nullptr strings, but 174 // StringRef doesn't. Therefore we have to do this check manually now. 175 if (m_string == nullptr && rhs != nullptr) 176 return false; 177 if (m_string != nullptr && rhs == nullptr) 178 return false; 179 180 return GetStringRef() == rhs; 181 } 182 183 /// Not equal to operator 184 /// 185 /// Returns true if this string is not equal to the string in \a rhs. This 186 /// operation is very fast as it results in a pointer comparison since all 187 /// strings are in a uniqued in a global string pool. 188 /// 189 /// \param[in] rhs 190 /// Another string object to compare this object to. 191 /// 192 /// \return 193 /// \b true if this object is not equal to \a rhs. 194 /// \b false if this object is equal to \a rhs. 195 bool operator!=(ConstString rhs) const { 196 return m_string != rhs.m_string; 197 } 198 199 /// Not equal to operator against a non-ConstString value. 200 /// 201 /// Returns true if this string is not equal to the string in \a rhs. This 202 /// overload is usually slower than comparing against a ConstString value. 203 /// However, if the rhs string not already a ConstString and it is impractical 204 /// to turn it into a non-temporary variable, then this overload is faster. 205 /// 206 /// \param[in] rhs 207 /// Another string object to compare this object to. 208 /// 209 /// \return \b true if this object is not equal to \a rhs, false otherwise. 210 bool operator!=(const char *rhs) const { return !(*this == rhs); } 211 212 bool operator<(ConstString rhs) const; 213 214 /// Get the string value as a C string. 215 /// 216 /// Get the value of the contained string as a NULL terminated C string 217 /// value. 218 /// 219 /// If \a value_if_empty is nullptr, then nullptr will be returned. 220 /// 221 /// \return Returns \a value_if_empty if the string is empty, otherwise 222 /// the C string value contained in this object. 223 const char *AsCString(const char *value_if_empty = nullptr) const { 224 return (IsEmpty() ? value_if_empty : m_string); 225 } 226 227 /// Get the string value as a llvm::StringRef 228 /// 229 /// \return 230 /// Returns a new llvm::StringRef object filled in with the 231 /// needed data. 232 llvm::StringRef GetStringRef() const { 233 return llvm::StringRef(m_string, GetLength()); 234 } 235 236 /// Get the string value as a C string. 237 /// 238 /// Get the value of the contained string as a NULL terminated C string 239 /// value. Similar to the ConstString::AsCString() function, yet this 240 /// function will always return nullptr if the string is not valid. So this 241 /// function is a direct accessor to the string pointer value. 242 /// 243 /// \return 244 /// Returns nullptr the string is invalid, otherwise the C string 245 /// value contained in this object. 246 const char *GetCString() const { return m_string; } 247 248 /// Get the length in bytes of string value. 249 /// 250 /// The string pool stores the length of the string, so we can avoid calling 251 /// strlen() on the pointer value with this function. 252 /// 253 /// \return 254 /// Returns the number of bytes that this string occupies in 255 /// memory, not including the NULL termination byte. 256 size_t GetLength() const; 257 258 /// Clear this object's state. 259 /// 260 /// Clear any contained string and reset the value to the empty string 261 /// value. 262 void Clear() { m_string = nullptr; } 263 264 /// Equal to operator 265 /// 266 /// Returns true if this string is equal to the string in \a rhs. If case 267 /// sensitive equality is tested, this operation is very fast as it results 268 /// in a pointer comparison since all strings are in a uniqued in a global 269 /// string pool. 270 /// 271 /// \param[in] lhs 272 /// The Left Hand Side const ConstString object reference. 273 /// 274 /// \param[in] rhs 275 /// The Right Hand Side const ConstString object reference. 276 /// 277 /// \param[in] case_sensitive 278 /// Case sensitivity. If true, case sensitive equality 279 /// will be tested, otherwise character case will be ignored 280 /// 281 /// \return \b true if this object is equal to \a rhs, \b false otherwise. 282 static bool Equals(ConstString lhs, ConstString rhs, 283 const bool case_sensitive = true); 284 285 /// Compare two string objects. 286 /// 287 /// Compares the C string values contained in \a lhs and \a rhs and returns 288 /// an integer result. 289 /// 290 /// NOTE: only call this function when you want a true string 291 /// comparison. If you want string equality use the, use the == operator as 292 /// it is much more efficient. Also if you want string inequality, use the 293 /// != operator for the same reasons. 294 /// 295 /// \param[in] lhs 296 /// The Left Hand Side const ConstString object reference. 297 /// 298 /// \param[in] rhs 299 /// The Right Hand Side const ConstString object reference. 300 /// 301 /// \param[in] case_sensitive 302 /// Case sensitivity of compare. If true, case sensitive compare 303 /// will be performed, otherwise character case will be ignored 304 /// 305 /// \return -1 if lhs < rhs, 0 if lhs == rhs, 1 if lhs > rhs 306 static int Compare(ConstString lhs, ConstString rhs, 307 const bool case_sensitive = true); 308 309 /// Dump the object description to a stream. 310 /// 311 /// Dump the string value to the stream \a s. If the contained string is 312 /// empty, print \a value_if_empty to the stream instead. If \a 313 /// value_if_empty is nullptr, then nothing will be dumped to the stream. 314 /// 315 /// \param[in] s 316 /// The stream that will be used to dump the object description. 317 /// 318 /// \param[in] value_if_empty 319 /// The value to dump if the string is empty. If nullptr, nothing 320 /// will be output to the stream. 321 void Dump(Stream *s, const char *value_if_empty = nullptr) const; 322 323 /// Dump the object debug description to a stream. 324 /// 325 /// \param[in] s 326 /// The stream that will be used to dump the object description. 327 void DumpDebug(Stream *s) const; 328 329 /// Test for empty string. 330 /// 331 /// \return 332 /// \b true if the contained string is empty. 333 /// \b false if the contained string is not empty. 334 bool IsEmpty() const { return m_string == nullptr || m_string[0] == '\0'; } 335 336 /// Test for null string. 337 /// 338 /// \return 339 /// \b true if there is no string associated with this instance. 340 /// \b false if there is a string associated with this instance. 341 bool IsNull() const { return m_string == nullptr; } 342 343 /// Set the C string value. 344 /// 345 /// Set the string value in the object by uniquing the \a cstr string value 346 /// in our global string pool. 347 /// 348 /// If the C string already exists in the global string pool, it finds the 349 /// current entry and returns the existing value. If it doesn't exist, it is 350 /// added to the string pool. 351 /// 352 /// \param[in] cstr 353 /// A NULL terminated C string to add to the string pool. 354 void SetCString(const char *cstr); 355 356 void SetString(const llvm::StringRef &s); 357 358 /// Set the C string value and its mangled counterpart. 359 /// 360 /// Object files and debug symbols often use mangled string to represent the 361 /// linkage name for a symbol, function or global. The string pool can 362 /// efficiently store these values and their counterparts so when we run 363 /// into another instance of a mangled name, we can avoid calling the name 364 /// demangler over and over on the same strings and then trying to unique 365 /// them. 366 /// 367 /// \param[in] demangled 368 /// The demangled string to correlate with the \a mangled name. 369 /// 370 /// \param[in] mangled 371 /// The already uniqued mangled ConstString to correlate the 372 /// soon to be uniqued version of \a demangled. 373 void SetStringWithMangledCounterpart(llvm::StringRef demangled, 374 ConstString mangled); 375 376 /// Retrieve the mangled or demangled counterpart for a mangled or demangled 377 /// ConstString. 378 /// 379 /// Object files and debug symbols often use mangled string to represent the 380 /// linkage name for a symbol, function or global. The string pool can 381 /// efficiently store these values and their counterparts so when we run 382 /// into another instance of a mangled name, we can avoid calling the name 383 /// demangler over and over on the same strings and then trying to unique 384 /// them. 385 /// 386 /// \param[in] counterpart 387 /// A reference to a ConstString object that might get filled in 388 /// with the demangled/mangled counterpart. 389 /// 390 /// \return 391 /// /b True if \a counterpart was filled in with the counterpart 392 /// /b false otherwise. 393 bool GetMangledCounterpart(ConstString &counterpart) const; 394 395 /// Set the C string value with length. 396 /// 397 /// Set the string value in the object by uniquing \a cstr_len bytes 398 /// starting at the \a cstr string value in our global string pool. If trim 399 /// is true, then \a cstr_len indicates a maximum length of the CString and 400 /// if the actual length of the string is less, then it will be trimmed. 401 /// 402 /// If the C string already exists in the global string pool, it finds the 403 /// current entry and returns the existing value. If it doesn't exist, it is 404 /// added to the string pool. 405 /// 406 /// \param[in] cstr 407 /// A NULL terminated C string to add to the string pool. 408 /// 409 /// \param[in] cstr_len 410 /// The maximum length of the C string. 411 void SetCStringWithLength(const char *cstr, size_t cstr_len); 412 413 /// Set the C string value with the minimum length between \a fixed_cstr_len 414 /// and the actual length of the C string. This can be used for data 415 /// structures that have a fixed length to store a C string where the string 416 /// might not be NULL terminated if the string takes the entire buffer. 417 void SetTrimmedCStringWithLength(const char *cstr, size_t fixed_cstr_len); 418 419 /// Get the memory cost of this object. 420 /// 421 /// Return the size in bytes that this object takes in memory. This returns 422 /// the size in bytes of this object, which does not include any the shared 423 /// string values it may refer to. 424 /// 425 /// \return 426 /// The number of bytes that this object occupies in memory. 427 /// 428 /// \see ConstString::StaticMemorySize () 429 size_t MemorySize() const { return sizeof(ConstString); } 430 431 /// Get the size in bytes of the current global string pool. 432 /// 433 /// Reports the size in bytes of all shared C string values, containers and 434 /// any other values as a byte size for the entire string pool. 435 /// 436 /// \return 437 /// The number of bytes that the global string pool occupies 438 /// in memory. 439 static size_t StaticMemorySize(); 440 441 protected: 442 template <typename T> friend struct ::llvm::DenseMapInfo; 443 /// Only used by DenseMapInfo. 444 static ConstString FromStringPoolPointer(const char *ptr) { 445 ConstString s; 446 s.m_string = ptr; 447 return s; 448 }; 449 450 // Member variables 451 const char *m_string; 452 }; 453 454 /// Stream the string value \a str to the stream \a s 455 Stream &operator<<(Stream &s, ConstString str); 456 457 } // namespace lldb_private 458 459 namespace llvm { 460 template <> struct format_provider<lldb_private::ConstString> { 461 static void format(const lldb_private::ConstString &CS, llvm::raw_ostream &OS, 462 llvm::StringRef Options); 463 }; 464 465 /// DenseMapInfo implementation. 466 /// \{ 467 template <> struct DenseMapInfo<lldb_private::ConstString> { 468 static inline lldb_private::ConstString getEmptyKey() { 469 return lldb_private::ConstString::FromStringPoolPointer( 470 DenseMapInfo<const char *>::getEmptyKey()); 471 } 472 static inline lldb_private::ConstString getTombstoneKey() { 473 return lldb_private::ConstString::FromStringPoolPointer( 474 DenseMapInfo<const char *>::getTombstoneKey()); 475 } 476 static unsigned getHashValue(lldb_private::ConstString val) { 477 return DenseMapInfo<const char *>::getHashValue(val.m_string); 478 } 479 static bool isEqual(lldb_private::ConstString LHS, 480 lldb_private::ConstString RHS) { 481 return LHS == RHS; 482 } 483 }; 484 /// \} 485 486 namespace yaml { 487 template <> struct ScalarTraits<lldb_private::ConstString> { 488 static void output(const lldb_private::ConstString &, void *, raw_ostream &); 489 static StringRef input(StringRef, void *, lldb_private::ConstString &); 490 static QuotingType mustQuote(StringRef S) { return QuotingType::Double; } 491 }; 492 } // namespace yaml 493 494 inline raw_ostream &operator<<(raw_ostream &os, lldb_private::ConstString s) { 495 os << s.GetStringRef(); 496 return os; 497 } 498 } // namespace llvm 499 500 LLVM_YAML_IS_SEQUENCE_VECTOR(lldb_private::ConstString) 501 502 #endif // LLDB_UTILITY_CONSTSTRING_H 503