1 //===-- ConstString.h -------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLDB_UTILITY_CONSTSTRING_H 10 #define LLDB_UTILITY_CONSTSTRING_H 11 12 #include "llvm/ADT/DenseMapInfo.h" 13 #include "llvm/ADT/StringRef.h" 14 #include "llvm/Support/FormatVariadic.h" 15 16 #include <cstddef> 17 #include <string_view> 18 19 namespace lldb_private { 20 class Stream; 21 } 22 namespace llvm { 23 class raw_ostream; 24 } 25 26 namespace lldb_private { 27 28 /// \class ConstString ConstString.h "lldb/Utility/ConstString.h" 29 /// A uniqued constant string class. 30 /// 31 /// Provides an efficient way to store strings as uniqued strings. After the 32 /// strings are uniqued, finding strings that are equal to one another is very 33 /// fast as just the pointers need to be compared. It also allows for many 34 /// common strings from many different sources to be shared to keep the memory 35 /// footprint low. 36 /// 37 /// No reference counting is done on strings that are added to the string 38 /// pool, once strings are added they are in the string pool for the life of 39 /// the program. 40 class ConstString { 41 public: 42 /// Default constructor 43 /// 44 /// Initializes the string to an empty string. 45 ConstString() = default; 46 47 explicit ConstString(llvm::StringRef s); 48 49 /// Construct with C String value 50 /// 51 /// Constructs this object with a C string by looking to see if the 52 /// C string already exists in the global string pool. If it doesn't 53 /// exist, it is added to the string pool. 54 /// 55 /// \param[in] cstr 56 /// A NULL terminated C string to add to the string pool. 57 explicit ConstString(const char *cstr); 58 59 /// Construct with C String value with max length 60 /// 61 /// Constructs this object with a C string with a length. If \a max_cstr_len 62 /// is greater than the actual length of the string, the string length will 63 /// be truncated. This allows substrings to be created without the need to 64 /// NULL terminate the string as it is passed into this function. 65 /// 66 /// \param[in] cstr 67 /// A pointer to the first character in the C string. The C 68 /// string can be NULL terminated in a buffer that contains 69 /// more characters than the length of the string, or the 70 /// string can be part of another string and a new substring 71 /// can be created. 72 /// 73 /// \param[in] max_cstr_len 74 /// The max length of \a cstr. If the string length of \a cstr 75 /// is less than \a max_cstr_len, then the string will be 76 /// truncated. If the string length of \a cstr is greater than 77 /// \a max_cstr_len, then only max_cstr_len bytes will be used 78 /// from \a cstr. 79 explicit ConstString(const char *cstr, size_t max_cstr_len); 80 81 /// C string equality binary predicate function object for ConstString 82 /// objects. 83 struct StringIsEqual { 84 /// C equality test. 85 /// 86 /// Two C strings are equal when they are contained in ConstString objects 87 /// when their pointer values are equal to each other. 88 /// 89 /// \return 90 /// Returns \b true if the C string in \a lhs is equal to 91 /// the C string value in \a rhs, \b false otherwise. 92 bool operator()(const char *lhs, const char *rhs) const { 93 return lhs == rhs; 94 } 95 }; 96 97 /// Convert to bool operator. 98 /// 99 /// This allows code to check a ConstString object to see if it contains a 100 /// valid string using code such as: 101 /// 102 /// \code 103 /// ConstString str(...); 104 /// if (str) 105 /// { ... 106 /// \endcode 107 /// 108 /// \return 109 /// /b True this object contains a valid non-empty C string, \b 110 /// false otherwise. 111 explicit operator bool() const { return !IsEmpty(); } 112 113 /// Equal to operator 114 /// 115 /// Returns true if this string is equal to the string in \a rhs. This 116 /// operation is very fast as it results in a pointer comparison since all 117 /// strings are in a uniqued in a global string pool. 118 /// 119 /// \param[in] rhs 120 /// Another string object to compare this object to. 121 /// 122 /// \return 123 /// true if this object is equal to \a rhs. 124 /// false if this object is not equal to \a rhs. 125 bool operator==(ConstString rhs) const { 126 // We can do a pointer compare to compare these strings since they must 127 // come from the same pool in order to be equal. 128 return m_string == rhs.m_string; 129 } 130 131 /// Equal to operator against a non-ConstString value. 132 /// 133 /// Returns true if this string is equal to the string in \a rhs. This 134 /// overload is usually slower than comparing against a ConstString value. 135 /// However, if the rhs string not already a ConstString and it is impractical 136 /// to turn it into a non-temporary variable, then this overload is faster. 137 /// 138 /// \param[in] rhs 139 /// Another string object to compare this object to. 140 /// 141 /// \return 142 /// \b true if this object is equal to \a rhs. 143 /// \b false if this object is not equal to \a rhs. 144 bool operator==(const char *rhs) const { 145 // ConstString differentiates between empty strings and nullptr strings, but 146 // StringRef doesn't. Therefore we have to do this check manually now. 147 if (m_string == nullptr && rhs != nullptr) 148 return false; 149 if (m_string != nullptr && rhs == nullptr) 150 return false; 151 152 return GetStringRef() == rhs; 153 } 154 155 /// Not equal to operator 156 /// 157 /// Returns true if this string is not equal to the string in \a rhs. This 158 /// operation is very fast as it results in a pointer comparison since all 159 /// strings are in a uniqued in a global string pool. 160 /// 161 /// \param[in] rhs 162 /// Another string object to compare this object to. 163 /// 164 /// \return 165 /// \b true if this object is not equal to \a rhs. 166 /// \b false if this object is equal to \a rhs. 167 bool operator!=(ConstString rhs) const { return m_string != rhs.m_string; } 168 169 /// Not equal to operator against a non-ConstString value. 170 /// 171 /// Returns true if this string is not equal to the string in \a rhs. This 172 /// overload is usually slower than comparing against a ConstString value. 173 /// However, if the rhs string not already a ConstString and it is impractical 174 /// to turn it into a non-temporary variable, then this overload is faster. 175 /// 176 /// \param[in] rhs 177 /// Another string object to compare this object to. 178 /// 179 /// \return \b true if this object is not equal to \a rhs, false otherwise. 180 bool operator!=(const char *rhs) const { return !(*this == rhs); } 181 182 bool operator<(ConstString rhs) const; 183 184 // Implicitly convert \class ConstString instances to \class StringRef. 185 operator llvm::StringRef() const { return GetStringRef(); } 186 // Implicitly convert \class ConstString instances to \calss std::string_view. 187 operator std::string_view() const { return std::string_view(m_string, GetLength()); } 188 189 /// Get the string value as a C string. 190 /// 191 /// Get the value of the contained string as a NULL terminated C string 192 /// value. 193 /// 194 /// If \a value_if_empty is nullptr, then nullptr will be returned. 195 /// 196 /// \return Returns \a value_if_empty if the string is empty, otherwise 197 /// the C string value contained in this object. 198 const char *AsCString(const char *value_if_empty = nullptr) const { 199 return (IsEmpty() ? value_if_empty : m_string); 200 } 201 202 /// Get the string value as a llvm::StringRef 203 /// 204 /// \return 205 /// Returns a new llvm::StringRef object filled in with the 206 /// needed data. 207 llvm::StringRef GetStringRef() const { 208 return llvm::StringRef(m_string, GetLength()); 209 } 210 211 /// Get the string value as a C string. 212 /// 213 /// Get the value of the contained string as a NULL terminated C string 214 /// value. Similar to the ConstString::AsCString() function, yet this 215 /// function will always return nullptr if the string is not valid. So this 216 /// function is a direct accessor to the string pointer value. 217 /// 218 /// \return 219 /// Returns nullptr the string is invalid, otherwise the C string 220 /// value contained in this object. 221 const char *GetCString() const { return m_string; } 222 223 /// Get the length in bytes of string value. 224 /// 225 /// The string pool stores the length of the string, so we can avoid calling 226 /// strlen() on the pointer value with this function. 227 /// 228 /// \return 229 /// Returns the number of bytes that this string occupies in 230 /// memory, not including the NULL termination byte. 231 size_t GetLength() const; 232 233 /// Clear this object's state. 234 /// 235 /// Clear any contained string and reset the value to the empty string 236 /// value. 237 void Clear() { m_string = nullptr; } 238 239 /// Equal to operator 240 /// 241 /// Returns true if this string is equal to the string in \a rhs. If case 242 /// sensitive equality is tested, this operation is very fast as it results 243 /// in a pointer comparison since all strings are in a uniqued in a global 244 /// string pool. 245 /// 246 /// \param[in] lhs 247 /// The Left Hand Side const ConstString object reference. 248 /// 249 /// \param[in] rhs 250 /// The Right Hand Side const ConstString object reference. 251 /// 252 /// \param[in] case_sensitive 253 /// Case sensitivity. If true, case sensitive equality 254 /// will be tested, otherwise character case will be ignored 255 /// 256 /// \return \b true if this object is equal to \a rhs, \b false otherwise. 257 static bool Equals(ConstString lhs, ConstString rhs, 258 const bool case_sensitive = true); 259 260 /// Compare two string objects. 261 /// 262 /// Compares the C string values contained in \a lhs and \a rhs and returns 263 /// an integer result. 264 /// 265 /// NOTE: only call this function when you want a true string 266 /// comparison. If you want string equality use the, use the == operator as 267 /// it is much more efficient. Also if you want string inequality, use the 268 /// != operator for the same reasons. 269 /// 270 /// \param[in] lhs 271 /// The Left Hand Side const ConstString object reference. 272 /// 273 /// \param[in] rhs 274 /// The Right Hand Side const ConstString object reference. 275 /// 276 /// \param[in] case_sensitive 277 /// Case sensitivity of compare. If true, case sensitive compare 278 /// will be performed, otherwise character case will be ignored 279 /// 280 /// \return -1 if lhs < rhs, 0 if lhs == rhs, 1 if lhs > rhs 281 static int Compare(ConstString lhs, ConstString rhs, 282 const bool case_sensitive = true); 283 284 /// Dump the object description to a stream. 285 /// 286 /// Dump the string value to the stream \a s. If the contained string is 287 /// empty, print \a value_if_empty to the stream instead. If \a 288 /// value_if_empty is nullptr, then nothing will be dumped to the stream. 289 /// 290 /// \param[in] s 291 /// The stream that will be used to dump the object description. 292 /// 293 /// \param[in] value_if_empty 294 /// The value to dump if the string is empty. If nullptr, nothing 295 /// will be output to the stream. 296 void Dump(Stream *s, const char *value_if_empty = nullptr) const; 297 298 /// Dump the object debug description to a stream. 299 /// 300 /// \param[in] s 301 /// The stream that will be used to dump the object description. 302 void DumpDebug(Stream *s) const; 303 304 /// Test for empty string. 305 /// 306 /// \return 307 /// \b true if the contained string is empty. 308 /// \b false if the contained string is not empty. 309 bool IsEmpty() const { return m_string == nullptr || m_string[0] == '\0'; } 310 311 /// Test for null string. 312 /// 313 /// \return 314 /// \b true if there is no string associated with this instance. 315 /// \b false if there is a string associated with this instance. 316 bool IsNull() const { return m_string == nullptr; } 317 318 /// Set the C string value. 319 /// 320 /// Set the string value in the object by uniquing the \a cstr string value 321 /// in our global string pool. 322 /// 323 /// If the C string already exists in the global string pool, it finds the 324 /// current entry and returns the existing value. If it doesn't exist, it is 325 /// added to the string pool. 326 /// 327 /// \param[in] cstr 328 /// A NULL terminated C string to add to the string pool. 329 void SetCString(const char *cstr); 330 331 void SetString(llvm::StringRef s); 332 333 /// Set the C string value and its mangled counterpart. 334 /// 335 /// Object files and debug symbols often use mangled string to represent the 336 /// linkage name for a symbol, function or global. The string pool can 337 /// efficiently store these values and their counterparts so when we run 338 /// into another instance of a mangled name, we can avoid calling the name 339 /// demangler over and over on the same strings and then trying to unique 340 /// them. 341 /// 342 /// \param[in] demangled 343 /// The demangled string to correlate with the \a mangled name. 344 /// 345 /// \param[in] mangled 346 /// The already uniqued mangled ConstString to correlate the 347 /// soon to be uniqued version of \a demangled. 348 void SetStringWithMangledCounterpart(llvm::StringRef demangled, 349 ConstString mangled); 350 351 /// Retrieve the mangled or demangled counterpart for a mangled or demangled 352 /// ConstString. 353 /// 354 /// Object files and debug symbols often use mangled string to represent the 355 /// linkage name for a symbol, function or global. The string pool can 356 /// efficiently store these values and their counterparts so when we run 357 /// into another instance of a mangled name, we can avoid calling the name 358 /// demangler over and over on the same strings and then trying to unique 359 /// them. 360 /// 361 /// \param[in] counterpart 362 /// A reference to a ConstString object that might get filled in 363 /// with the demangled/mangled counterpart. 364 /// 365 /// \return 366 /// /b True if \a counterpart was filled in with the counterpart 367 /// /b false otherwise. 368 bool GetMangledCounterpart(ConstString &counterpart) const; 369 370 /// Set the C string value with length. 371 /// 372 /// Set the string value in the object by uniquing \a cstr_len bytes 373 /// starting at the \a cstr string value in our global string pool. If trim 374 /// is true, then \a cstr_len indicates a maximum length of the CString and 375 /// if the actual length of the string is less, then it will be trimmed. 376 /// 377 /// If the C string already exists in the global string pool, it finds the 378 /// current entry and returns the existing value. If it doesn't exist, it is 379 /// added to the string pool. 380 /// 381 /// \param[in] cstr 382 /// A NULL terminated C string to add to the string pool. 383 /// 384 /// \param[in] cstr_len 385 /// The maximum length of the C string. 386 void SetCStringWithLength(const char *cstr, size_t cstr_len); 387 388 /// Set the C string value with the minimum length between \a fixed_cstr_len 389 /// and the actual length of the C string. This can be used for data 390 /// structures that have a fixed length to store a C string where the string 391 /// might not be NULL terminated if the string takes the entire buffer. 392 void SetTrimmedCStringWithLength(const char *cstr, size_t fixed_cstr_len); 393 394 /// Get the memory cost of this object. 395 /// 396 /// Return the size in bytes that this object takes in memory. This returns 397 /// the size in bytes of this object, which does not include any the shared 398 /// string values it may refer to. 399 /// 400 /// \return 401 /// The number of bytes that this object occupies in memory. 402 size_t MemorySize() const { return sizeof(ConstString); } 403 404 struct MemoryStats { 405 size_t GetBytesTotal() const { return bytes_total; } 406 size_t GetBytesUsed() const { return bytes_used; } 407 size_t GetBytesUnused() const { return bytes_total - bytes_used; } 408 size_t bytes_total = 0; 409 size_t bytes_used = 0; 410 }; 411 412 static MemoryStats GetMemoryStats(); 413 414 protected: 415 template <typename T, typename Enable> friend struct ::llvm::DenseMapInfo; 416 /// Only used by DenseMapInfo. 417 static ConstString FromStringPoolPointer(const char *ptr) { 418 ConstString s; 419 s.m_string = ptr; 420 return s; 421 }; 422 423 const char *m_string = nullptr; 424 }; 425 426 /// Stream the string value \a str to the stream \a s 427 Stream &operator<<(Stream &s, ConstString str); 428 429 } // namespace lldb_private 430 431 namespace llvm { 432 template <> struct format_provider<lldb_private::ConstString> { 433 static void format(const lldb_private::ConstString &CS, llvm::raw_ostream &OS, 434 llvm::StringRef Options); 435 }; 436 437 /// DenseMapInfo implementation. 438 /// \{ 439 template <> struct DenseMapInfo<lldb_private::ConstString> { 440 static inline lldb_private::ConstString getEmptyKey() { 441 return lldb_private::ConstString::FromStringPoolPointer( 442 DenseMapInfo<const char *>::getEmptyKey()); 443 } 444 static inline lldb_private::ConstString getTombstoneKey() { 445 return lldb_private::ConstString::FromStringPoolPointer( 446 DenseMapInfo<const char *>::getTombstoneKey()); 447 } 448 static unsigned getHashValue(lldb_private::ConstString val) { 449 return DenseMapInfo<const char *>::getHashValue(val.m_string); 450 } 451 static bool isEqual(lldb_private::ConstString LHS, 452 lldb_private::ConstString RHS) { 453 return LHS == RHS; 454 } 455 }; 456 /// \} 457 458 inline raw_ostream &operator<<(raw_ostream &os, lldb_private::ConstString s) { 459 os << s.GetStringRef(); 460 return os; 461 } 462 } // namespace llvm 463 464 #endif // LLDB_UTILITY_CONSTSTRING_H 465