1 //===-- ConstString.h -------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLDB_UTILITY_CONSTSTRING_H 10 #define LLDB_UTILITY_CONSTSTRING_H 11 12 #include "llvm/ADT/DenseMapInfo.h" 13 #include "llvm/ADT/StringRef.h" 14 #include "llvm/Support/FormatVariadic.h" 15 16 #include <cstddef> 17 18 namespace lldb_private { 19 class Stream; 20 } 21 namespace llvm { 22 class raw_ostream; 23 } 24 25 namespace lldb_private { 26 27 /// \class ConstString ConstString.h "lldb/Utility/ConstString.h" 28 /// A uniqued constant string class. 29 /// 30 /// Provides an efficient way to store strings as uniqued strings. After the 31 /// strings are uniqued, finding strings that are equal to one another is very 32 /// fast as just the pointers need to be compared. It also allows for many 33 /// common strings from many different sources to be shared to keep the memory 34 /// footprint low. 35 /// 36 /// No reference counting is done on strings that are added to the string 37 /// pool, once strings are added they are in the string pool for the life of 38 /// the program. 39 class ConstString { 40 public: 41 /// Default constructor 42 /// 43 /// Initializes the string to an empty string. 44 ConstString() = default; 45 46 explicit ConstString(const llvm::StringRef &s); 47 48 /// Construct with C String value 49 /// 50 /// Constructs this object with a C string by looking to see if the 51 /// C string already exists in the global string pool. If it doesn't 52 /// exist, it is added to the string pool. 53 /// 54 /// \param[in] cstr 55 /// A NULL terminated C string to add to the string pool. 56 explicit ConstString(const char *cstr); 57 58 /// Construct with C String value with max length 59 /// 60 /// Constructs this object with a C string with a length. If \a max_cstr_len 61 /// is greater than the actual length of the string, the string length will 62 /// be truncated. This allows substrings to be created without the need to 63 /// NULL terminate the string as it is passed into this function. 64 /// 65 /// \param[in] cstr 66 /// A pointer to the first character in the C string. The C 67 /// string can be NULL terminated in a buffer that contains 68 /// more characters than the length of the string, or the 69 /// string can be part of another string and a new substring 70 /// can be created. 71 /// 72 /// \param[in] max_cstr_len 73 /// The max length of \a cstr. If the string length of \a cstr 74 /// is less than \a max_cstr_len, then the string will be 75 /// truncated. If the string length of \a cstr is greater than 76 /// \a max_cstr_len, then only max_cstr_len bytes will be used 77 /// from \a cstr. 78 explicit ConstString(const char *cstr, size_t max_cstr_len); 79 80 /// C string equality binary predicate function object for ConstString 81 /// objects. 82 struct StringIsEqual { 83 /// C equality test. 84 /// 85 /// Two C strings are equal when they are contained in ConstString objects 86 /// when their pointer values are equal to each other. 87 /// 88 /// \return 89 /// Returns \b true if the C string in \a lhs is equal to 90 /// the C string value in \a rhs, \b false otherwise. operatorStringIsEqual91 bool operator()(const char *lhs, const char *rhs) const { 92 return lhs == rhs; 93 } 94 }; 95 96 /// Convert to bool operator. 97 /// 98 /// This allows code to check a ConstString object to see if it contains a 99 /// valid string using code such as: 100 /// 101 /// \code 102 /// ConstString str(...); 103 /// if (str) 104 /// { ... 105 /// \endcode 106 /// 107 /// \return 108 /// /b True this object contains a valid non-empty C string, \b 109 /// false otherwise. 110 explicit operator bool() const { return !IsEmpty(); } 111 112 /// Equal to operator 113 /// 114 /// Returns true if this string is equal to the string in \a rhs. This 115 /// operation is very fast as it results in a pointer comparison since all 116 /// strings are in a uniqued in a global string pool. 117 /// 118 /// \param[in] rhs 119 /// Another string object to compare this object to. 120 /// 121 /// \return 122 /// true if this object is equal to \a rhs. 123 /// false if this object is not equal to \a rhs. 124 bool operator==(ConstString rhs) const { 125 // We can do a pointer compare to compare these strings since they must 126 // come from the same pool in order to be equal. 127 return m_string == rhs.m_string; 128 } 129 130 /// Equal to operator against a non-ConstString value. 131 /// 132 /// Returns true if this string is equal to the string in \a rhs. This 133 /// overload is usually slower than comparing against a ConstString value. 134 /// However, if the rhs string not already a ConstString and it is impractical 135 /// to turn it into a non-temporary variable, then this overload is faster. 136 /// 137 /// \param[in] rhs 138 /// Another string object to compare this object to. 139 /// 140 /// \return 141 /// \b true if this object is equal to \a rhs. 142 /// \b false if this object is not equal to \a rhs. 143 bool operator==(const char *rhs) const { 144 // ConstString differentiates between empty strings and nullptr strings, but 145 // StringRef doesn't. Therefore we have to do this check manually now. 146 if (m_string == nullptr && rhs != nullptr) 147 return false; 148 if (m_string != nullptr && rhs == nullptr) 149 return false; 150 151 return GetStringRef() == rhs; 152 } 153 154 /// Not equal to operator 155 /// 156 /// Returns true if this string is not equal to the string in \a rhs. This 157 /// operation is very fast as it results in a pointer comparison since all 158 /// strings are in a uniqued in a global string pool. 159 /// 160 /// \param[in] rhs 161 /// Another string object to compare this object to. 162 /// 163 /// \return 164 /// \b true if this object is not equal to \a rhs. 165 /// \b false if this object is equal to \a rhs. 166 bool operator!=(ConstString rhs) const { return m_string != rhs.m_string; } 167 168 /// Not equal to operator against a non-ConstString value. 169 /// 170 /// Returns true if this string is not equal to the string in \a rhs. This 171 /// overload is usually slower than comparing against a ConstString value. 172 /// However, if the rhs string not already a ConstString and it is impractical 173 /// to turn it into a non-temporary variable, then this overload is faster. 174 /// 175 /// \param[in] rhs 176 /// Another string object to compare this object to. 177 /// 178 /// \return \b true if this object is not equal to \a rhs, false otherwise. 179 bool operator!=(const char *rhs) const { return !(*this == rhs); } 180 181 bool operator<(ConstString rhs) const; 182 183 /// Get the string value as a C string. 184 /// 185 /// Get the value of the contained string as a NULL terminated C string 186 /// value. 187 /// 188 /// If \a value_if_empty is nullptr, then nullptr will be returned. 189 /// 190 /// \return Returns \a value_if_empty if the string is empty, otherwise 191 /// the C string value contained in this object. 192 const char *AsCString(const char *value_if_empty = nullptr) const { 193 return (IsEmpty() ? value_if_empty : m_string); 194 } 195 196 /// Get the string value as a llvm::StringRef 197 /// 198 /// \return 199 /// Returns a new llvm::StringRef object filled in with the 200 /// needed data. GetStringRef()201 llvm::StringRef GetStringRef() const { 202 return llvm::StringRef(m_string, GetLength()); 203 } 204 205 /// Get the string value as a C string. 206 /// 207 /// Get the value of the contained string as a NULL terminated C string 208 /// value. Similar to the ConstString::AsCString() function, yet this 209 /// function will always return nullptr if the string is not valid. So this 210 /// function is a direct accessor to the string pointer value. 211 /// 212 /// \return 213 /// Returns nullptr the string is invalid, otherwise the C string 214 /// value contained in this object. GetCString()215 const char *GetCString() const { return m_string; } 216 217 /// Get the length in bytes of string value. 218 /// 219 /// The string pool stores the length of the string, so we can avoid calling 220 /// strlen() on the pointer value with this function. 221 /// 222 /// \return 223 /// Returns the number of bytes that this string occupies in 224 /// memory, not including the NULL termination byte. 225 size_t GetLength() const; 226 227 /// Clear this object's state. 228 /// 229 /// Clear any contained string and reset the value to the empty string 230 /// value. Clear()231 void Clear() { m_string = nullptr; } 232 233 /// Equal to operator 234 /// 235 /// Returns true if this string is equal to the string in \a rhs. If case 236 /// sensitive equality is tested, this operation is very fast as it results 237 /// in a pointer comparison since all strings are in a uniqued in a global 238 /// string pool. 239 /// 240 /// \param[in] lhs 241 /// The Left Hand Side const ConstString object reference. 242 /// 243 /// \param[in] rhs 244 /// The Right Hand Side const ConstString object reference. 245 /// 246 /// \param[in] case_sensitive 247 /// Case sensitivity. If true, case sensitive equality 248 /// will be tested, otherwise character case will be ignored 249 /// 250 /// \return \b true if this object is equal to \a rhs, \b false otherwise. 251 static bool Equals(ConstString lhs, ConstString rhs, 252 const bool case_sensitive = true); 253 254 /// Compare two string objects. 255 /// 256 /// Compares the C string values contained in \a lhs and \a rhs and returns 257 /// an integer result. 258 /// 259 /// NOTE: only call this function when you want a true string 260 /// comparison. If you want string equality use the, use the == operator as 261 /// it is much more efficient. Also if you want string inequality, use the 262 /// != operator for the same reasons. 263 /// 264 /// \param[in] lhs 265 /// The Left Hand Side const ConstString object reference. 266 /// 267 /// \param[in] rhs 268 /// The Right Hand Side const ConstString object reference. 269 /// 270 /// \param[in] case_sensitive 271 /// Case sensitivity of compare. If true, case sensitive compare 272 /// will be performed, otherwise character case will be ignored 273 /// 274 /// \return -1 if lhs < rhs, 0 if lhs == rhs, 1 if lhs > rhs 275 static int Compare(ConstString lhs, ConstString rhs, 276 const bool case_sensitive = true); 277 278 /// Dump the object description to a stream. 279 /// 280 /// Dump the string value to the stream \a s. If the contained string is 281 /// empty, print \a value_if_empty to the stream instead. If \a 282 /// value_if_empty is nullptr, then nothing will be dumped to the stream. 283 /// 284 /// \param[in] s 285 /// The stream that will be used to dump the object description. 286 /// 287 /// \param[in] value_if_empty 288 /// The value to dump if the string is empty. If nullptr, nothing 289 /// will be output to the stream. 290 void Dump(Stream *s, const char *value_if_empty = nullptr) const; 291 292 /// Dump the object debug description to a stream. 293 /// 294 /// \param[in] s 295 /// The stream that will be used to dump the object description. 296 void DumpDebug(Stream *s) const; 297 298 /// Test for empty string. 299 /// 300 /// \return 301 /// \b true if the contained string is empty. 302 /// \b false if the contained string is not empty. IsEmpty()303 bool IsEmpty() const { return m_string == nullptr || m_string[0] == '\0'; } 304 305 /// Test for null string. 306 /// 307 /// \return 308 /// \b true if there is no string associated with this instance. 309 /// \b false if there is a string associated with this instance. IsNull()310 bool IsNull() const { return m_string == nullptr; } 311 312 /// Set the C string value. 313 /// 314 /// Set the string value in the object by uniquing the \a cstr string value 315 /// in our global string pool. 316 /// 317 /// If the C string already exists in the global string pool, it finds the 318 /// current entry and returns the existing value. If it doesn't exist, it is 319 /// added to the string pool. 320 /// 321 /// \param[in] cstr 322 /// A NULL terminated C string to add to the string pool. 323 void SetCString(const char *cstr); 324 325 void SetString(const llvm::StringRef &s); 326 327 /// Set the C string value and its mangled counterpart. 328 /// 329 /// Object files and debug symbols often use mangled string to represent the 330 /// linkage name for a symbol, function or global. The string pool can 331 /// efficiently store these values and their counterparts so when we run 332 /// into another instance of a mangled name, we can avoid calling the name 333 /// demangler over and over on the same strings and then trying to unique 334 /// them. 335 /// 336 /// \param[in] demangled 337 /// The demangled string to correlate with the \a mangled name. 338 /// 339 /// \param[in] mangled 340 /// The already uniqued mangled ConstString to correlate the 341 /// soon to be uniqued version of \a demangled. 342 void SetStringWithMangledCounterpart(llvm::StringRef demangled, 343 ConstString mangled); 344 345 /// Retrieve the mangled or demangled counterpart for a mangled or demangled 346 /// ConstString. 347 /// 348 /// Object files and debug symbols often use mangled string to represent the 349 /// linkage name for a symbol, function or global. The string pool can 350 /// efficiently store these values and their counterparts so when we run 351 /// into another instance of a mangled name, we can avoid calling the name 352 /// demangler over and over on the same strings and then trying to unique 353 /// them. 354 /// 355 /// \param[in] counterpart 356 /// A reference to a ConstString object that might get filled in 357 /// with the demangled/mangled counterpart. 358 /// 359 /// \return 360 /// /b True if \a counterpart was filled in with the counterpart 361 /// /b false otherwise. 362 bool GetMangledCounterpart(ConstString &counterpart) const; 363 364 /// Set the C string value with length. 365 /// 366 /// Set the string value in the object by uniquing \a cstr_len bytes 367 /// starting at the \a cstr string value in our global string pool. If trim 368 /// is true, then \a cstr_len indicates a maximum length of the CString and 369 /// if the actual length of the string is less, then it will be trimmed. 370 /// 371 /// If the C string already exists in the global string pool, it finds the 372 /// current entry and returns the existing value. If it doesn't exist, it is 373 /// added to the string pool. 374 /// 375 /// \param[in] cstr 376 /// A NULL terminated C string to add to the string pool. 377 /// 378 /// \param[in] cstr_len 379 /// The maximum length of the C string. 380 void SetCStringWithLength(const char *cstr, size_t cstr_len); 381 382 /// Set the C string value with the minimum length between \a fixed_cstr_len 383 /// and the actual length of the C string. This can be used for data 384 /// structures that have a fixed length to store a C string where the string 385 /// might not be NULL terminated if the string takes the entire buffer. 386 void SetTrimmedCStringWithLength(const char *cstr, size_t fixed_cstr_len); 387 388 /// Get the memory cost of this object. 389 /// 390 /// Return the size in bytes that this object takes in memory. This returns 391 /// the size in bytes of this object, which does not include any the shared 392 /// string values it may refer to. 393 /// 394 /// \return 395 /// The number of bytes that this object occupies in memory. MemorySize()396 size_t MemorySize() const { return sizeof(ConstString); } 397 398 struct MemoryStats { GetBytesTotalMemoryStats399 size_t GetBytesTotal() const { return bytes_total; } GetBytesUsedMemoryStats400 size_t GetBytesUsed() const { return bytes_used; } GetBytesUnusedMemoryStats401 size_t GetBytesUnused() const { return bytes_total - bytes_used; } 402 size_t bytes_total = 0; 403 size_t bytes_used = 0; 404 }; 405 406 static MemoryStats GetMemoryStats(); 407 408 protected: 409 template <typename T, typename Enable> friend struct ::llvm::DenseMapInfo; 410 /// Only used by DenseMapInfo. FromStringPoolPointer(const char * ptr)411 static ConstString FromStringPoolPointer(const char *ptr) { 412 ConstString s; 413 s.m_string = ptr; 414 return s; 415 }; 416 417 const char *m_string = nullptr; 418 }; 419 420 /// Stream the string value \a str to the stream \a s 421 Stream &operator<<(Stream &s, ConstString str); 422 423 } // namespace lldb_private 424 425 namespace llvm { 426 template <> struct format_provider<lldb_private::ConstString> { 427 static void format(const lldb_private::ConstString &CS, llvm::raw_ostream &OS, 428 llvm::StringRef Options); 429 }; 430 431 /// DenseMapInfo implementation. 432 /// \{ 433 template <> struct DenseMapInfo<lldb_private::ConstString> { 434 static inline lldb_private::ConstString getEmptyKey() { 435 return lldb_private::ConstString::FromStringPoolPointer( 436 DenseMapInfo<const char *>::getEmptyKey()); 437 } 438 static inline lldb_private::ConstString getTombstoneKey() { 439 return lldb_private::ConstString::FromStringPoolPointer( 440 DenseMapInfo<const char *>::getTombstoneKey()); 441 } 442 static unsigned getHashValue(lldb_private::ConstString val) { 443 return DenseMapInfo<const char *>::getHashValue(val.m_string); 444 } 445 static bool isEqual(lldb_private::ConstString LHS, 446 lldb_private::ConstString RHS) { 447 return LHS == RHS; 448 } 449 }; 450 /// \} 451 452 inline raw_ostream &operator<<(raw_ostream &os, lldb_private::ConstString s) { 453 os << s.GetStringRef(); 454 return os; 455 } 456 } // namespace llvm 457 458 #endif // LLDB_UTILITY_CONSTSTRING_H 459