1 //===-- ConstString.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLDB_UTILITY_CONSTSTRING_H
10 #define LLDB_UTILITY_CONSTSTRING_H
11 
12 #include "llvm/ADT/DenseMapInfo.h"
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/Support/FormatVariadic.h"
15 
16 #include <cstddef>
17 
18 namespace lldb_private {
19 class Stream;
20 }
21 namespace llvm {
22 class raw_ostream;
23 }
24 
25 namespace lldb_private {
26 
27 /// \class ConstString ConstString.h "lldb/Utility/ConstString.h"
28 /// A uniqued constant string class.
29 ///
30 /// Provides an efficient way to store strings as uniqued strings. After the
31 /// strings are uniqued, finding strings that are equal to one another is very
32 /// fast as just the pointers need to be compared. It also allows for many
33 /// common strings from many different sources to be shared to keep the memory
34 /// footprint low.
35 ///
36 /// No reference counting is done on strings that are added to the string
37 /// pool, once strings are added they are in the string pool for the life of
38 /// the program.
39 class ConstString {
40 public:
41   /// Default constructor
42   ///
43   /// Initializes the string to an empty string.
44   ConstString() = default;
45 
46   explicit ConstString(const llvm::StringRef &s);
47 
48   /// Construct with C String value
49   ///
50   /// Constructs this object with a C string by looking to see if the
51   /// C string already exists in the global string pool. If it doesn't
52   /// exist, it is added to the string pool.
53   ///
54   /// \param[in] cstr
55   ///     A NULL terminated C string to add to the string pool.
56   explicit ConstString(const char *cstr);
57 
58   /// Construct with C String value with max length
59   ///
60   /// Constructs this object with a C string with a length. If \a max_cstr_len
61   /// is greater than the actual length of the string, the string length will
62   /// be truncated. This allows substrings to be created without the need to
63   /// NULL terminate the string as it is passed into this function.
64   ///
65   /// \param[in] cstr
66   ///     A pointer to the first character in the C string. The C
67   ///     string can be NULL terminated in a buffer that contains
68   ///     more characters than the length of the string, or the
69   ///     string can be part of another string and a new substring
70   ///     can be created.
71   ///
72   /// \param[in] max_cstr_len
73   ///     The max length of \a cstr. If the string length of \a cstr
74   ///     is less than \a max_cstr_len, then the string will be
75   ///     truncated. If the string length of \a cstr is greater than
76   ///     \a max_cstr_len, then only max_cstr_len bytes will be used
77   ///     from \a cstr.
78   explicit ConstString(const char *cstr, size_t max_cstr_len);
79 
80   /// C string equality binary predicate function object for ConstString
81   /// objects.
82   struct StringIsEqual {
83     /// C equality test.
84     ///
85     /// Two C strings are equal when they are contained in ConstString objects
86     /// when their pointer values are equal to each other.
87     ///
88     /// \return
89     ///     Returns \b true if the C string in \a lhs is equal to
90     ///     the C string value in \a rhs, \b false otherwise.
91     bool operator()(const char *lhs, const char *rhs) const {
92       return lhs == rhs;
93     }
94   };
95 
96   /// Convert to bool operator.
97   ///
98   /// This allows code to check a ConstString object to see if it contains a
99   /// valid string using code such as:
100   ///
101   /// \code
102   /// ConstString str(...);
103   /// if (str)
104   /// { ...
105   /// \endcode
106   ///
107   /// \return
108   ///     /b True this object contains a valid non-empty C string, \b
109   ///     false otherwise.
110   explicit operator bool() const { return !IsEmpty(); }
111 
112   /// Equal to operator
113   ///
114   /// Returns true if this string is equal to the string in \a rhs. This
115   /// operation is very fast as it results in a pointer comparison since all
116   /// strings are in a uniqued in a global string pool.
117   ///
118   /// \param[in] rhs
119   ///     Another string object to compare this object to.
120   ///
121   /// \return
122   ///     true if this object is equal to \a rhs.
123   ///     false if this object is not equal to \a rhs.
124   bool operator==(ConstString rhs) const {
125     // We can do a pointer compare to compare these strings since they must
126     // come from the same pool in order to be equal.
127     return m_string == rhs.m_string;
128   }
129 
130   /// Equal to operator against a non-ConstString value.
131   ///
132   /// Returns true if this string is equal to the string in \a rhs. This
133   /// overload is usually slower than comparing against a ConstString value.
134   /// However, if the rhs string not already a ConstString and it is impractical
135   /// to turn it into a non-temporary variable, then this overload is faster.
136   ///
137   /// \param[in] rhs
138   ///     Another string object to compare this object to.
139   ///
140   /// \return
141   ///     \b true if this object is equal to \a rhs.
142   ///     \b false if this object is not equal to \a rhs.
143   bool operator==(const char *rhs) const {
144     // ConstString differentiates between empty strings and nullptr strings, but
145     // StringRef doesn't. Therefore we have to do this check manually now.
146     if (m_string == nullptr && rhs != nullptr)
147       return false;
148     if (m_string != nullptr && rhs == nullptr)
149       return false;
150 
151     return GetStringRef() == rhs;
152   }
153 
154   /// Not equal to operator
155   ///
156   /// Returns true if this string is not equal to the string in \a rhs. This
157   /// operation is very fast as it results in a pointer comparison since all
158   /// strings are in a uniqued in a global string pool.
159   ///
160   /// \param[in] rhs
161   ///     Another string object to compare this object to.
162   ///
163   /// \return
164   ///     \b true if this object is not equal to \a rhs.
165   ///     \b false if this object is equal to \a rhs.
166   bool operator!=(ConstString rhs) const { return m_string != rhs.m_string; }
167 
168   /// Not equal to operator against a non-ConstString value.
169   ///
170   /// Returns true if this string is not equal to the string in \a rhs. This
171   /// overload is usually slower than comparing against a ConstString value.
172   /// However, if the rhs string not already a ConstString and it is impractical
173   /// to turn it into a non-temporary variable, then this overload is faster.
174   ///
175   /// \param[in] rhs
176   ///     Another string object to compare this object to.
177   ///
178   /// \return \b true if this object is not equal to \a rhs, false otherwise.
179   bool operator!=(const char *rhs) const { return !(*this == rhs); }
180 
181   bool operator<(ConstString rhs) const;
182 
183   /// Get the string value as a C string.
184   ///
185   /// Get the value of the contained string as a NULL terminated C string
186   /// value.
187   ///
188   /// If \a value_if_empty is nullptr, then nullptr will be returned.
189   ///
190   /// \return Returns \a value_if_empty if the string is empty, otherwise
191   ///     the C string value contained in this object.
192   const char *AsCString(const char *value_if_empty = nullptr) const {
193     return (IsEmpty() ? value_if_empty : m_string);
194   }
195 
196   /// Get the string value as a llvm::StringRef
197   ///
198   /// \return
199   ///     Returns a new llvm::StringRef object filled in with the
200   ///     needed data.
201   llvm::StringRef GetStringRef() const {
202     return llvm::StringRef(m_string, GetLength());
203   }
204 
205   /// Get the string value as a C string.
206   ///
207   /// Get the value of the contained string as a NULL terminated C string
208   /// value. Similar to the ConstString::AsCString() function, yet this
209   /// function will always return nullptr if the string is not valid. So this
210   /// function is a direct accessor to the string pointer value.
211   ///
212   /// \return
213   ///     Returns nullptr the string is invalid, otherwise the C string
214   ///     value contained in this object.
215   const char *GetCString() const { return m_string; }
216 
217   /// Get the length in bytes of string value.
218   ///
219   /// The string pool stores the length of the string, so we can avoid calling
220   /// strlen() on the pointer value with this function.
221   ///
222   /// \return
223   ///     Returns the number of bytes that this string occupies in
224   ///     memory, not including the NULL termination byte.
225   size_t GetLength() const;
226 
227   /// Clear this object's state.
228   ///
229   /// Clear any contained string and reset the value to the empty string
230   /// value.
231   void Clear() { m_string = nullptr; }
232 
233   /// Equal to operator
234   ///
235   /// Returns true if this string is equal to the string in \a rhs. If case
236   /// sensitive equality is tested, this operation is very fast as it results
237   /// in a pointer comparison since all strings are in a uniqued in a global
238   /// string pool.
239   ///
240   /// \param[in] lhs
241   ///     The Left Hand Side const ConstString object reference.
242   ///
243   /// \param[in] rhs
244   ///     The Right Hand Side const ConstString object reference.
245   ///
246   /// \param[in] case_sensitive
247   ///     Case sensitivity. If true, case sensitive equality
248   ///     will be tested, otherwise character case will be ignored
249   ///
250   /// \return \b true if this object is equal to \a rhs, \b false otherwise.
251   static bool Equals(ConstString lhs, ConstString rhs,
252                      const bool case_sensitive = true);
253 
254   /// Compare two string objects.
255   ///
256   /// Compares the C string values contained in \a lhs and \a rhs and returns
257   /// an integer result.
258   ///
259   /// NOTE: only call this function when you want a true string
260   /// comparison. If you want string equality use the, use the == operator as
261   /// it is much more efficient. Also if you want string inequality, use the
262   /// != operator for the same reasons.
263   ///
264   /// \param[in] lhs
265   ///     The Left Hand Side const ConstString object reference.
266   ///
267   /// \param[in] rhs
268   ///     The Right Hand Side const ConstString object reference.
269   ///
270   /// \param[in] case_sensitive
271   ///     Case sensitivity of compare. If true, case sensitive compare
272   ///     will be performed, otherwise character case will be ignored
273   ///
274   /// \return -1 if lhs < rhs, 0 if lhs == rhs, 1 if lhs > rhs
275   static int Compare(ConstString lhs, ConstString rhs,
276                      const bool case_sensitive = true);
277 
278   /// Dump the object description to a stream.
279   ///
280   /// Dump the string value to the stream \a s. If the contained string is
281   /// empty, print \a value_if_empty to the stream instead. If \a
282   /// value_if_empty is nullptr, then nothing will be dumped to the stream.
283   ///
284   /// \param[in] s
285   ///     The stream that will be used to dump the object description.
286   ///
287   /// \param[in] value_if_empty
288   ///     The value to dump if the string is empty. If nullptr, nothing
289   ///     will be output to the stream.
290   void Dump(Stream *s, const char *value_if_empty = nullptr) const;
291 
292   /// Dump the object debug description to a stream.
293   ///
294   /// \param[in] s
295   ///     The stream that will be used to dump the object description.
296   void DumpDebug(Stream *s) const;
297 
298   /// Test for empty string.
299   ///
300   /// \return
301   ///     \b true if the contained string is empty.
302   ///     \b false if the contained string is not empty.
303   bool IsEmpty() const { return m_string == nullptr || m_string[0] == '\0'; }
304 
305   /// Test for null string.
306   ///
307   /// \return
308   ///     \b true if there is no string associated with this instance.
309   ///     \b false if there is a string associated with this instance.
310   bool IsNull() const { return m_string == nullptr; }
311 
312   /// Set the C string value.
313   ///
314   /// Set the string value in the object by uniquing the \a cstr string value
315   /// in our global string pool.
316   ///
317   /// If the C string already exists in the global string pool, it finds the
318   /// current entry and returns the existing value. If it doesn't exist, it is
319   /// added to the string pool.
320   ///
321   /// \param[in] cstr
322   ///     A NULL terminated C string to add to the string pool.
323   void SetCString(const char *cstr);
324 
325   void SetString(const llvm::StringRef &s);
326 
327   /// Set the C string value and its mangled counterpart.
328   ///
329   /// Object files and debug symbols often use mangled string to represent the
330   /// linkage name for a symbol, function or global. The string pool can
331   /// efficiently store these values and their counterparts so when we run
332   /// into another instance of a mangled name, we can avoid calling the name
333   /// demangler over and over on the same strings and then trying to unique
334   /// them.
335   ///
336   /// \param[in] demangled
337   ///     The demangled string to correlate with the \a mangled name.
338   ///
339   /// \param[in] mangled
340   ///     The already uniqued mangled ConstString to correlate the
341   ///     soon to be uniqued version of \a demangled.
342   void SetStringWithMangledCounterpart(llvm::StringRef demangled,
343                                        ConstString mangled);
344 
345   /// Retrieve the mangled or demangled counterpart for a mangled or demangled
346   /// ConstString.
347   ///
348   /// Object files and debug symbols often use mangled string to represent the
349   /// linkage name for a symbol, function or global. The string pool can
350   /// efficiently store these values and their counterparts so when we run
351   /// into another instance of a mangled name, we can avoid calling the name
352   /// demangler over and over on the same strings and then trying to unique
353   /// them.
354   ///
355   /// \param[in] counterpart
356   ///     A reference to a ConstString object that might get filled in
357   ///     with the demangled/mangled counterpart.
358   ///
359   /// \return
360   ///     /b True if \a counterpart was filled in with the counterpart
361   ///     /b false otherwise.
362   bool GetMangledCounterpart(ConstString &counterpart) const;
363 
364   /// Set the C string value with length.
365   ///
366   /// Set the string value in the object by uniquing \a cstr_len bytes
367   /// starting at the \a cstr string value in our global string pool. If trim
368   /// is true, then \a cstr_len indicates a maximum length of the CString and
369   /// if the actual length of the string is less, then it will be trimmed.
370   ///
371   /// If the C string already exists in the global string pool, it finds the
372   /// current entry and returns the existing value. If it doesn't exist, it is
373   /// added to the string pool.
374   ///
375   /// \param[in] cstr
376   ///     A NULL terminated C string to add to the string pool.
377   ///
378   /// \param[in] cstr_len
379   ///     The maximum length of the C string.
380   void SetCStringWithLength(const char *cstr, size_t cstr_len);
381 
382   /// Set the C string value with the minimum length between \a fixed_cstr_len
383   /// and the actual length of the C string. This can be used for data
384   /// structures that have a fixed length to store a C string where the string
385   /// might not be NULL terminated if the string takes the entire buffer.
386   void SetTrimmedCStringWithLength(const char *cstr, size_t fixed_cstr_len);
387 
388   /// Get the memory cost of this object.
389   ///
390   /// Return the size in bytes that this object takes in memory. This returns
391   /// the size in bytes of this object, which does not include any the shared
392   /// string values it may refer to.
393   ///
394   /// \return
395   ///     The number of bytes that this object occupies in memory.
396   size_t MemorySize() const { return sizeof(ConstString); }
397 
398   struct MemoryStats {
399     size_t GetBytesTotal() const { return bytes_total; }
400     size_t GetBytesUsed() const { return bytes_used; }
401     size_t GetBytesUnused() const { return bytes_total - bytes_used; }
402     size_t bytes_total = 0;
403     size_t bytes_used = 0;
404   };
405 
406   static MemoryStats GetMemoryStats();
407 
408 protected:
409   template <typename T, typename Enable> friend struct ::llvm::DenseMapInfo;
410   /// Only used by DenseMapInfo.
411   static ConstString FromStringPoolPointer(const char *ptr) {
412     ConstString s;
413     s.m_string = ptr;
414     return s;
415   };
416 
417   const char *m_string = nullptr;
418 };
419 
/// Stream the string value \a str to the stream \a s.
///
/// Only declared here; the definition is out of line.
///
/// \param[in] s
///     The stream to write to.
///
/// \param[in] str
///     The string whose value is written.
///
/// \return
///     A Stream reference; presumably \a s itself so that calls can be
///     chained (confirm in the implementation).
Stream &operator<<(Stream &s, ConstString str);
422 
423 } // namespace lldb_private
424 
425 namespace llvm {
/// Specialization of llvm::format_provider (see
/// llvm/Support/FormatVariadic.h) for lldb_private::ConstString.
///
/// Only the declaration of format() lives in this header; how \a CS is
/// rendered to \a OS and which \a Options are understood is determined by the
/// out-of-line definition.
template <> struct format_provider<lldb_private::ConstString> {
  static void format(const lldb_private::ConstString &CS, llvm::raw_ostream &OS,
                     llvm::StringRef Options);
};
430 
431 /// DenseMapInfo implementation.
432 /// \{
433 template <> struct DenseMapInfo<lldb_private::ConstString> {
434   static inline lldb_private::ConstString getEmptyKey() {
435     return lldb_private::ConstString::FromStringPoolPointer(
436         DenseMapInfo<const char *>::getEmptyKey());
437   }
438   static inline lldb_private::ConstString getTombstoneKey() {
439     return lldb_private::ConstString::FromStringPoolPointer(
440         DenseMapInfo<const char *>::getTombstoneKey());
441   }
442   static unsigned getHashValue(lldb_private::ConstString val) {
443     return DenseMapInfo<const char *>::getHashValue(val.m_string);
444   }
445   static bool isEqual(lldb_private::ConstString LHS,
446                       lldb_private::ConstString RHS) {
447     return LHS == RHS;
448   }
449 };
450 /// \}
451 
452 inline raw_ostream &operator<<(raw_ostream &os, lldb_private::ConstString s) {
453   os << s.GetStringRef();
454   return os;
455 }
456 } // namespace llvm
457 
458 #endif // LLDB_UTILITY_CONSTSTRING_H
459