1 //===-- ConstString.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLDB_UTILITY_CONSTSTRING_H
10 #define LLDB_UTILITY_CONSTSTRING_H
11 
12 #include "llvm/ADT/DenseMapInfo.h"
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/Support/FormatVariadic.h"
15 #include "llvm/Support/YAMLTraits.h"
16 
17 #include <cstddef>
18 
// Forward declarations. This header only refers to these types through
// pointers and references, so their complete definitions are not needed here.
namespace llvm {
class raw_ostream;
} // namespace llvm
namespace lldb_private {
class Stream;
} // namespace lldb_private
25 
26 namespace lldb_private {
27 
28 /// \class ConstString ConstString.h "lldb/Utility/ConstString.h"
29 /// A uniqued constant string class.
30 ///
31 /// Provides an efficient way to store strings as uniqued strings. After the
32 /// strings are uniqued, finding strings that are equal to one another is very
33 /// fast as just the pointers need to be compared. It also allows for many
34 /// common strings from many different sources to be shared to keep the memory
35 /// footprint low.
36 ///
37 /// No reference counting is done on strings that are added to the string
38 /// pool, once strings are added they are in the string pool for the life of
39 /// the program.
40 class ConstString {
41 public:
42   /// Default constructor
43   ///
44   /// Initializes the string to an empty string.
45   ConstString() = default;
46 
47   explicit ConstString(const llvm::StringRef &s);
48 
49   /// Construct with C String value
50   ///
51   /// Constructs this object with a C string by looking to see if the
52   /// C string already exists in the global string pool. If it doesn't
53   /// exist, it is added to the string pool.
54   ///
55   /// \param[in] cstr
56   ///     A NULL terminated C string to add to the string pool.
57   explicit ConstString(const char *cstr);
58 
59   /// Construct with C String value with max length
60   ///
61   /// Constructs this object with a C string with a length. If \a max_cstr_len
62   /// is greater than the actual length of the string, the string length will
63   /// be truncated. This allows substrings to be created without the need to
64   /// NULL terminate the string as it is passed into this function.
65   ///
66   /// \param[in] cstr
67   ///     A pointer to the first character in the C string. The C
68   ///     string can be NULL terminated in a buffer that contains
69   ///     more characters than the length of the string, or the
70   ///     string can be part of another string and a new substring
71   ///     can be created.
72   ///
73   /// \param[in] max_cstr_len
74   ///     The max length of \a cstr. If the string length of \a cstr
75   ///     is less than \a max_cstr_len, then the string will be
76   ///     truncated. If the string length of \a cstr is greater than
77   ///     \a max_cstr_len, then only max_cstr_len bytes will be used
78   ///     from \a cstr.
79   explicit ConstString(const char *cstr, size_t max_cstr_len);
80 
81   /// C string equality binary predicate function object for ConstString
82   /// objects.
83   struct StringIsEqual {
84     /// C equality test.
85     ///
86     /// Two C strings are equal when they are contained in ConstString objects
87     /// when their pointer values are equal to each other.
88     ///
89     /// \return
90     ///     Returns \b true if the C string in \a lhs is equal to
91     ///     the C string value in \a rhs, \b false otherwise.
92     bool operator()(const char *lhs, const char *rhs) const {
93       return lhs == rhs;
94     }
95   };
96 
97   /// Convert to bool operator.
98   ///
99   /// This allows code to check a ConstString object to see if it contains a
100   /// valid string using code such as:
101   ///
102   /// \code
103   /// ConstString str(...);
104   /// if (str)
105   /// { ...
106   /// \endcode
107   ///
108   /// \return
109   ///     /b True this object contains a valid non-empty C string, \b
110   ///     false otherwise.
111   explicit operator bool() const { return !IsEmpty(); }
112 
113   /// Equal to operator
114   ///
115   /// Returns true if this string is equal to the string in \a rhs. This
116   /// operation is very fast as it results in a pointer comparison since all
117   /// strings are in a uniqued in a global string pool.
118   ///
119   /// \param[in] rhs
120   ///     Another string object to compare this object to.
121   ///
122   /// \return
123   ///     true if this object is equal to \a rhs.
124   ///     false if this object is not equal to \a rhs.
125   bool operator==(ConstString rhs) const {
126     // We can do a pointer compare to compare these strings since they must
127     // come from the same pool in order to be equal.
128     return m_string == rhs.m_string;
129   }
130 
131   /// Equal to operator against a non-ConstString value.
132   ///
133   /// Returns true if this string is equal to the string in \a rhs. This
134   /// overload is usually slower than comparing against a ConstString value.
135   /// However, if the rhs string not already a ConstString and it is impractical
136   /// to turn it into a non-temporary variable, then this overload is faster.
137   ///
138   /// \param[in] rhs
139   ///     Another string object to compare this object to.
140   ///
141   /// \return
142   ///     \b true if this object is equal to \a rhs.
143   ///     \b false if this object is not equal to \a rhs.
144   bool operator==(const char *rhs) const {
145     // ConstString differentiates between empty strings and nullptr strings, but
146     // StringRef doesn't. Therefore we have to do this check manually now.
147     if (m_string == nullptr && rhs != nullptr)
148       return false;
149     if (m_string != nullptr && rhs == nullptr)
150       return false;
151 
152     return GetStringRef() == rhs;
153   }
154 
155   /// Not equal to operator
156   ///
157   /// Returns true if this string is not equal to the string in \a rhs. This
158   /// operation is very fast as it results in a pointer comparison since all
159   /// strings are in a uniqued in a global string pool.
160   ///
161   /// \param[in] rhs
162   ///     Another string object to compare this object to.
163   ///
164   /// \return
165   ///     \b true if this object is not equal to \a rhs.
166   ///     \b false if this object is equal to \a rhs.
167   bool operator!=(ConstString rhs) const { return m_string != rhs.m_string; }
168 
169   /// Not equal to operator against a non-ConstString value.
170   ///
171   /// Returns true if this string is not equal to the string in \a rhs. This
172   /// overload is usually slower than comparing against a ConstString value.
173   /// However, if the rhs string not already a ConstString and it is impractical
174   /// to turn it into a non-temporary variable, then this overload is faster.
175   ///
176   /// \param[in] rhs
177   ///     Another string object to compare this object to.
178   ///
179   /// \return \b true if this object is not equal to \a rhs, false otherwise.
180   bool operator!=(const char *rhs) const { return !(*this == rhs); }
181 
182   bool operator<(ConstString rhs) const;
183 
184   /// Get the string value as a C string.
185   ///
186   /// Get the value of the contained string as a NULL terminated C string
187   /// value.
188   ///
189   /// If \a value_if_empty is nullptr, then nullptr will be returned.
190   ///
191   /// \return Returns \a value_if_empty if the string is empty, otherwise
192   ///     the C string value contained in this object.
193   const char *AsCString(const char *value_if_empty = nullptr) const {
194     return (IsEmpty() ? value_if_empty : m_string);
195   }
196 
197   /// Get the string value as a llvm::StringRef
198   ///
199   /// \return
200   ///     Returns a new llvm::StringRef object filled in with the
201   ///     needed data.
202   llvm::StringRef GetStringRef() const {
203     return llvm::StringRef(m_string, GetLength());
204   }
205 
206   /// Get the string value as a C string.
207   ///
208   /// Get the value of the contained string as a NULL terminated C string
209   /// value. Similar to the ConstString::AsCString() function, yet this
210   /// function will always return nullptr if the string is not valid. So this
211   /// function is a direct accessor to the string pointer value.
212   ///
213   /// \return
214   ///     Returns nullptr the string is invalid, otherwise the C string
215   ///     value contained in this object.
216   const char *GetCString() const { return m_string; }
217 
218   /// Get the length in bytes of string value.
219   ///
220   /// The string pool stores the length of the string, so we can avoid calling
221   /// strlen() on the pointer value with this function.
222   ///
223   /// \return
224   ///     Returns the number of bytes that this string occupies in
225   ///     memory, not including the NULL termination byte.
226   size_t GetLength() const;
227 
228   /// Clear this object's state.
229   ///
230   /// Clear any contained string and reset the value to the empty string
231   /// value.
232   void Clear() { m_string = nullptr; }
233 
234   /// Equal to operator
235   ///
236   /// Returns true if this string is equal to the string in \a rhs. If case
237   /// sensitive equality is tested, this operation is very fast as it results
238   /// in a pointer comparison since all strings are in a uniqued in a global
239   /// string pool.
240   ///
241   /// \param[in] lhs
242   ///     The Left Hand Side const ConstString object reference.
243   ///
244   /// \param[in] rhs
245   ///     The Right Hand Side const ConstString object reference.
246   ///
247   /// \param[in] case_sensitive
248   ///     Case sensitivity. If true, case sensitive equality
249   ///     will be tested, otherwise character case will be ignored
250   ///
251   /// \return \b true if this object is equal to \a rhs, \b false otherwise.
252   static bool Equals(ConstString lhs, ConstString rhs,
253                      const bool case_sensitive = true);
254 
255   /// Compare two string objects.
256   ///
257   /// Compares the C string values contained in \a lhs and \a rhs and returns
258   /// an integer result.
259   ///
260   /// NOTE: only call this function when you want a true string
261   /// comparison. If you want string equality use the, use the == operator as
262   /// it is much more efficient. Also if you want string inequality, use the
263   /// != operator for the same reasons.
264   ///
265   /// \param[in] lhs
266   ///     The Left Hand Side const ConstString object reference.
267   ///
268   /// \param[in] rhs
269   ///     The Right Hand Side const ConstString object reference.
270   ///
271   /// \param[in] case_sensitive
272   ///     Case sensitivity of compare. If true, case sensitive compare
273   ///     will be performed, otherwise character case will be ignored
274   ///
275   /// \return -1 if lhs < rhs, 0 if lhs == rhs, 1 if lhs > rhs
276   static int Compare(ConstString lhs, ConstString rhs,
277                      const bool case_sensitive = true);
278 
279   /// Dump the object description to a stream.
280   ///
281   /// Dump the string value to the stream \a s. If the contained string is
282   /// empty, print \a value_if_empty to the stream instead. If \a
283   /// value_if_empty is nullptr, then nothing will be dumped to the stream.
284   ///
285   /// \param[in] s
286   ///     The stream that will be used to dump the object description.
287   ///
288   /// \param[in] value_if_empty
289   ///     The value to dump if the string is empty. If nullptr, nothing
290   ///     will be output to the stream.
291   void Dump(Stream *s, const char *value_if_empty = nullptr) const;
292 
293   /// Dump the object debug description to a stream.
294   ///
295   /// \param[in] s
296   ///     The stream that will be used to dump the object description.
297   void DumpDebug(Stream *s) const;
298 
299   /// Test for empty string.
300   ///
301   /// \return
302   ///     \b true if the contained string is empty.
303   ///     \b false if the contained string is not empty.
304   bool IsEmpty() const { return m_string == nullptr || m_string[0] == '\0'; }
305 
306   /// Test for null string.
307   ///
308   /// \return
309   ///     \b true if there is no string associated with this instance.
310   ///     \b false if there is a string associated with this instance.
311   bool IsNull() const { return m_string == nullptr; }
312 
313   /// Set the C string value.
314   ///
315   /// Set the string value in the object by uniquing the \a cstr string value
316   /// in our global string pool.
317   ///
318   /// If the C string already exists in the global string pool, it finds the
319   /// current entry and returns the existing value. If it doesn't exist, it is
320   /// added to the string pool.
321   ///
322   /// \param[in] cstr
323   ///     A NULL terminated C string to add to the string pool.
324   void SetCString(const char *cstr);
325 
326   void SetString(const llvm::StringRef &s);
327 
328   /// Set the C string value and its mangled counterpart.
329   ///
330   /// Object files and debug symbols often use mangled string to represent the
331   /// linkage name for a symbol, function or global. The string pool can
332   /// efficiently store these values and their counterparts so when we run
333   /// into another instance of a mangled name, we can avoid calling the name
334   /// demangler over and over on the same strings and then trying to unique
335   /// them.
336   ///
337   /// \param[in] demangled
338   ///     The demangled string to correlate with the \a mangled name.
339   ///
340   /// \param[in] mangled
341   ///     The already uniqued mangled ConstString to correlate the
342   ///     soon to be uniqued version of \a demangled.
343   void SetStringWithMangledCounterpart(llvm::StringRef demangled,
344                                        ConstString mangled);
345 
346   /// Retrieve the mangled or demangled counterpart for a mangled or demangled
347   /// ConstString.
348   ///
349   /// Object files and debug symbols often use mangled string to represent the
350   /// linkage name for a symbol, function or global. The string pool can
351   /// efficiently store these values and their counterparts so when we run
352   /// into another instance of a mangled name, we can avoid calling the name
353   /// demangler over and over on the same strings and then trying to unique
354   /// them.
355   ///
356   /// \param[in] counterpart
357   ///     A reference to a ConstString object that might get filled in
358   ///     with the demangled/mangled counterpart.
359   ///
360   /// \return
361   ///     /b True if \a counterpart was filled in with the counterpart
362   ///     /b false otherwise.
363   bool GetMangledCounterpart(ConstString &counterpart) const;
364 
365   /// Set the C string value with length.
366   ///
367   /// Set the string value in the object by uniquing \a cstr_len bytes
368   /// starting at the \a cstr string value in our global string pool. If trim
369   /// is true, then \a cstr_len indicates a maximum length of the CString and
370   /// if the actual length of the string is less, then it will be trimmed.
371   ///
372   /// If the C string already exists in the global string pool, it finds the
373   /// current entry and returns the existing value. If it doesn't exist, it is
374   /// added to the string pool.
375   ///
376   /// \param[in] cstr
377   ///     A NULL terminated C string to add to the string pool.
378   ///
379   /// \param[in] cstr_len
380   ///     The maximum length of the C string.
381   void SetCStringWithLength(const char *cstr, size_t cstr_len);
382 
383   /// Set the C string value with the minimum length between \a fixed_cstr_len
384   /// and the actual length of the C string. This can be used for data
385   /// structures that have a fixed length to store a C string where the string
386   /// might not be NULL terminated if the string takes the entire buffer.
387   void SetTrimmedCStringWithLength(const char *cstr, size_t fixed_cstr_len);
388 
389   /// Get the memory cost of this object.
390   ///
391   /// Return the size in bytes that this object takes in memory. This returns
392   /// the size in bytes of this object, which does not include any the shared
393   /// string values it may refer to.
394   ///
395   /// \return
396   ///     The number of bytes that this object occupies in memory.
397   ///
398   /// \see ConstString::StaticMemorySize ()
399   size_t MemorySize() const { return sizeof(ConstString); }
400 
401   /// Get the size in bytes of the current global string pool.
402   ///
403   /// Reports the size in bytes of all shared C string values, containers and
404   /// any other values as a byte size for the entire string pool.
405   ///
406   /// \return
407   ///     The number of bytes that the global string pool occupies
408   ///     in memory.
409   static size_t StaticMemorySize();
410 
411 protected:
412   template <typename T> friend struct ::llvm::DenseMapInfo;
413   /// Only used by DenseMapInfo.
414   static ConstString FromStringPoolPointer(const char *ptr) {
415     ConstString s;
416     s.m_string = ptr;
417     return s;
418   };
419 
420   const char *m_string = nullptr;
421 };
422 
423 /// Stream the string value \a str to the stream \a s
424 Stream &operator<<(Stream &s, ConstString str);
425 
426 } // namespace lldb_private
427 
428 namespace llvm {
429 template <> struct format_provider<lldb_private::ConstString> {
430   static void format(const lldb_private::ConstString &CS, llvm::raw_ostream &OS,
431                      llvm::StringRef Options);
432 };
433 
434 /// DenseMapInfo implementation.
435 /// \{
436 template <> struct DenseMapInfo<lldb_private::ConstString> {
437   static inline lldb_private::ConstString getEmptyKey() {
438     return lldb_private::ConstString::FromStringPoolPointer(
439         DenseMapInfo<const char *>::getEmptyKey());
440   }
441   static inline lldb_private::ConstString getTombstoneKey() {
442     return lldb_private::ConstString::FromStringPoolPointer(
443         DenseMapInfo<const char *>::getTombstoneKey());
444   }
445   static unsigned getHashValue(lldb_private::ConstString val) {
446     return DenseMapInfo<const char *>::getHashValue(val.m_string);
447   }
448   static bool isEqual(lldb_private::ConstString LHS,
449                       lldb_private::ConstString RHS) {
450     return LHS == RHS;
451   }
452 };
453 /// \}
454 
455 namespace yaml {
456 template <> struct ScalarTraits<lldb_private::ConstString> {
457   static void output(const lldb_private::ConstString &, void *, raw_ostream &);
458   static StringRef input(StringRef, void *, lldb_private::ConstString &);
459   static QuotingType mustQuote(StringRef S) { return QuotingType::Double; }
460 };
461 } // namespace yaml
462 
463 inline raw_ostream &operator<<(raw_ostream &os, lldb_private::ConstString s) {
464   os << s.GetStringRef();
465   return os;
466 }
467 } // namespace llvm
468 
// Declares, through the macro from llvm/Support/YAMLTraits.h, that a
// std::vector of ConstString is treated as a YAML sequence.
LLVM_YAML_IS_SEQUENCE_VECTOR(lldb_private::ConstString)
470 
471 #endif // LLDB_UTILITY_CONSTSTRING_H
472