1 //===-- ConstString.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLDB_UTILITY_CONSTSTRING_H
10 #define LLDB_UTILITY_CONSTSTRING_H
11 
12 #include "llvm/ADT/DenseMapInfo.h"
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/Support/FormatVariadic.h"
15 #include "llvm/Support/YAMLTraits.h"
16 
17 #include <stddef.h>
18 
// Forward declarations of types that the declarations in this header refer
// to by pointer or reference.
namespace lldb_private {
class Stream;
} // namespace lldb_private

namespace llvm {
class raw_ostream;
} // namespace llvm
25 
26 namespace lldb_private {
27 
28 /// \class ConstString ConstString.h "lldb/Utility/ConstString.h"
29 /// A uniqued constant string class.
30 ///
31 /// Provides an efficient way to store strings as uniqued strings. After the
32 /// strings are uniqued, finding strings that are equal to one another is very
33 /// fast as just the pointers need to be compared. It also allows for many
34 /// common strings from many different sources to be shared to keep the memory
35 /// footprint low.
36 ///
37 /// No reference counting is done on strings that are added to the string
38 /// pool, once strings are added they are in the string pool for the life of
39 /// the program.
40 class ConstString {
41 public:
42   /// Default constructor
43   ///
44   /// Initializes the string to an empty string.
45   ConstString() : m_string(nullptr) {}
46 
47   /// Copy constructor
48   ///
49   /// Copies the string value in \a rhs into this object.
50   ///
51   /// \param[in] rhs
52   ///     Another string object to copy.
53   ConstString(const ConstString &rhs) : m_string(rhs.m_string) {}
54 
55   explicit ConstString(const llvm::StringRef &s);
56 
57   /// Construct with C String value
58   ///
59   /// Constructs this object with a C string by looking to see if the
60   /// C string already exists in the global string pool. If it doesn't
61   /// exist, it is added to the string pool.
62   ///
63   /// \param[in] cstr
64   ///     A NULL terminated C string to add to the string pool.
65   explicit ConstString(const char *cstr);
66 
67   /// Construct with C String value with max length
68   ///
69   /// Constructs this object with a C string with a length. If \a max_cstr_len
70   /// is greater than the actual length of the string, the string length will
71   /// be truncated. This allows substrings to be created without the need to
72   /// NULL terminate the string as it is passed into this function.
73   ///
74   /// \param[in] cstr
75   ///     A pointer to the first character in the C string. The C
76   ///     string can be NULL terminated in a buffer that contains
77   ///     more characters than the length of the string, or the
78   ///     string can be part of another string and a new substring
79   ///     can be created.
80   ///
81   /// \param[in] max_cstr_len
82   ///     The max length of \a cstr. If the string length of \a cstr
83   ///     is less than \a max_cstr_len, then the string will be
84   ///     truncated. If the string length of \a cstr is greater than
85   ///     \a max_cstr_len, then only max_cstr_len bytes will be used
86   ///     from \a cstr.
87   explicit ConstString(const char *cstr, size_t max_cstr_len);
88 
89   /// Destructor
90   ///
91   /// Since constant string values are currently not reference counted, there
92   /// isn't much to do here.
93   ~ConstString() = default;
94 
95   /// C string equality binary predicate function object for ConstString
96   /// objects.
97   struct StringIsEqual {
98     /// C equality test.
99     ///
100     /// Two C strings are equal when they are contained in ConstString objects
101     /// when their pointer values are equal to each other.
102     ///
103     /// \return
104     ///     Returns \b true if the C string in \a lhs is equal to
105     ///     the C string value in \a rhs, \b false otherwise.
106     bool operator()(const char *lhs, const char *rhs) const {
107       return lhs == rhs;
108     }
109   };
110 
111   /// Convert to bool operator.
112   ///
113   /// This allows code to check a ConstString object to see if it contains a
114   /// valid string using code such as:
115   ///
116   /// \code
117   /// ConstString str(...);
118   /// if (str)
119   /// { ...
120   /// \endcode
121   ///
122   /// \return
123   ///     /b True this object contains a valid non-empty C string, \b
124   ///     false otherwise.
125   explicit operator bool() const { return !IsEmpty(); }
126 
127   /// Assignment operator
128   ///
129   /// Assigns the string in this object with the value from \a rhs.
130   ///
131   /// \param[in] rhs
132   ///     Another string object to copy into this object.
133   ///
134   /// \return
135   ///     A const reference to this object.
136   ConstString operator=(ConstString rhs) {
137     m_string = rhs.m_string;
138     return *this;
139   }
140 
141   /// Equal to operator
142   ///
143   /// Returns true if this string is equal to the string in \a rhs. This
144   /// operation is very fast as it results in a pointer comparison since all
145   /// strings are in a uniqued in a global string pool.
146   ///
147   /// \param[in] rhs
148   ///     Another string object to compare this object to.
149   ///
150   /// \return
151   ///     true if this object is equal to \a rhs.
152   ///     false if this object is not equal to \a rhs.
153   bool operator==(ConstString rhs) const {
154     // We can do a pointer compare to compare these strings since they must
155     // come from the same pool in order to be equal.
156     return m_string == rhs.m_string;
157   }
158 
159   /// Equal to operator against a non-ConstString value.
160   ///
161   /// Returns true if this string is equal to the string in \a rhs. This
162   /// overload is usually slower than comparing against a ConstString value.
163   /// However, if the rhs string not already a ConstString and it is impractical
164   /// to turn it into a non-temporary variable, then this overload is faster.
165   ///
166   /// \param[in] rhs
167   ///     Another string object to compare this object to.
168   ///
169   /// \return
170   ///     \b true if this object is equal to \a rhs.
171   ///     \b false if this object is not equal to \a rhs.
172   bool operator==(const char *rhs) const {
173     // ConstString differentiates between empty strings and nullptr strings, but
174     // StringRef doesn't. Therefore we have to do this check manually now.
175     if (m_string == nullptr && rhs != nullptr)
176       return false;
177     if (m_string != nullptr && rhs == nullptr)
178       return false;
179 
180     return GetStringRef() == rhs;
181   }
182 
183   /// Not equal to operator
184   ///
185   /// Returns true if this string is not equal to the string in \a rhs. This
186   /// operation is very fast as it results in a pointer comparison since all
187   /// strings are in a uniqued in a global string pool.
188   ///
189   /// \param[in] rhs
190   ///     Another string object to compare this object to.
191   ///
192   /// \return
193   ///     \b true if this object is not equal to \a rhs.
194   ///     \b false if this object is equal to \a rhs.
195   bool operator!=(ConstString rhs) const {
196     return m_string != rhs.m_string;
197   }
198 
199   /// Not equal to operator against a non-ConstString value.
200   ///
201   /// Returns true if this string is not equal to the string in \a rhs. This
202   /// overload is usually slower than comparing against a ConstString value.
203   /// However, if the rhs string not already a ConstString and it is impractical
204   /// to turn it into a non-temporary variable, then this overload is faster.
205   ///
206   /// \param[in] rhs
207   ///     Another string object to compare this object to.
208   ///
209   /// \return \b true if this object is not equal to \a rhs, false otherwise.
210   bool operator!=(const char *rhs) const { return !(*this == rhs); }
211 
212   bool operator<(ConstString rhs) const;
213 
214   /// Get the string value as a C string.
215   ///
216   /// Get the value of the contained string as a NULL terminated C string
217   /// value.
218   ///
219   /// If \a value_if_empty is nullptr, then nullptr will be returned.
220   ///
221   /// \return Returns \a value_if_empty if the string is empty, otherwise
222   ///     the C string value contained in this object.
223   const char *AsCString(const char *value_if_empty = nullptr) const {
224     return (IsEmpty() ? value_if_empty : m_string);
225   }
226 
227   /// Get the string value as a llvm::StringRef
228   ///
229   /// \return
230   ///     Returns a new llvm::StringRef object filled in with the
231   ///     needed data.
232   llvm::StringRef GetStringRef() const {
233     return llvm::StringRef(m_string, GetLength());
234   }
235 
236   /// Get the string value as a C string.
237   ///
238   /// Get the value of the contained string as a NULL terminated C string
239   /// value. Similar to the ConstString::AsCString() function, yet this
240   /// function will always return nullptr if the string is not valid. So this
241   /// function is a direct accessor to the string pointer value.
242   ///
243   /// \return
244   ///     Returns nullptr the string is invalid, otherwise the C string
245   ///     value contained in this object.
246   const char *GetCString() const { return m_string; }
247 
248   /// Get the length in bytes of string value.
249   ///
250   /// The string pool stores the length of the string, so we can avoid calling
251   /// strlen() on the pointer value with this function.
252   ///
253   /// \return
254   ///     Returns the number of bytes that this string occupies in
255   ///     memory, not including the NULL termination byte.
256   size_t GetLength() const;
257 
258   /// Clear this object's state.
259   ///
260   /// Clear any contained string and reset the value to the empty string
261   /// value.
262   void Clear() { m_string = nullptr; }
263 
264   /// Equal to operator
265   ///
266   /// Returns true if this string is equal to the string in \a rhs. If case
267   /// sensitive equality is tested, this operation is very fast as it results
268   /// in a pointer comparison since all strings are in a uniqued in a global
269   /// string pool.
270   ///
271   /// \param[in] lhs
272   ///     The Left Hand Side const ConstString object reference.
273   ///
274   /// \param[in] rhs
275   ///     The Right Hand Side const ConstString object reference.
276   ///
277   /// \param[in] case_sensitive
278   ///     Case sensitivity. If true, case sensitive equality
279   ///     will be tested, otherwise character case will be ignored
280   ///
281   /// \return \b true if this object is equal to \a rhs, \b false otherwise.
282   static bool Equals(ConstString lhs, ConstString rhs,
283                      const bool case_sensitive = true);
284 
285   /// Compare two string objects.
286   ///
287   /// Compares the C string values contained in \a lhs and \a rhs and returns
288   /// an integer result.
289   ///
290   /// NOTE: only call this function when you want a true string
291   /// comparison. If you want string equality use the, use the == operator as
292   /// it is much more efficient. Also if you want string inequality, use the
293   /// != operator for the same reasons.
294   ///
295   /// \param[in] lhs
296   ///     The Left Hand Side const ConstString object reference.
297   ///
298   /// \param[in] rhs
299   ///     The Right Hand Side const ConstString object reference.
300   ///
301   /// \param[in] case_sensitive
302   ///     Case sensitivity of compare. If true, case sensitive compare
303   ///     will be performed, otherwise character case will be ignored
304   ///
305   /// \return -1 if lhs < rhs, 0 if lhs == rhs, 1 if lhs > rhs
306   static int Compare(ConstString lhs, ConstString rhs,
307                      const bool case_sensitive = true);
308 
309   /// Dump the object description to a stream.
310   ///
311   /// Dump the string value to the stream \a s. If the contained string is
312   /// empty, print \a value_if_empty to the stream instead. If \a
313   /// value_if_empty is nullptr, then nothing will be dumped to the stream.
314   ///
315   /// \param[in] s
316   ///     The stream that will be used to dump the object description.
317   ///
318   /// \param[in] value_if_empty
319   ///     The value to dump if the string is empty. If nullptr, nothing
320   ///     will be output to the stream.
321   void Dump(Stream *s, const char *value_if_empty = nullptr) const;
322 
323   /// Dump the object debug description to a stream.
324   ///
325   /// \param[in] s
326   ///     The stream that will be used to dump the object description.
327   void DumpDebug(Stream *s) const;
328 
329   /// Test for empty string.
330   ///
331   /// \return
332   ///     \b true if the contained string is empty.
333   ///     \b false if the contained string is not empty.
334   bool IsEmpty() const { return m_string == nullptr || m_string[0] == '\0'; }
335 
336   /// Test for null string.
337   ///
338   /// \return
339   ///     \b true if there is no string associated with this instance.
340   ///     \b false if there is a string associated with this instance.
341   bool IsNull() const { return m_string == nullptr; }
342 
343   /// Set the C string value.
344   ///
345   /// Set the string value in the object by uniquing the \a cstr string value
346   /// in our global string pool.
347   ///
348   /// If the C string already exists in the global string pool, it finds the
349   /// current entry and returns the existing value. If it doesn't exist, it is
350   /// added to the string pool.
351   ///
352   /// \param[in] cstr
353   ///     A NULL terminated C string to add to the string pool.
354   void SetCString(const char *cstr);
355 
356   void SetString(const llvm::StringRef &s);
357 
358   /// Set the C string value and its mangled counterpart.
359   ///
360   /// Object files and debug symbols often use mangled string to represent the
361   /// linkage name for a symbol, function or global. The string pool can
362   /// efficiently store these values and their counterparts so when we run
363   /// into another instance of a mangled name, we can avoid calling the name
364   /// demangler over and over on the same strings and then trying to unique
365   /// them.
366   ///
367   /// \param[in] demangled
368   ///     The demangled string to correlate with the \a mangled name.
369   ///
370   /// \param[in] mangled
371   ///     The already uniqued mangled ConstString to correlate the
372   ///     soon to be uniqued version of \a demangled.
373   void SetStringWithMangledCounterpart(llvm::StringRef demangled,
374                                        ConstString mangled);
375 
376   /// Retrieve the mangled or demangled counterpart for a mangled or demangled
377   /// ConstString.
378   ///
379   /// Object files and debug symbols often use mangled string to represent the
380   /// linkage name for a symbol, function or global. The string pool can
381   /// efficiently store these values and their counterparts so when we run
382   /// into another instance of a mangled name, we can avoid calling the name
383   /// demangler over and over on the same strings and then trying to unique
384   /// them.
385   ///
386   /// \param[in] counterpart
387   ///     A reference to a ConstString object that might get filled in
388   ///     with the demangled/mangled counterpart.
389   ///
390   /// \return
391   ///     /b True if \a counterpart was filled in with the counterpart
392   ///     /b false otherwise.
393   bool GetMangledCounterpart(ConstString &counterpart) const;
394 
395   /// Set the C string value with length.
396   ///
397   /// Set the string value in the object by uniquing \a cstr_len bytes
398   /// starting at the \a cstr string value in our global string pool. If trim
399   /// is true, then \a cstr_len indicates a maximum length of the CString and
400   /// if the actual length of the string is less, then it will be trimmed.
401   ///
402   /// If the C string already exists in the global string pool, it finds the
403   /// current entry and returns the existing value. If it doesn't exist, it is
404   /// added to the string pool.
405   ///
406   /// \param[in] cstr
407   ///     A NULL terminated C string to add to the string pool.
408   ///
409   /// \param[in] cstr_len
410   ///     The maximum length of the C string.
411   void SetCStringWithLength(const char *cstr, size_t cstr_len);
412 
413   /// Set the C string value with the minimum length between \a fixed_cstr_len
414   /// and the actual length of the C string. This can be used for data
415   /// structures that have a fixed length to store a C string where the string
416   /// might not be NULL terminated if the string takes the entire buffer.
417   void SetTrimmedCStringWithLength(const char *cstr, size_t fixed_cstr_len);
418 
419   /// Get the memory cost of this object.
420   ///
421   /// Return the size in bytes that this object takes in memory. This returns
422   /// the size in bytes of this object, which does not include any the shared
423   /// string values it may refer to.
424   ///
425   /// \return
426   ///     The number of bytes that this object occupies in memory.
427   ///
428   /// \see ConstString::StaticMemorySize ()
429   size_t MemorySize() const { return sizeof(ConstString); }
430 
431   /// Get the size in bytes of the current global string pool.
432   ///
433   /// Reports the size in bytes of all shared C string values, containers and
434   /// any other values as a byte size for the entire string pool.
435   ///
436   /// \return
437   ///     The number of bytes that the global string pool occupies
438   ///     in memory.
439   static size_t StaticMemorySize();
440 
441 protected:
442   template <typename T> friend struct ::llvm::DenseMapInfo;
443   /// Only used by DenseMapInfo.
444   static ConstString FromStringPoolPointer(const char *ptr) {
445     ConstString s;
446     s.m_string = ptr;
447     return s;
448   };
449 
450   // Member variables
451   const char *m_string;
452 };
453 
454 /// Stream the string value \a str to the stream \a s
455 Stream &operator<<(Stream &s, ConstString str);
456 
457 } // namespace lldb_private
458 
459 namespace llvm {
460 template <> struct format_provider<lldb_private::ConstString> {
461   static void format(const lldb_private::ConstString &CS, llvm::raw_ostream &OS,
462                      llvm::StringRef Options);
463 };
464 
465 /// DenseMapInfo implementation.
466 /// \{
467 template <> struct DenseMapInfo<lldb_private::ConstString> {
468   static inline lldb_private::ConstString getEmptyKey() {
469     return lldb_private::ConstString::FromStringPoolPointer(
470         DenseMapInfo<const char *>::getEmptyKey());
471   }
472   static inline lldb_private::ConstString getTombstoneKey() {
473     return lldb_private::ConstString::FromStringPoolPointer(
474         DenseMapInfo<const char *>::getTombstoneKey());
475   }
476   static unsigned getHashValue(lldb_private::ConstString val) {
477     return DenseMapInfo<const char *>::getHashValue(val.m_string);
478   }
479   static bool isEqual(lldb_private::ConstString LHS,
480                       lldb_private::ConstString RHS) {
481     return LHS == RHS;
482   }
483 };
484 /// \}
485 
486 namespace yaml {
487 template <> struct ScalarTraits<lldb_private::ConstString> {
488   static void output(const lldb_private::ConstString &, void *, raw_ostream &);
489   static StringRef input(StringRef, void *, lldb_private::ConstString &);
490   static QuotingType mustQuote(StringRef S) { return QuotingType::Double; }
491 };
492 } // namespace yaml
493 
494 inline raw_ostream &operator<<(raw_ostream &os, lldb_private::ConstString s) {
495   os << s.GetStringRef();
496   return os;
497 }
498 } // namespace llvm
499 
// NOTE(review): this macro is presumably provided by
// llvm/Support/YAMLTraits.h (included above) and presumably lets a vector of
// ConstString be handled as a YAML sequence -- confirm against the macro's
// definition. It is invoked at global scope, outside any namespace.
LLVM_YAML_IS_SEQUENCE_VECTOR(lldb_private::ConstString)
501 
502 #endif // LLDB_UTILITY_CONSTSTRING_H
503