1dda28197Spatrick //===-- ConstString.cpp ---------------------------------------------------===//
2061da546Spatrick //
3061da546Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4061da546Spatrick // See https://llvm.org/LICENSE.txt for license information.
5061da546Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6061da546Spatrick //
7061da546Spatrick //===----------------------------------------------------------------------===//
8061da546Spatrick 
9061da546Spatrick #include "lldb/Utility/ConstString.h"
10061da546Spatrick 
11061da546Spatrick #include "lldb/Utility/Stream.h"
12061da546Spatrick 
13061da546Spatrick #include "llvm/ADT/StringMap.h"
14061da546Spatrick #include "llvm/ADT/iterator.h"
15061da546Spatrick #include "llvm/Support/Allocator.h"
16061da546Spatrick #include "llvm/Support/DJB.h"
17061da546Spatrick #include "llvm/Support/FormatProviders.h"
18061da546Spatrick #include "llvm/Support/RWMutex.h"
19061da546Spatrick #include "llvm/Support/Threading.h"
20061da546Spatrick 
21061da546Spatrick #include <array>
22061da546Spatrick #include <utility>
23061da546Spatrick 
24be691f3bSpatrick #include <cinttypes>
25be691f3bSpatrick #include <cstdint>
26be691f3bSpatrick #include <cstring>
27061da546Spatrick 
28061da546Spatrick using namespace lldb_private;
29061da546Spatrick 
30061da546Spatrick class Pool {
31061da546Spatrick public:
32dda28197Spatrick   /// The default BumpPtrAllocatorImpl slab size.
33dda28197Spatrick   static const size_t AllocatorSlabSize = 4096;
34dda28197Spatrick   static const size_t SizeThreshold = AllocatorSlabSize;
35dda28197Spatrick   /// Every Pool has its own allocator which receives an equal share of
36dda28197Spatrick   /// the ConstString allocations. This means that when allocating many
37dda28197Spatrick   /// ConstStrings, every allocator sees only its small share of allocations and
38dda28197Spatrick   /// assumes LLDB only allocated a small amount of memory so far. In reality
39dda28197Spatrick   /// LLDB allocated a total memory that is N times as large as what the
40dda28197Spatrick   /// allocator sees (where N is the number of string pools). This causes that
41dda28197Spatrick   /// the BumpPtrAllocator continues a long time to allocate memory in small
42dda28197Spatrick   /// chunks which only makes sense when allocating a small amount of memory
43dda28197Spatrick   /// (which is true from the perspective of a single allocator). On some
44dda28197Spatrick   /// systems doing all these small memory allocations causes LLDB to spend
45dda28197Spatrick   /// a lot of time in malloc, so we need to force all these allocators to
46dda28197Spatrick   /// behave like one allocator in terms of scaling their memory allocations
47dda28197Spatrick   /// with increased demand. To do this we set the growth delay for each single
48dda28197Spatrick   /// allocator to a rate so that our pool of allocators scales their memory
49dda28197Spatrick   /// allocations similar to a single BumpPtrAllocatorImpl.
50dda28197Spatrick   ///
51dda28197Spatrick   /// Currently we have 256 string pools and the normal growth delay of the
52dda28197Spatrick   /// BumpPtrAllocatorImpl is 128 (i.e., the memory allocation size increases
53dda28197Spatrick   /// every 128 full chunks), so by changing the delay to 1 we get a
54dda28197Spatrick   /// total growth delay in our allocator collection of 256/1 = 256. This is
55dda28197Spatrick   /// still only half as fast as a normal allocator but we can't go any faster
56dda28197Spatrick   /// without decreasing the number of string pools.
57dda28197Spatrick   static const size_t AllocatorGrowthDelay = 1;
58dda28197Spatrick   typedef llvm::BumpPtrAllocatorImpl<llvm::MallocAllocator, AllocatorSlabSize,
59dda28197Spatrick                                      SizeThreshold, AllocatorGrowthDelay>
60dda28197Spatrick       Allocator;
61061da546Spatrick   typedef const char *StringPoolValueType;
62dda28197Spatrick   typedef llvm::StringMap<StringPoolValueType, Allocator> StringPool;
63061da546Spatrick   typedef llvm::StringMapEntry<StringPoolValueType> StringPoolEntryType;
64061da546Spatrick 
65061da546Spatrick   static StringPoolEntryType &
GetStringMapEntryFromKeyData(const char * keyData)66061da546Spatrick   GetStringMapEntryFromKeyData(const char *keyData) {
67061da546Spatrick     return StringPoolEntryType::GetStringMapEntryFromKeyData(keyData);
68061da546Spatrick   }
69061da546Spatrick 
GetConstCStringLength(const char * ccstr)70061da546Spatrick   static size_t GetConstCStringLength(const char *ccstr) {
71061da546Spatrick     if (ccstr != nullptr) {
72061da546Spatrick       // Since the entry is read only, and we derive the entry entirely from
73061da546Spatrick       // the pointer, we don't need the lock.
74061da546Spatrick       const StringPoolEntryType &entry = GetStringMapEntryFromKeyData(ccstr);
75061da546Spatrick       return entry.getKey().size();
76061da546Spatrick     }
77061da546Spatrick     return 0;
78061da546Spatrick   }
79061da546Spatrick 
GetMangledCounterpart(const char * ccstr) const80061da546Spatrick   StringPoolValueType GetMangledCounterpart(const char *ccstr) const {
81061da546Spatrick     if (ccstr != nullptr) {
82061da546Spatrick       const uint8_t h = hash(llvm::StringRef(ccstr));
83061da546Spatrick       llvm::sys::SmartScopedReader<false> rlock(m_string_pools[h].m_mutex);
84061da546Spatrick       return GetStringMapEntryFromKeyData(ccstr).getValue();
85061da546Spatrick     }
86061da546Spatrick     return nullptr;
87061da546Spatrick   }
88061da546Spatrick 
GetConstCString(const char * cstr)89061da546Spatrick   const char *GetConstCString(const char *cstr) {
90061da546Spatrick     if (cstr != nullptr)
91061da546Spatrick       return GetConstCStringWithLength(cstr, strlen(cstr));
92061da546Spatrick     return nullptr;
93061da546Spatrick   }
94061da546Spatrick 
GetConstCStringWithLength(const char * cstr,size_t cstr_len)95061da546Spatrick   const char *GetConstCStringWithLength(const char *cstr, size_t cstr_len) {
96061da546Spatrick     if (cstr != nullptr)
97061da546Spatrick       return GetConstCStringWithStringRef(llvm::StringRef(cstr, cstr_len));
98061da546Spatrick     return nullptr;
99061da546Spatrick   }
100061da546Spatrick 
GetConstCStringWithStringRef(const llvm::StringRef & string_ref)101061da546Spatrick   const char *GetConstCStringWithStringRef(const llvm::StringRef &string_ref) {
102061da546Spatrick     if (string_ref.data()) {
103061da546Spatrick       const uint8_t h = hash(string_ref);
104061da546Spatrick 
105061da546Spatrick       {
106061da546Spatrick         llvm::sys::SmartScopedReader<false> rlock(m_string_pools[h].m_mutex);
107061da546Spatrick         auto it = m_string_pools[h].m_string_map.find(string_ref);
108061da546Spatrick         if (it != m_string_pools[h].m_string_map.end())
109061da546Spatrick           return it->getKeyData();
110061da546Spatrick       }
111061da546Spatrick 
112061da546Spatrick       llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
113061da546Spatrick       StringPoolEntryType &entry =
114061da546Spatrick           *m_string_pools[h]
115061da546Spatrick                .m_string_map.insert(std::make_pair(string_ref, nullptr))
116061da546Spatrick                .first;
117061da546Spatrick       return entry.getKeyData();
118061da546Spatrick     }
119061da546Spatrick     return nullptr;
120061da546Spatrick   }
121061da546Spatrick 
122061da546Spatrick   const char *
GetConstCStringAndSetMangledCounterPart(llvm::StringRef demangled,const char * mangled_ccstr)123061da546Spatrick   GetConstCStringAndSetMangledCounterPart(llvm::StringRef demangled,
124061da546Spatrick                                           const char *mangled_ccstr) {
125061da546Spatrick     const char *demangled_ccstr = nullptr;
126061da546Spatrick 
127061da546Spatrick     {
128061da546Spatrick       const uint8_t h = hash(demangled);
129061da546Spatrick       llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
130061da546Spatrick 
131061da546Spatrick       // Make or update string pool entry with the mangled counterpart
132061da546Spatrick       StringPool &map = m_string_pools[h].m_string_map;
133061da546Spatrick       StringPoolEntryType &entry = *map.try_emplace(demangled).first;
134061da546Spatrick 
135061da546Spatrick       entry.second = mangled_ccstr;
136061da546Spatrick 
137061da546Spatrick       // Extract the const version of the demangled_cstr
138061da546Spatrick       demangled_ccstr = entry.getKeyData();
139061da546Spatrick     }
140061da546Spatrick 
141061da546Spatrick     {
142061da546Spatrick       // Now assign the demangled const string as the counterpart of the
143061da546Spatrick       // mangled const string...
144061da546Spatrick       const uint8_t h = hash(llvm::StringRef(mangled_ccstr));
145061da546Spatrick       llvm::sys::SmartScopedWriter<false> wlock(m_string_pools[h].m_mutex);
146061da546Spatrick       GetStringMapEntryFromKeyData(mangled_ccstr).setValue(demangled_ccstr);
147061da546Spatrick     }
148061da546Spatrick 
149061da546Spatrick     // Return the constant demangled C string
150061da546Spatrick     return demangled_ccstr;
151061da546Spatrick   }
152061da546Spatrick 
GetConstTrimmedCStringWithLength(const char * cstr,size_t cstr_len)153061da546Spatrick   const char *GetConstTrimmedCStringWithLength(const char *cstr,
154061da546Spatrick                                                size_t cstr_len) {
155061da546Spatrick     if (cstr != nullptr) {
156061da546Spatrick       const size_t trimmed_len = strnlen(cstr, cstr_len);
157061da546Spatrick       return GetConstCStringWithLength(cstr, trimmed_len);
158061da546Spatrick     }
159061da546Spatrick     return nullptr;
160061da546Spatrick   }
161061da546Spatrick 
GetMemoryStats() const162*f6aab3d8Srobert   ConstString::MemoryStats GetMemoryStats() const {
163*f6aab3d8Srobert     ConstString::MemoryStats stats;
164061da546Spatrick     for (const auto &pool : m_string_pools) {
165061da546Spatrick       llvm::sys::SmartScopedReader<false> rlock(pool.m_mutex);
166*f6aab3d8Srobert       const Allocator &alloc = pool.m_string_map.getAllocator();
167*f6aab3d8Srobert       stats.bytes_total += alloc.getTotalMemory();
168*f6aab3d8Srobert       stats.bytes_used += alloc.getBytesAllocated();
169061da546Spatrick     }
170*f6aab3d8Srobert     return stats;
171061da546Spatrick   }
172061da546Spatrick 
173061da546Spatrick protected:
hash(const llvm::StringRef & s) const174061da546Spatrick   uint8_t hash(const llvm::StringRef &s) const {
175061da546Spatrick     uint32_t h = llvm::djbHash(s);
176061da546Spatrick     return ((h >> 24) ^ (h >> 16) ^ (h >> 8) ^ h) & 0xff;
177061da546Spatrick   }
178061da546Spatrick 
179061da546Spatrick   struct PoolEntry {
180061da546Spatrick     mutable llvm::sys::SmartRWMutex<false> m_mutex;
181061da546Spatrick     StringPool m_string_map;
182061da546Spatrick   };
183061da546Spatrick 
184061da546Spatrick   std::array<PoolEntry, 256> m_string_pools;
185061da546Spatrick };
186061da546Spatrick 
187061da546Spatrick // Frameworks and dylibs aren't supposed to have global C++ initializers so we
188061da546Spatrick // hide the string pool in a static function so that it will get initialized on
189061da546Spatrick // the first call to this static function.
190061da546Spatrick //
191061da546Spatrick // Note, for now we make the string pool a pointer to the pool, because we
192061da546Spatrick // can't guarantee that some objects won't get destroyed after the global
193061da546Spatrick // destructor chain is run, and trying to make sure no destructors touch
194061da546Spatrick // ConstStrings is difficult.  So we leak the pool instead.
StringPool()195061da546Spatrick static Pool &StringPool() {
196061da546Spatrick   static llvm::once_flag g_pool_initialization_flag;
197061da546Spatrick   static Pool *g_string_pool = nullptr;
198061da546Spatrick 
199061da546Spatrick   llvm::call_once(g_pool_initialization_flag,
200061da546Spatrick                  []() { g_string_pool = new Pool(); });
201061da546Spatrick 
202061da546Spatrick   return *g_string_pool;
203061da546Spatrick }
204061da546Spatrick 
ConstString(const char * cstr)205061da546Spatrick ConstString::ConstString(const char *cstr)
206061da546Spatrick     : m_string(StringPool().GetConstCString(cstr)) {}
207061da546Spatrick 
ConstString(const char * cstr,size_t cstr_len)208061da546Spatrick ConstString::ConstString(const char *cstr, size_t cstr_len)
209061da546Spatrick     : m_string(StringPool().GetConstCStringWithLength(cstr, cstr_len)) {}
210061da546Spatrick 
ConstString(const llvm::StringRef & s)211061da546Spatrick ConstString::ConstString(const llvm::StringRef &s)
212061da546Spatrick     : m_string(StringPool().GetConstCStringWithStringRef(s)) {}
213061da546Spatrick 
operator <(ConstString rhs) const214061da546Spatrick bool ConstString::operator<(ConstString rhs) const {
215061da546Spatrick   if (m_string == rhs.m_string)
216061da546Spatrick     return false;
217061da546Spatrick 
218061da546Spatrick   llvm::StringRef lhs_string_ref(GetStringRef());
219061da546Spatrick   llvm::StringRef rhs_string_ref(rhs.GetStringRef());
220061da546Spatrick 
221061da546Spatrick   // If both have valid C strings, then return the comparison
222061da546Spatrick   if (lhs_string_ref.data() && rhs_string_ref.data())
223061da546Spatrick     return lhs_string_ref < rhs_string_ref;
224061da546Spatrick 
225061da546Spatrick   // Else one of them was nullptr, so if LHS is nullptr then it is less than
226061da546Spatrick   return lhs_string_ref.data() == nullptr;
227061da546Spatrick }
228061da546Spatrick 
operator <<(Stream & s,ConstString str)229061da546Spatrick Stream &lldb_private::operator<<(Stream &s, ConstString str) {
230061da546Spatrick   const char *cstr = str.GetCString();
231061da546Spatrick   if (cstr != nullptr)
232061da546Spatrick     s << cstr;
233061da546Spatrick 
234061da546Spatrick   return s;
235061da546Spatrick }
236061da546Spatrick 
GetLength() const237061da546Spatrick size_t ConstString::GetLength() const {
238061da546Spatrick   return Pool::GetConstCStringLength(m_string);
239061da546Spatrick }
240061da546Spatrick 
Equals(ConstString lhs,ConstString rhs,const bool case_sensitive)241061da546Spatrick bool ConstString::Equals(ConstString lhs, ConstString rhs,
242061da546Spatrick                          const bool case_sensitive) {
243061da546Spatrick   if (lhs.m_string == rhs.m_string)
244061da546Spatrick     return true;
245061da546Spatrick 
246061da546Spatrick   // Since the pointers weren't equal, and identical ConstStrings always have
247061da546Spatrick   // identical pointers, the result must be false for case sensitive equality
248061da546Spatrick   // test.
249061da546Spatrick   if (case_sensitive)
250061da546Spatrick     return false;
251061da546Spatrick 
252061da546Spatrick   // perform case insensitive equality test
253061da546Spatrick   llvm::StringRef lhs_string_ref(lhs.GetStringRef());
254061da546Spatrick   llvm::StringRef rhs_string_ref(rhs.GetStringRef());
255be691f3bSpatrick   return lhs_string_ref.equals_insensitive(rhs_string_ref);
256061da546Spatrick }
257061da546Spatrick 
Compare(ConstString lhs,ConstString rhs,const bool case_sensitive)258061da546Spatrick int ConstString::Compare(ConstString lhs, ConstString rhs,
259061da546Spatrick                          const bool case_sensitive) {
260061da546Spatrick   // If the iterators are the same, this is the same string
261061da546Spatrick   const char *lhs_cstr = lhs.m_string;
262061da546Spatrick   const char *rhs_cstr = rhs.m_string;
263061da546Spatrick   if (lhs_cstr == rhs_cstr)
264061da546Spatrick     return 0;
265061da546Spatrick   if (lhs_cstr && rhs_cstr) {
266061da546Spatrick     llvm::StringRef lhs_string_ref(lhs.GetStringRef());
267061da546Spatrick     llvm::StringRef rhs_string_ref(rhs.GetStringRef());
268061da546Spatrick 
269061da546Spatrick     if (case_sensitive) {
270061da546Spatrick       return lhs_string_ref.compare(rhs_string_ref);
271061da546Spatrick     } else {
272be691f3bSpatrick       return lhs_string_ref.compare_insensitive(rhs_string_ref);
273061da546Spatrick     }
274061da546Spatrick   }
275061da546Spatrick 
276061da546Spatrick   if (lhs_cstr)
277061da546Spatrick     return +1; // LHS isn't nullptr but RHS is
278061da546Spatrick   else
279061da546Spatrick     return -1; // LHS is nullptr but RHS isn't
280061da546Spatrick }
281061da546Spatrick 
Dump(Stream * s,const char * fail_value) const282061da546Spatrick void ConstString::Dump(Stream *s, const char *fail_value) const {
283061da546Spatrick   if (s != nullptr) {
284061da546Spatrick     const char *cstr = AsCString(fail_value);
285061da546Spatrick     if (cstr != nullptr)
286061da546Spatrick       s->PutCString(cstr);
287061da546Spatrick   }
288061da546Spatrick }
289061da546Spatrick 
DumpDebug(Stream * s) const290061da546Spatrick void ConstString::DumpDebug(Stream *s) const {
291061da546Spatrick   const char *cstr = GetCString();
292061da546Spatrick   size_t cstr_len = GetLength();
293061da546Spatrick   // Only print the parens if we have a non-nullptr string
294061da546Spatrick   const char *parens = cstr ? "\"" : "";
295061da546Spatrick   s->Printf("%*p: ConstString, string = %s%s%s, length = %" PRIu64,
296061da546Spatrick             static_cast<int>(sizeof(void *) * 2),
297061da546Spatrick             static_cast<const void *>(this), parens, cstr, parens,
298061da546Spatrick             static_cast<uint64_t>(cstr_len));
299061da546Spatrick }
300061da546Spatrick 
SetCString(const char * cstr)301061da546Spatrick void ConstString::SetCString(const char *cstr) {
302061da546Spatrick   m_string = StringPool().GetConstCString(cstr);
303061da546Spatrick }
304061da546Spatrick 
SetString(const llvm::StringRef & s)305061da546Spatrick void ConstString::SetString(const llvm::StringRef &s) {
306061da546Spatrick   m_string = StringPool().GetConstCStringWithLength(s.data(), s.size());
307061da546Spatrick }
308061da546Spatrick 
SetStringWithMangledCounterpart(llvm::StringRef demangled,ConstString mangled)309061da546Spatrick void ConstString::SetStringWithMangledCounterpart(llvm::StringRef demangled,
310061da546Spatrick                                                   ConstString mangled) {
311061da546Spatrick   m_string = StringPool().GetConstCStringAndSetMangledCounterPart(
312061da546Spatrick       demangled, mangled.m_string);
313061da546Spatrick }
314061da546Spatrick 
GetMangledCounterpart(ConstString & counterpart) const315061da546Spatrick bool ConstString::GetMangledCounterpart(ConstString &counterpart) const {
316061da546Spatrick   counterpart.m_string = StringPool().GetMangledCounterpart(m_string);
317061da546Spatrick   return (bool)counterpart;
318061da546Spatrick }
319061da546Spatrick 
SetCStringWithLength(const char * cstr,size_t cstr_len)320061da546Spatrick void ConstString::SetCStringWithLength(const char *cstr, size_t cstr_len) {
321061da546Spatrick   m_string = StringPool().GetConstCStringWithLength(cstr, cstr_len);
322061da546Spatrick }
323061da546Spatrick 
SetTrimmedCStringWithLength(const char * cstr,size_t cstr_len)324061da546Spatrick void ConstString::SetTrimmedCStringWithLength(const char *cstr,
325061da546Spatrick                                               size_t cstr_len) {
326061da546Spatrick   m_string = StringPool().GetConstTrimmedCStringWithLength(cstr, cstr_len);
327061da546Spatrick }
328061da546Spatrick 
GetMemoryStats()329*f6aab3d8Srobert ConstString::MemoryStats ConstString::GetMemoryStats() {
330*f6aab3d8Srobert   return StringPool().GetMemoryStats();
331061da546Spatrick }
332061da546Spatrick 
format(const ConstString & CS,llvm::raw_ostream & OS,llvm::StringRef Options)333061da546Spatrick void llvm::format_provider<ConstString>::format(const ConstString &CS,
334061da546Spatrick                                                 llvm::raw_ostream &OS,
335061da546Spatrick                                                 llvm::StringRef Options) {
336dda28197Spatrick   format_provider<StringRef>::format(CS.GetStringRef(), OS, Options);
337dda28197Spatrick }
338