1 //===-- DataFileCache.h -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLDB_CORE_DATAFILECACHE_H
10 #define LLDB_CORE_DATAFILECACHE_H
11 
12 #include "lldb/Utility/FileSpec.h"
13 #include "lldb/Utility/Status.h"
14 #include "lldb/Utility/UUID.h"
15 #include "lldb/lldb-forward.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/Support/CachePruning.h"
18 #include "llvm/Support/Caching.h"
19 #include "llvm/Support/MemoryBuffer.h"
20 
21 #include <mutex>
22 #include <optional>
23 
24 namespace lldb_private {
25 
26 /// This class enables data to be cached into a directory using the llvm
27 /// caching code. Data can be stored and accessed using a unique string key.
28 /// The data will be stored in the directory that is specified in the
29 /// DataFileCache constructor. The data will be stored in files that start with
30 /// "llvmcache-<key>" where <key> is the key name specified when getting to
31 /// setting cached data.
32 ///
33 /// Sample code for how to use the cache:
34 ///
35 ///   DataFileCache cache("/tmp/lldb-test-cache");
36 ///   StringRef key("Key1");
37 ///   auto mem_buffer_up = cache.GetCachedData(key);
38 ///   if (mem_buffer_up) {
39 ///     printf("cached data:\n%s", mem_buffer_up->getBufferStart());
40 ///   } else {
41 ///     std::vector<uint8_t> data = { 'h', 'e', 'l', 'l', 'o', '\n' };
42 ///     cache.SetCachedData(key, data);
43 ///   }
44 
45 class DataFileCache {
46 public:
47   /// Create a data file cache in the directory path that is specified, using
48   /// the specified policy.
49   ///
50   /// Data will be cached in files created in this directory when clients call
51   /// DataFileCache::SetCacheData.
52   DataFileCache(llvm::StringRef path,
53                 llvm::CachePruningPolicy policy =
54                     DataFileCache::GetLLDBIndexCachePolicy());
55 
56   /// Gets the default LLDB index cache policy, which is controlled by the
57   /// "LLDBIndexCache" family of settings.
58   static llvm::CachePruningPolicy GetLLDBIndexCachePolicy();
59 
60   /// Get cached data from the cache directory for the specified key.
61   ///
62   /// Keys must be unique for any given data. This function attempts to see if
63   /// the data is available for the specified key and will return a valid memory
64   /// buffer is data is available.
65   ///
66   /// \param key
67   ///   The unique string key that identifies data being cached.
68   ///
69   /// \return
70   ///   A valid unique pointer to a memory buffer if the data is available, or
71   ///   a unique pointer that contains NULL if the data is not available.
72   std::unique_ptr<llvm::MemoryBuffer> GetCachedData(llvm::StringRef key);
73 
74   /// Set cached data for the specified key.
75   ///
76   /// Setting the cached data will save a file in the cache directory to contain
77   /// the specified data.
78   ///
79   /// \param key
80   ///   The unique string key that identifies data being cached.
81   ///
82   /// \return
83   ///   True if the data was successfully cached, false otherwise.
84   bool SetCachedData(llvm::StringRef key, llvm::ArrayRef<uint8_t> data);
85 
86   /// Remove the cache file associated with the key.
87   Status RemoveCacheFile(llvm::StringRef key);
88 
89 private:
90   /// Return the cache file that is associated with the key.
91   FileSpec GetCacheFilePath(llvm::StringRef key);
92 
93   llvm::FileCache m_cache_callback;
94   FileSpec m_cache_dir;
95   std::mutex m_mutex;
96   std::unique_ptr<llvm::MemoryBuffer> m_mem_buff_up;
97   bool m_take_ownership = false;
98 };
99 
100 /// A signature for a given file on disk.
101 ///
102 /// Any files that are cached in the LLDB index cached need some data that
103 /// uniquely identifies a file on disk and this information should be written
104 /// into each cache file so we can validate if the cache file still matches
105 /// the file we are trying to load cached data for. Objects can fill out this
106 /// signature and then encode and decode them to validate the signatures
107 /// match. If they do not match, the cache file on disk should be removed as
108 /// it is out of date.
109 struct CacheSignature {
110   /// UUID of object file or module.
111   std::optional<UUID> m_uuid;
112   /// Modification time of file on disk.
113   std::optional<std::time_t> m_mod_time;
114   /// If this describes a .o file with a BSD archive, the BSD archive's
115   /// modification time will be in m_mod_time, and the .o file's modification
116   /// time will be in this m_obj_mod_time.
117   std::optional<std::time_t> m_obj_mod_time;
118 
119   CacheSignature() = default;
120 
121   /// Create a signature from a module.
122   CacheSignature(lldb_private::Module *module);
123 
124   /// Create a signature from an object file.
125   CacheSignature(lldb_private::ObjectFile *objfile);
126 
127   void Clear() {
128     m_uuid = std::nullopt;
129     m_mod_time = std::nullopt;
130     m_obj_mod_time = std::nullopt;
131   }
132 
133   /// Return true only if the CacheSignature is valid.
134   ///
135   /// Cache signatures are considered valid only if there is a UUID in the file
136   /// that can uniquely identify the file. Some build systems play with
137   /// modification times of file so we can not trust them without using valid
138   /// unique idenifier like the UUID being valid.
139   bool IsValid() const { return m_uuid.has_value(); }
140 
141   /// Check if two signatures are the same.
142   bool operator==(const CacheSignature &rhs) const {
143     return m_uuid == rhs.m_uuid && m_mod_time == rhs.m_mod_time &&
144            m_obj_mod_time == rhs.m_obj_mod_time;
145   }
146 
147   /// Check if two signatures differ.
148   bool operator!=(const CacheSignature &rhs) const { return !(*this == rhs); }
149   /// Encode this object into a data encoder object.
150   ///
151   /// This allows this object to be serialized to disk. The CacheSignature
152   /// object must have at least one member variable that has a value in order to
153   /// be serialized so that we can match this data to when the cached file is
154   /// loaded at a later time.
155   ///
156   /// \param encoder
157   ///   A data encoder object that serialized bytes will be encoded into.
158   ///
159   /// \return
160   ///   True if a signature was encoded, and false if there were no member
161   ///   variables that had value. False indicates this data should not be
162   ///   cached to disk because we were unable to encode a valid signature.
163   bool Encode(DataEncoder &encoder) const;
164 
165   /// Decode a serialized version of this object from data.
166   ///
167   /// \param data
168   ///   The decoder object that references the serialized data.
169   ///
170   /// \param offset_ptr
171   ///   A pointer that contains the offset from which the data will be decoded
172   ///   from that gets updated as data gets decoded.
173   ///
174   /// \return
175   ///   True if the signature was successfully decoded, false otherwise.
176   bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr);
177 };
178 
179 /// Many cache files require string tables to store data efficiently. This
180 /// class helps create string tables.
181 class ConstStringTable {
182 public:
183   ConstStringTable() = default;
184   /// Add a string into the string table.
185   ///
186   /// Add a string to the string table will only add the same string one time
187   /// and will return the offset in the string table buffer to that string.
188   /// String tables are easy to build with ConstString objects since most LLDB
189   /// classes for symbol or debug info use them already and they provide
190   /// permanent storage for the string.
191   ///
192   /// \param s
193   ///   The string to insert into the string table.
194   ///
195   /// \return
196   ///   The byte offset from the start of the string table for the inserted
197   ///   string. Duplicate strings that get inserted will return the same
198   ///   byte offset.
199   uint32_t Add(ConstString s);
200 
201   bool Encode(DataEncoder &encoder);
202 
203 private:
204   std::vector<ConstString> m_strings;
205   llvm::DenseMap<ConstString, uint32_t> m_string_to_offset;
206   /// Skip one byte to start the string table off with an empty string.
207   uint32_t m_next_offset = 1;
208 };
209 
210 /// Many cache files require string tables to store data efficiently. This
211 /// class helps give out strings from a string table that was read from a
212 /// cache file.
213 class StringTableReader {
214 public:
215   StringTableReader() = default;
216 
217   llvm::StringRef Get(uint32_t offset) const;
218 
219   bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr);
220 
221 protected:
222   /// All of the strings in the string table are contained in m_data.
223   llvm::StringRef m_data;
224 };
225 
226 } // namespace lldb_private
227 
228 #endif // LLDB_CORE_DATAFILECACHE_H
229