1 #ifndef NETCACHE__STORAGE_TYPES__HPP
2 #define NETCACHE__STORAGE_TYPES__HPP
3 /*  $Id: storage_types.hpp 632770 2021-06-07 19:05:12Z ivanov $
4  * ===========================================================================
5  *
6  *                            PUBLIC DOMAIN NOTICE
7  *               National Center for Biotechnology Information
8  *
9  *  This software/database is a "United States Government Work" under the
10  *  terms of the United States Copyright Act.  It was written as part of
11  *  the author's official duties as a United States Government employee and
12  *  thus cannot be copyrighted.  This software/database is freely available
13  *  to the public for use. The National Library of Medicine and the U.S.
14  *  Government have not placed any restriction on its use or reproduction.
15  *
16  *  Although all reasonable efforts have been taken to ensure the accuracy
17  *  and reliability of the software and data, the NLM and the U.S.
18  *  Government do not and cannot warrant the performance or results that
19  *  may be obtained by using this software or data. The NLM and the U.S.
20  *  Government disclaim all warranties, express or implied, including
21  *  warranties of performance, merchantability or fitness for any particular
22  *  purpose.
23  *
24  *  Please cite the author in any work or product based on this material.
25  *
26  * ===========================================================================
27  *
28  * Authors:  Pavel Ivanov
29  */
30 
31 
32 #include "nc_utils.hpp"
33 #include "nc_db_info.hpp"
34 
35 
36 namespace intr = boost::intrusive;
37 
38 
39 BEGIN_NCBI_SCOPE
40 
41 
// Forward declarations.
// SFileMetaRec is fully defined later in this header; SNCCacheData is
// referenced below through pointers and a const reference.
class  SNCCacheData;

struct SFileRecHeader;
struct SFileMetaRec;

struct SCacheDeadCompare;
struct SCacheKeyCompare;
47 
48 
// Kinds of records that can be stored in a DB file.
// Values are spelled out explicitly; they are the same consecutive values
// the enumerators would receive implicitly when counting up from 0.
enum EFileRecType {
    eFileRecNone      = 0,
    eFileRecMeta      = 1,
    eFileRecChunkMap  = 2,
    eFileRecChunkData = 3,
    eFileRecAny       = 4
};
56 
57 struct ATTR_PACKED SFileIndexRec
58 {
59     Uint4   prev_num;
60     Uint4   next_num;
61     Uint4   offset;
62     Uint4   rec_size:24;
63     Uint1   rec_type:8;
64     SNCDataCoord chain_coord;
65     SNCCacheData* cache_data;
66 };
67 
68 // Meta-type records (kMetaSignature)
69 struct ATTR_PACKED SFileMetaRec
70 {
71     Uint1   has_password;
72     Uint1   align_reserve;
73     Uint2   map_size;       // max number of down_coords in map record - see SFileChunkMapRec
74     Uint4   chunk_size;
75     Uint8   size;           // blob size
76     Uint8   create_time;    // time of completion of blob storage request
77     Uint8   create_server;  // unique server id
78     Uint4   create_id;      // blob id (unique for each server id)
79     Int4    dead_time;      // time when this blob should be deleted from db
80     Int4    ttl;            // time to live since last read, or creation
81     Int4    expire;         // blob expiration time (blob ceases to exist for client)
82     Int4    blob_ver;       // blob version (client supplied)
83     Int4    ver_ttl;        // time to live for this version
84     Int4    ver_expire;     // blob version expiration time
85     char    key_data[1];    // key + MD5 of password, if any
86 };
87 
88 // Map-type records (kMapsSignature)
89 struct ATTR_PACKED SFileChunkMapRec
90 {
91     Uint2 map_idx;         // index of this map in higher level map, if that exists
92     Uint1 map_depth;       // map depth; it can be tree of maps
93     SNCDataCoord down_coords[1] ATTR_ALIGNED_8;  // coords of lower levels
94 };
95 
96 // Data-type records (kDataSignature)
97 struct ATTR_PACKED SFileChunkDataRec
98 {
99     Uint8   chunk_num;     // chunk index in blob
100     Uint2   chunk_idx;     // chunk index in map
101     Uint1   chunk_data[1]; // chunk data, see kNCMaxBlobChunkSize
102 };
103 
104 
105 struct SWritingInfo
106 {
107     CSrvRef<SNCDBFileInfo> cur_file;
108     CSrvRef<SNCDBFileInfo> next_file;
109     Uint4 next_rec_num;
110     Uint4 next_offset;
111     Uint4 left_file_size;
112 
SWritingInfoSWritingInfo113     SWritingInfo(void)
114         : next_rec_num(0), next_offset(0), left_file_size(0) {
115     }
116 };
117 
118 
119 struct SNCTempBlobInfo
120 {
121     string  key;
122     Uint8   size;
123     Uint8   create_time;
124     Uint8   create_server;
125     SNCDataCoord coord;
126     Uint4   create_id;
127     int     dead_time;
128     int     expire;
129     int     ver_expire;
130 
SNCTempBlobInfoSNCTempBlobInfo131     SNCTempBlobInfo(void)
132         :   size(0), create_time(0), create_server(0),
133             create_id(0), dead_time(0), expire(0), ver_expire(0) {
134     }
SNCTempBlobInfoSNCTempBlobInfo135     SNCTempBlobInfo(const SNCCacheData& cache_info)
136         : key(cache_info.key),
137           size(cache_info.size),
138           create_time(cache_info.create_time),
139           create_server(cache_info.create_server),
140           coord(cache_info.coord),
141           create_id(cache_info.create_id),
142           dead_time(cache_info.dead_time),
143           expire(cache_info.expire),
144           ver_expire(cache_info.ver_expire)
145     {
146     }
147 };
148 
149 
150 typedef set<Uint4>              TRecNumsSet;
151 typedef map<Uint4, TRecNumsSet> TFileRecsMap;
152 
153 
154 /*
155     on startup:
156         loads blobs metadata into memory,
157         creates initial DB files
158         checks DB consistency
159 
160     begin: x_StartCaching
161     -> x_StartCaching: get first DB file, goto  x_PreCacheRecNums
162     -> x_PreCacheRecNums:
163         for each DB file, collect valid record numbers
164         goto x_StartCreateFiles
165     -> x_StartCreateFiles
166         goto x_CreateInitialFile
167     -> x_CreateInitialFile
168         create 3 'current' files and 3 'next' files
169         if file creation failed, goto x_DelFileAndRetryCreate
170         when complete, goto x_StartCacheBlobs
171     -> x_DelFileAndRetryCreate
172         if cannot delete any files, request shutdown, goto x_CancelCaching
173         try to delete existing file (prefers data files)
174         goto x_CreateInitialFile
175     -> x_StartCacheBlobs: get first file, goto  x_CacheNextFile
176     -> x_CacheNextFile
177         if file type is eDBFileMeta, goto  x_CacheNextRecord
178         after that, goto x_CleanOrphanRecs
179     -> x_CacheNextRecord
180         for each record, check record validity, remember
181         goto x_CacheNextFile
182     -> x_CleanOrphanRecs
183         when all files are processed, delete abandoned records
184         goto x_FinishCaching
185     -> x_FinishCaching
186         notify NC that caching complete,
187         start DiskFlusher, RecNoSaver,  SpaceShrinker, ExpiredCleaner
188 */
189 class CBlobCacher : public CSrvStatesTask<CBlobCacher>
190 {
191 public:
192     CBlobCacher(void);
193     virtual ~CBlobCacher(void);
194 
195 private:
196     State x_StartCaching(void);
197     State x_PreCacheRecNums(void);
198     State x_CancelCaching(void);
199     State x_StartCreateFiles(void);
200     State x_CreateInitialFile(void);
201     State x_DelFileAndRetryCreate(void);
202     State x_StartCacheBlobs(void);
203     State x_CacheNextFile(void);
204     State x_CacheNextRecord(void);
205     State x_CleanOrphanRecs(void);
206     State x_FinishCaching(void);
207 
208     bool x_CacheMetaRec(SNCDBFileInfo* file_info,
209                         SFileIndexRec* ind_rec,
210                         SNCDataCoord coord);
211     bool x_CacheMapRecs(SNCDataCoord map_coord,
212                         Uint1 map_depth,
213                         SNCDataCoord up_coord,
214                         Uint2 up_index,
215                         SNCCacheData* cache_data,
216                         Uint8 cnt_chunks,
217                         Uint8& chunk_num,
218                         map<Uint4, Uint4>& sizes_map);
219     void x_DeleteIndexes(SNCDataCoord map_coord, Uint1 map_depth);
220 
221 
222     TFileRecsMap m_RecsMap;
223     TRecNumsSet m_NewFileIds;
224     TNCDBFilesMap::const_iterator m_CurFile;
225     TRecNumsSet* m_CurRecsSet;
226     TRecNumsSet::iterator m_CurRecIt;
227     size_t m_CurCreatePass;
228     size_t m_CurCreateFile;
229 };
230 
231 
232 class CNewFileCreator : public CSrvTask
233 {
234 public:
235     CNewFileCreator(void);
236     virtual ~CNewFileCreator(void);
237 
238 private:
239     virtual void ExecuteSlice(TSrvThreadNum thr_num);
240 };
241 
242 
243 /*
244     flush all DB files
245 */
246 class CDiskFlusher : public CSrvStatesTask<CDiskFlusher>
247 {
248 public:
249     CDiskFlusher(void);
250     virtual ~CDiskFlusher(void);
251 
252 private:
253     State x_CheckFlushTime(void);
254     State x_FlushNextFile(void);
255 
256 
257     Uint4 m_LastId;
258 };
259 
260 
261 class CRecNoSaver : public CSrvTask
262 {
263 public:
264     CRecNoSaver(void);
265     virtual ~CRecNoSaver(void);
266 
267 private:
268     virtual void ExecuteSlice(TSrvThreadNum thr_num);
269 };
270 
271 
// Moves blob chunks, or data, from old files (which are 'almost empty')
// into new ones, then deletes the old files.
274 /*
275     begin: x_PrepareToShrink
276     -> x_PrepareToShrink: analyze what to move and what to delete;
    -> x_DeleteNextFile: has something to delete ? delete : x_StartMoves
    -> x_StartMoves: has something to move ? x_MoveNextRecord : x_FinishSession
279     -> x_MoveNextRecord:
280            find what to move;
281            if not found, goto x_FinishMoves;
282            if VerMgr for this key exists, goto x_CheckCurVersion;
283            else x_MoveRecord;
284     -> x_CheckCurVersion
285         if this record already deleted (IndexDeleted), goto x_FinishMoveRecord;
286         if VerMgr has no current version (this is either prev version, or one
287             that was not written yet), goto x_FinishMoveRecord;
288         if record to move does not belong to current version of blob, goto x_FinishMoveRecord;
289             (this means this version will be deleted soon, probably)
290         if this record stores blob metadata, check that another thread BlobVerData does not update it;
        if this record will be deleted soon, goto x_FinishMoveRecord;
292         goto x_MoveRecord
293     -> x_MoveRecord: move, goto x_FinishMoveRecord
294     -> x_FinishMoveRecord: release used resources;
295         if move failed ? x_FinishMoves :  x_MoveNextRecord
296     -> x_FinishMoves: if the file from which we moved records is empty now, delete it;
297         save some statistics
298         goto x_FinishSession
299     -> x_FinishSession: run again in a second, or sooner
300 */
301 
302 
303 class CSpaceShrinker : public CSrvStatesTask<CSpaceShrinker>,
304                        public CSrvTransConsumer
305 {
306 public:
307     CSpaceShrinker(void);
308     virtual ~CSpaceShrinker(void);
309 
310 private:
311     State x_PrepareToShrink(void);
312     State x_DeleteNextFile(void);
313     State x_StartMoves(void);
314     State x_MoveNextRecord(void);
315     State x_CheckCurVersion(void);
316     State x_MoveRecord(void);
317     State x_FinishMoveRecord(void);
318     State x_FinishMoves(void);
319     State x_FinishSession(void);
320 
321     SNCDataCoord x_FindMetaCoord(SNCDataCoord coord, Uint1 max_map_depth);
322 
323 
324     typedef vector<CSrvRef<SNCDBFileInfo> > TFilesList;
325 
326     TFilesList m_FilesToDel;
327     TFilesList::iterator m_CurDelFile;
328     CSrvRef<SNCDBFileInfo> m_MaxFile;
329     SFileIndexRec* m_IndRec;
330     SNCCacheData* m_CacheData;
331     CNCBlobVerManager* m_VerMgr;
332     CSrvRef<SNCBlobVerData> m_CurVer;
333     int m_StartTime;
334     Uint4 m_RecNum;
335     Uint4 m_PrevRecNum;
336     Uint4 m_LastAlive;
337     Uint4 m_CntProcessed;
338     Uint4 m_CntMoved;
339     Uint4 m_SizeMoved;
340     bool m_Failed;
341     bool m_MovingMeta;
342     TFileRecsMap m_RecsMap;
343 };
344 
345 
346 /*
347 delete expired blobs:
348     by dead_time
349     or, when DB is too big, also delete oldest blobs
350 
351 slot is made of Buckets (see ini file: cnt_slot_buckets)
352 
353     begin: x_StartSession
354     -> x_StartSession
355         analyze DB size (do we need to reduce it?)
356         goto x_CleanNextBucket
357     -> x_CleanNextBucket
358         if all buckets clean, goto x_FinishSession;
359         select bucket, goto x_DeleteNextData
360     -> x_DeleteNextData
361         if nothing to delete, goto x_CleanNextBucket
362         delete blob,  goto x_DeleteNextData
363     -> x_FinishSession
364         goto x_StartSession
365 */
366 
367 class CExpiredCleaner : public CSrvStatesTask<CExpiredCleaner>
368 {
369 public:
370     CExpiredCleaner(void);
371     virtual ~CExpiredCleaner(void);
372 
373     static void x_DeleteData(SNCCacheData* cache_data);
374 
375 private:
376     State x_StartSession(void);
377     State x_CleanNextBucket(void);
378     State x_DeleteNextData(void);
379     State x_FinishSession(void);
380 
381 
382     int m_StartTime;
383     int m_NextDead;
384     Uint4 m_ExtraGCTime;
385     Uint2 m_CurBucket;
386     size_t m_CurDelData;
387     int m_BatchSize;
388     bool m_DoExtraGC;
389     vector<SNCCacheData*> m_CacheDatas;
390 };
391 
392 /*
393     delete multiple blobs created earlier than a certain time
394     by filter (usually  cache|key|* )
395 
396     begin: x_StartSession
397     -> x_StartSession
398         get first filter
399         if found then goto x_FindNext
400     -> x_FindNext
401         for each bucket: get full blob key
402             if found then  goto x_RequestBlobAccess
403         if not found then goto x_FinishSession
404     -> x_RequestBlobAccess
405         create CNCBlobAccessor, request blob meta info
406         goto x_RemoveBlob
407     -> x_RemoveBlob
408         once the info is ready, check it once again and remove
409         goto  x_Finalize
410     -> x_Finalize
411         finalize accessor, goto x_FindNext
412     -> x_FinishSession
413         erase filter
414         goto x_StartSession
415 */
416 class CBulkCleaner : public CSrvStatesTask<CBulkCleaner>
417 {
418 public:
419     CBulkCleaner(void);
420     virtual ~CBulkCleaner(void);
421 
422 private:
423     State x_StartSession(void);
424     State x_FindNext(void);
425     State x_RequestBlobAccess(void);
426     State x_RemoveBlob(void);
427     State x_Finalize(void);
428     State x_FinishSession(void);
429 
430     Uint2 m_CurBucket;
431     Uint8  m_CrTime;
432     string m_Filter, m_Key;
433     CNCBlobAccessor* m_BlobAccess;
434 };
435 
436 
437 class CMovedRecDeleter : public CSrvRCUUser
438 {
439 public:
440     CMovedRecDeleter(SNCDBFileInfo* file_info, SFileIndexRec* ind_rec);
441     virtual ~CMovedRecDeleter(void);
442 
443 private:
444     virtual void ExecuteRCU(void);
445 
446 
447     CSrvRef<SNCDBFileInfo> m_FileInfo;
448     SFileIndexRec* m_IndRec;
449 };
450 
451 END_NCBI_SCOPE
452 
453 #endif /* NETCACHE__STORAGE_TYPES__HPP */
454