1 /*  $Id: seqdbatlas.cpp 631560 2021-05-19 13:52:56Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Kevin Bealer
27  *
28  */
29 
30 /// @file seqdbatlas.cpp
31 /// Implementation for the CSeqDBAtlas class and several related
32 /// classes, which provide control of a set of memory mappings.
33 #include <ncbi_pch.hpp>
34 
35 #include <objtools/blast/seqdb_reader/impl/seqdbatlas.hpp>
36 #include <objtools/blast/seqdb_reader/impl/seqdbgeneral.hpp>
37 #include <memory>
38 #include <algorithm>
39 #include <objtools/blast/seqdb_reader/seqdbcommon.hpp>
40 
41 #include <corelib/ncbi_system.hpp>
42 
43 #if defined(NCBI_OS_UNIX)
44 #include <unistd.h>
45 #include <sys/mman.h>
46 #include <sys/types.h>
47 #include <sys/stat.h>
48 #include <fcntl.h>
49 #include <sys/resource.h>
50 #include <unistd.h>
51 #endif
52 
53 BEGIN_NCBI_SCOPE
54 
55 // Further optimizations:
56 
57 // 1. Regions could be stored in a map<>, sorted by file, then offset.
58 // This would allow a binary search instead of sequential and would
59 // vastly improve the "bad case" of 100_000s of buffers of file data.
60 
61 // 2. "Scrounging" could be done in the file case.  It is bad to read
62 // 0-4096 then 4096 to 8192, then 4000-4220.  The third could use the
63 // first two to avoid reading.  It should either combine the first two
64 // regions into a new region, or else just copy to a new region and
65 // leave the old ones alone (possibly marking the old regions as high
66 // penalty).  Depending on refcnt, penalty, and region sizes.
67 
68 // Throw function
69 
SeqDB_ThrowException(CSeqDBException::EErrCode code,const string & msg)70 void SeqDB_ThrowException(CSeqDBException::EErrCode code, const string & msg)
71 {
72     switch(code) {
73     case CSeqDBException::eArgErr:
74         NCBI_THROW(CSeqDBException, eArgErr, msg);
75 
76     case CSeqDBException::eFileErr:
77         NCBI_THROW(CSeqDBException, eFileErr, msg);
78 
79     default:
80         NCBI_THROW(CSeqDBException, eMemErr, msg);
81     }
82 }
83 
84 /// Build and throw a file-not-found exception.
85 ///
86 /// @param fname The name of the unfound file. [in]
87 
s_SeqDB_FileNotFound(const string & fname)88 static void s_SeqDB_FileNotFound(const string & fname)
89 {
90     string msg("File [");
91     msg += fname;
92     msg += "] not found.";
93     SeqDB_ThrowException(CSeqDBException::eFileErr, msg);
94 }
95 
96 
97 /// Check the size of a number relative to the scope of a numeric type.
98 
99 template<class TIn, class TOut>
SeqDB_CheckLength(TIn value)100 TOut SeqDB_CheckLength(TIn value)
101 {
102     TOut result = TOut(value);
103 
104     if (sizeof(TOut) < sizeof(TIn)) {
105         if (TIn(result) != value) {
106             SeqDB_ThrowException(CSeqDBException::eFileErr,
107                                  "Offset type does not span file length.");
108         }
109     }
110 
111     return result;
112 }
113 
CSeqDBAtlas(bool use_atlas_lock)114 CSeqDBAtlas::CSeqDBAtlas(bool use_atlas_lock)
115      :m_UseLock           (use_atlas_lock),
116       m_MaxFileSize       (0),
117       m_SearchPath        (GenerateSearchPath())
118 {
119     m_OpenedFilesCount = 0;
120     m_MaxOpenedFilesCount = 0;
121 }
122 
~CSeqDBAtlas()123 CSeqDBAtlas::~CSeqDBAtlas()
124 {
125 }
126 
GetMemoryFile(const string & fileName)127 CMemoryFile* CSeqDBAtlas::GetMemoryFile(const string& fileName)
128 {
129     std::lock_guard<std::mutex> guard(m_FileMemMapMutex);
130     auto it = m_FileMemMap.find(fileName);
131     if (it != m_FileMemMap.end()) {
132     	it->second.get()->m_Count++;
133     	//LOG_POST(Info << "File: " << fileName << " count " << it->second.get()->m_Count);
134         return it->second.get();
135     }
136     CAtlasMappedFile* file(new CAtlasMappedFile(fileName));
137     m_FileMemMap[fileName].reset(file);
138    	_TRACE("Open File: " << fileName);
139     ChangeOpenedFilseCount(CSeqDBAtlas::eFileCounterIncrement);
140     return file;
141 }
142 
ReturnMemoryFile(const string & fileName)143 CMemoryFile* CSeqDBAtlas::ReturnMemoryFile(const string& fileName)
144 {
145     std::lock_guard<std::mutex> guard(m_FileMemMapMutex);
146     auto it = m_FileMemMap.find(fileName);
147     if (it == m_FileMemMap.end()) {
148         NCBI_THROW(CSeqDBException, eMemErr, "File not in mapped file list: " + fileName);
149     }
150     it->second.get()->m_Count--;
151    	//LOG_POST(Info << "Return File: " << fileName << "count " << it->second.get()->m_Count);
152    	if ((GetOpenedFilseCount() > CSeqDBAtlas::e_MaxFileDescritors) &&
153    		(it->second.get()->m_isIsam) && (it->second.get()->m_Count == 0)) {
154    		m_FileMemMap.erase(it);
155    		LOG_POST(Info << "Unmap max file descriptor reached: " << fileName);
156    		ChangeOpenedFilseCount(CSeqDBAtlas::eFileCounterDecrement);
157    	}
158     return NULL;
159 }
160 
DoesFileExist(const string & fname)161 bool CSeqDBAtlas::DoesFileExist(const string & fname)
162 {
163     TIndx length(0);
164     return GetFileSize(fname, length);
165 }
166 
GetFileSize(const string & fname,TIndx & length)167 bool CSeqDBAtlas::GetFileSize(const string   & fname,
168                               TIndx          & length)
169 {
170     return GetFileSizeL(fname, length);
171 }
172 
GetFileSizeL(const string & fname,TIndx & length)173 bool CSeqDBAtlas::GetFileSizeL(const string & fname, TIndx &length)
174 {
175     {
176         std::lock_guard<std::mutex> guard(m_FileSizeMutex);
177         auto it = m_FileSize.find(fname);
178         if (it != m_FileSize.end()) {
179             length = it->second.second;
180             return it->second.first;
181         }
182     }
183 
184     pair<bool, TIndx> val;
185     CFile whole(fname);
186     Int8 file_length = whole.GetLength();
187 
188     if (file_length >= 0) {
189         val.first = true;
190         val.second = SeqDB_CheckLength<Int8, TIndx>(file_length);
191     }
192     else {
193         val.first = false;
194         val.second = 0;
195     }
196 
197     {
198         std::lock_guard<std::mutex> guard(m_FileSizeMutex);
199         m_FileSize[fname] = val;
200 
201         if (file_length >= 0 && (Uint8)file_length > m_MaxFileSize)
202             m_MaxFileSize = file_length;
203     }
204 
205     length = val.second;
206     return val.first;
207 }
208 
/// Simple idiom for RAII with malloc + free.
///
/// Owns at most one malloc()ed buffer and free()s it on destruction.
/// The object is non-copyable: a copy would hold the same pointer and
/// free it a second time.
struct CSeqDBAutoFree {
    /// Constructor; the object starts out owning nothing.
    CSeqDBAutoFree()
        : m_Array(0)
    {
    }

    /// Copying is disabled to prevent a double free of the owned buffer.
    CSeqDBAutoFree(const CSeqDBAutoFree &) = delete;

    /// Assignment is disabled for the same reason as copying.
    CSeqDBAutoFree & operator=(const CSeqDBAutoFree &) = delete;

    /// Specify a malloced area of memory.
    ///
    /// Ownership of x passes to this object. A previously owned buffer
    /// (if different from x) is freed so that it does not leak.
    ///
    /// @param x Pointer returned by malloc(), or NULL. [in]
    void Set(const char * x)
    {
        if (m_Array != x) {
            // free(NULL) is a no-op, so no separate null test is needed.
            free(const_cast<char *>(m_Array));
        }
        m_Array = x;
    }

    /// Destructor will free that memory.
    ~CSeqDBAutoFree()
    {
        free(const_cast<char *>(m_Array));
    }

private:
    /// Pointer to malloced memory.
    const char * m_Array;
};
235 
236 
237 
238 /// Releases allocated memory
RetRegion(const char * datap)239 void CSeqDBAtlas::RetRegion(const char * datap)
240 {
241 	delete [] datap;
242 }
243 
244 
Alloc(size_t length,bool clear)245 char * CSeqDBAtlas::Alloc(size_t length, bool clear)
246 {
247     if (! length) {
248         length = 1;
249     }
250 
251     // Allocate/clear
252 
253     char * newcp = 0;
254 
255     try {
256         newcp = new char[length];
257 
258         // new() should have thrown, but some old implementations are
259         // said to be non-compliant in this regard:
260 
261         if (! newcp) {
262             throw std::bad_alloc();
263         }
264 
265         if (clear) {
266             memset(newcp, 0, length);
267         }
268     }
269     catch(std::bad_alloc) {
270         NCBI_THROW(CSeqDBException, eMemErr,
271                    "CSeqDBAtlas::Alloc: allocation failed.");
272     }
273 
274     return newcp;
275 }
276 
RegisterExternal(CSeqDBMemReg & memreg,size_t bytes,CSeqDBLockHold & locked)277 void CSeqDBAtlas::RegisterExternal(CSeqDBMemReg   & memreg,
278                                    size_t           bytes,
279                                    CSeqDBLockHold & locked)
280 {
281     if (bytes > 0) {
282         Lock(locked);
283         _ASSERT(memreg.m_Bytes == 0);
284 	    memreg.m_Bytes = bytes;
285     }
286 }
287 
UnregisterExternal(CSeqDBMemReg & memreg)288 void CSeqDBAtlas::UnregisterExternal(CSeqDBMemReg & memreg)
289 {
290     size_t bytes = memreg.m_Bytes;
291 
292     if (bytes > 0) {
293         memreg.m_Bytes = 0;
294     }
295 }
296 
297 
298 
299 
300 
CSeqDBAtlasHolder(CSeqDBLockHold * lockedp,bool use_atlas_lock)301 CSeqDBAtlasHolder::CSeqDBAtlasHolder(CSeqDBLockHold * lockedp,
302                                      bool use_atlas_lock)
303 
304 {
305     {{
306     CFastMutexGuard guard(m_Lock);
307 
308     if (m_Count == 0) {
309         m_Atlas = new CSeqDBAtlas(use_atlas_lock);
310     }
311     m_Count ++;
312     }}
313 }
314 
315 
316 // FIXME: This constrctor is deprecated
CSeqDBAtlasHolder(bool use_atlas_lock,CSeqDBLockHold * locdep)317 CSeqDBAtlasHolder::CSeqDBAtlasHolder(bool use_atlas_lock,
318                                      CSeqDBLockHold* locdep)
319 {
320     {{
321     CFastMutexGuard guard(m_Lock);
322 
323     if (m_Count == 0) {
324         m_Atlas = new CSeqDBAtlas(use_atlas_lock);
325     }
326     m_Count ++;
327     }}
328 }
329 
330 
331 DEFINE_CLASS_STATIC_FAST_MUTEX(CSeqDBAtlasHolder::m_Lock);
332 
~CSeqDBAtlasHolder()333 CSeqDBAtlasHolder::~CSeqDBAtlasHolder()
334 {
335 
336     CFastMutexGuard guard(m_Lock);
337     m_Count --;
338 
339     if (m_Count == 0) {
340         delete m_Atlas;
341     }
342 }
343 
Get()344 CSeqDBAtlas & CSeqDBAtlasHolder::Get()
345 {
346     _ASSERT(m_Atlas);
347     return *m_Atlas;
348 }
349 
~CSeqDBLockHold()350 CSeqDBLockHold::~CSeqDBLockHold()
351 {
352     CHECK_MARKER();
353 
354     m_Atlas.Unlock(*this);
355     BREAK_MARKER();
356 }
357 
358 int CSeqDBAtlasHolder::m_Count = 0;
359 CSeqDBAtlas * CSeqDBAtlasHolder::m_Atlas = NULL;
360 
361 
362 CSeqDB_AtlasRegionHolder::
CSeqDB_AtlasRegionHolder(CSeqDBAtlas & atlas,const char * ptr)363 CSeqDB_AtlasRegionHolder(CSeqDBAtlas & atlas, const char * ptr)
364     : m_Atlas(atlas), m_Ptr(ptr)
365 {
366 }
367 
~CSeqDB_AtlasRegionHolder()368 CSeqDB_AtlasRegionHolder::~CSeqDB_AtlasRegionHolder()
369 {
370     if (m_Ptr) {
371         CSeqDBLockHold locked(m_Atlas);
372         m_Atlas.Lock(locked);
373 
374         //m_Atlas.RetRegion(m_Ptr);
375         m_Ptr = NULL;
376     }
377 }
378 
379 END_NCBI_SCOPE
380