1 /* $Id: seqdbatlas.cpp 631560 2021-05-19 13:52:56Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Kevin Bealer
27 *
28 */
29
30 /// @file seqdbatlas.cpp
31 /// Implementation for the CSeqDBAtlas class and several related
32 /// classes, which provide control of a set of memory mappings.
33 #include <ncbi_pch.hpp>
34
35 #include <objtools/blast/seqdb_reader/impl/seqdbatlas.hpp>
36 #include <objtools/blast/seqdb_reader/impl/seqdbgeneral.hpp>
37 #include <memory>
38 #include <algorithm>
39 #include <objtools/blast/seqdb_reader/seqdbcommon.hpp>
40
41 #include <corelib/ncbi_system.hpp>
42
43 #if defined(NCBI_OS_UNIX)
44 #include <unistd.h>
45 #include <sys/mman.h>
46 #include <sys/types.h>
47 #include <sys/stat.h>
48 #include <fcntl.h>
49 #include <sys/resource.h>
50 #include <unistd.h>
51 #endif
52
53 BEGIN_NCBI_SCOPE
54
55 // Further optimizations:
56
57 // 1. Regions could be stored in a map<>, sorted by file, then offset.
58 // This would allow a binary search instead of sequential and would
59 // vastly improve the "bad case" of 100_000s of buffers of file data.
60
61 // 2. "Scrounging" could be done in the file case. It is bad to read
62 // 0-4096 then 4096 to 8192, then 4000-4220. The third could use the
63 // first two to avoid reading. It should either combine the first two
64 // regions into a new region, or else just copy to a new region and
65 // leave the old ones alone (possibly marking the old regions as high
66 // penalty). Depending on refcnt, penalty, and region sizes.
67
68 // Throw function
69
SeqDB_ThrowException(CSeqDBException::EErrCode code,const string & msg)70 void SeqDB_ThrowException(CSeqDBException::EErrCode code, const string & msg)
71 {
72 switch(code) {
73 case CSeqDBException::eArgErr:
74 NCBI_THROW(CSeqDBException, eArgErr, msg);
75
76 case CSeqDBException::eFileErr:
77 NCBI_THROW(CSeqDBException, eFileErr, msg);
78
79 default:
80 NCBI_THROW(CSeqDBException, eMemErr, msg);
81 }
82 }
83
84 /// Build and throw a file-not-found exception.
85 ///
86 /// @param fname The name of the unfound file. [in]
87
s_SeqDB_FileNotFound(const string & fname)88 static void s_SeqDB_FileNotFound(const string & fname)
89 {
90 string msg("File [");
91 msg += fname;
92 msg += "] not found.";
93 SeqDB_ThrowException(CSeqDBException::eFileErr, msg);
94 }
95
96
97 /// Check the size of a number relative to the scope of a numeric type.
98
99 template<class TIn, class TOut>
SeqDB_CheckLength(TIn value)100 TOut SeqDB_CheckLength(TIn value)
101 {
102 TOut result = TOut(value);
103
104 if (sizeof(TOut) < sizeof(TIn)) {
105 if (TIn(result) != value) {
106 SeqDB_ThrowException(CSeqDBException::eFileErr,
107 "Offset type does not span file length.");
108 }
109 }
110
111 return result;
112 }
113
CSeqDBAtlas(bool use_atlas_lock)114 CSeqDBAtlas::CSeqDBAtlas(bool use_atlas_lock)
115 :m_UseLock (use_atlas_lock),
116 m_MaxFileSize (0),
117 m_SearchPath (GenerateSearchPath())
118 {
119 m_OpenedFilesCount = 0;
120 m_MaxOpenedFilesCount = 0;
121 }
122
~CSeqDBAtlas()123 CSeqDBAtlas::~CSeqDBAtlas()
124 {
125 }
126
GetMemoryFile(const string & fileName)127 CMemoryFile* CSeqDBAtlas::GetMemoryFile(const string& fileName)
128 {
129 std::lock_guard<std::mutex> guard(m_FileMemMapMutex);
130 auto it = m_FileMemMap.find(fileName);
131 if (it != m_FileMemMap.end()) {
132 it->second.get()->m_Count++;
133 //LOG_POST(Info << "File: " << fileName << " count " << it->second.get()->m_Count);
134 return it->second.get();
135 }
136 CAtlasMappedFile* file(new CAtlasMappedFile(fileName));
137 m_FileMemMap[fileName].reset(file);
138 _TRACE("Open File: " << fileName);
139 ChangeOpenedFilseCount(CSeqDBAtlas::eFileCounterIncrement);
140 return file;
141 }
142
ReturnMemoryFile(const string & fileName)143 CMemoryFile* CSeqDBAtlas::ReturnMemoryFile(const string& fileName)
144 {
145 std::lock_guard<std::mutex> guard(m_FileMemMapMutex);
146 auto it = m_FileMemMap.find(fileName);
147 if (it == m_FileMemMap.end()) {
148 NCBI_THROW(CSeqDBException, eMemErr, "File not in mapped file list: " + fileName);
149 }
150 it->second.get()->m_Count--;
151 //LOG_POST(Info << "Return File: " << fileName << "count " << it->second.get()->m_Count);
152 if ((GetOpenedFilseCount() > CSeqDBAtlas::e_MaxFileDescritors) &&
153 (it->second.get()->m_isIsam) && (it->second.get()->m_Count == 0)) {
154 m_FileMemMap.erase(it);
155 LOG_POST(Info << "Unmap max file descriptor reached: " << fileName);
156 ChangeOpenedFilseCount(CSeqDBAtlas::eFileCounterDecrement);
157 }
158 return NULL;
159 }
160
DoesFileExist(const string & fname)161 bool CSeqDBAtlas::DoesFileExist(const string & fname)
162 {
163 TIndx length(0);
164 return GetFileSize(fname, length);
165 }
166
GetFileSize(const string & fname,TIndx & length)167 bool CSeqDBAtlas::GetFileSize(const string & fname,
168 TIndx & length)
169 {
170 return GetFileSizeL(fname, length);
171 }
172
GetFileSizeL(const string & fname,TIndx & length)173 bool CSeqDBAtlas::GetFileSizeL(const string & fname, TIndx &length)
174 {
175 {
176 std::lock_guard<std::mutex> guard(m_FileSizeMutex);
177 auto it = m_FileSize.find(fname);
178 if (it != m_FileSize.end()) {
179 length = it->second.second;
180 return it->second.first;
181 }
182 }
183
184 pair<bool, TIndx> val;
185 CFile whole(fname);
186 Int8 file_length = whole.GetLength();
187
188 if (file_length >= 0) {
189 val.first = true;
190 val.second = SeqDB_CheckLength<Int8, TIndx>(file_length);
191 }
192 else {
193 val.first = false;
194 val.second = 0;
195 }
196
197 {
198 std::lock_guard<std::mutex> guard(m_FileSizeMutex);
199 m_FileSize[fname] = val;
200
201 if (file_length >= 0 && (Uint8)file_length > m_MaxFileSize)
202 m_MaxFileSize = file_length;
203 }
204
205 length = val.second;
206 return val.first;
207 }
208
/// Simple idiom for RAII with malloc + free.
///
/// The holder takes a pointer to malloc()ed memory via Set() and
/// passes it to free() when the holder is destroyed.  Copying is
/// disabled: a copy would hold the same pointer as the original, and
/// both destructors would free it.
struct CSeqDBAutoFree {
    /// Constructor; the holder starts out with no memory to free.
    CSeqDBAutoFree()
        : m_Array(0)
    {
    }

    /// Copy construction is disabled to prevent a double free.
    CSeqDBAutoFree(const CSeqDBAutoFree &) = delete;

    /// Copy assignment is disabled to prevent a double free.
    CSeqDBAutoFree & operator=(const CSeqDBAutoFree &) = delete;

    /// Specify a malloced area of memory.
    ///
    /// Any pointer stored by an earlier call is simply replaced; it is
    /// not freed by this method.
    ///
    /// @param x Pointer to malloc()ed memory, or null. [in]
    void Set(const char * x)
    {
        m_Array = x;
    }

    /// Destructor will free that memory.
    ~CSeqDBAutoFree()
    {
        if (m_Array) {
            free((void*) m_Array);
        }
    }

private:
    /// Pointer to malloced memory.
    const char * m_Array;
};
235
236
237
238 /// Releases allocated memory
RetRegion(const char * datap)239 void CSeqDBAtlas::RetRegion(const char * datap)
240 {
241 delete [] datap;
242 }
243
244
Alloc(size_t length,bool clear)245 char * CSeqDBAtlas::Alloc(size_t length, bool clear)
246 {
247 if (! length) {
248 length = 1;
249 }
250
251 // Allocate/clear
252
253 char * newcp = 0;
254
255 try {
256 newcp = new char[length];
257
258 // new() should have thrown, but some old implementations are
259 // said to be non-compliant in this regard:
260
261 if (! newcp) {
262 throw std::bad_alloc();
263 }
264
265 if (clear) {
266 memset(newcp, 0, length);
267 }
268 }
269 catch(std::bad_alloc) {
270 NCBI_THROW(CSeqDBException, eMemErr,
271 "CSeqDBAtlas::Alloc: allocation failed.");
272 }
273
274 return newcp;
275 }
276
RegisterExternal(CSeqDBMemReg & memreg,size_t bytes,CSeqDBLockHold & locked)277 void CSeqDBAtlas::RegisterExternal(CSeqDBMemReg & memreg,
278 size_t bytes,
279 CSeqDBLockHold & locked)
280 {
281 if (bytes > 0) {
282 Lock(locked);
283 _ASSERT(memreg.m_Bytes == 0);
284 memreg.m_Bytes = bytes;
285 }
286 }
287
UnregisterExternal(CSeqDBMemReg & memreg)288 void CSeqDBAtlas::UnregisterExternal(CSeqDBMemReg & memreg)
289 {
290 size_t bytes = memreg.m_Bytes;
291
292 if (bytes > 0) {
293 memreg.m_Bytes = 0;
294 }
295 }
296
297
298
299
300
CSeqDBAtlasHolder(CSeqDBLockHold * lockedp,bool use_atlas_lock)301 CSeqDBAtlasHolder::CSeqDBAtlasHolder(CSeqDBLockHold * lockedp,
302 bool use_atlas_lock)
303
304 {
305 {{
306 CFastMutexGuard guard(m_Lock);
307
308 if (m_Count == 0) {
309 m_Atlas = new CSeqDBAtlas(use_atlas_lock);
310 }
311 m_Count ++;
312 }}
313 }
314
315
316 // FIXME: This constrctor is deprecated
CSeqDBAtlasHolder(bool use_atlas_lock,CSeqDBLockHold * locdep)317 CSeqDBAtlasHolder::CSeqDBAtlasHolder(bool use_atlas_lock,
318 CSeqDBLockHold* locdep)
319 {
320 {{
321 CFastMutexGuard guard(m_Lock);
322
323 if (m_Count == 0) {
324 m_Atlas = new CSeqDBAtlas(use_atlas_lock);
325 }
326 m_Count ++;
327 }}
328 }
329
330
331 DEFINE_CLASS_STATIC_FAST_MUTEX(CSeqDBAtlasHolder::m_Lock);
332
~CSeqDBAtlasHolder()333 CSeqDBAtlasHolder::~CSeqDBAtlasHolder()
334 {
335
336 CFastMutexGuard guard(m_Lock);
337 m_Count --;
338
339 if (m_Count == 0) {
340 delete m_Atlas;
341 }
342 }
343
Get()344 CSeqDBAtlas & CSeqDBAtlasHolder::Get()
345 {
346 _ASSERT(m_Atlas);
347 return *m_Atlas;
348 }
349
~CSeqDBLockHold()350 CSeqDBLockHold::~CSeqDBLockHold()
351 {
352 CHECK_MARKER();
353
354 m_Atlas.Unlock(*this);
355 BREAK_MARKER();
356 }
357
358 int CSeqDBAtlasHolder::m_Count = 0;
359 CSeqDBAtlas * CSeqDBAtlasHolder::m_Atlas = NULL;
360
361
362 CSeqDB_AtlasRegionHolder::
CSeqDB_AtlasRegionHolder(CSeqDBAtlas & atlas,const char * ptr)363 CSeqDB_AtlasRegionHolder(CSeqDBAtlas & atlas, const char * ptr)
364 : m_Atlas(atlas), m_Ptr(ptr)
365 {
366 }
367
~CSeqDB_AtlasRegionHolder()368 CSeqDB_AtlasRegionHolder::~CSeqDB_AtlasRegionHolder()
369 {
370 if (m_Ptr) {
371 CSeqDBLockHold locked(m_Atlas);
372 m_Atlas.Lock(locked);
373
374 //m_Atlas.RetRegion(m_Ptr);
375 m_Ptr = NULL;
376 }
377 }
378
379 END_NCBI_SCOPE
380