1 /* $Id: asn_cache_loader.cpp 610972 2020-06-26 12:58:17Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Mike DiCuccio Cheinan Marks
27 *
28 * File Description: AsnCache dataloader. Implementations.
29 *
30 */
31
32
33 #include <ncbi_pch.hpp>
34
35 #include <objmgr/impl/handle_range_map.hpp>
36 #include <objmgr/impl/tse_info.hpp>
37 #include <objmgr/impl/tse_loadlock.hpp>
38 #include <objmgr/impl/bioseq_info.hpp>
39 #include <objmgr/impl/data_source.hpp>
40 #include <objmgr/data_loader_factory.hpp>
41
42 #include <corelib/plugin_manager.hpp>
43 #include <corelib/plugin_manager_impl.hpp>
44 #include <corelib/plugin_manager_store.hpp>
45
46 #include <objtools/data_loaders/asn_cache/asn_cache_loader.hpp>
47 #include <objtools/data_loaders/asn_cache/asn_cache.hpp>
48
49
50 #define NCBI_USE_ERRCODE_X Objtools_AsnCache_Loader
51
52 BEGIN_NCBI_SCOPE
53
BEGIN_SCOPE(objects)54 BEGIN_SCOPE(objects)
55
56 CAsnCache_DataLoader::SCacheInfo::SCacheInfo()
57 : requests(0)
58 , found(0)
59 {
60 }
61
62
~SCacheInfo()63 CAsnCache_DataLoader::SCacheInfo::~SCacheInfo()
64 {
65 }
66
67
RegisterInObjectManager(CObjectManager & om,const string & db_path,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)68 CAsnCache_DataLoader::TRegisterLoaderInfo CAsnCache_DataLoader::RegisterInObjectManager(
69 CObjectManager& om,
70 const string& db_path,
71 CObjectManager::EIsDefault is_default,
72 CObjectManager::TPriority priority)
73 {
74 TDbMaker maker(db_path);
75 CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
76 return maker.GetRegisterInfo();
77 }
78
79
GetLoaderNameFromArgs(void)80 string CAsnCache_DataLoader::GetLoaderNameFromArgs(void)
81 {
82 return "AsnCache_dataloader";
83 }
84
85
/// Loader name for a loader bound to a specific database path.
///
/// @param db_path  path appended to the fixed name prefix
/// @return         "AsnCache_dataloader:" followed by @a db_path
string CAsnCache_DataLoader::GetLoaderNameFromArgs(const string& db_path)
{
    string loader_name("AsnCache_dataloader:");
    loader_name += db_path;
    return loader_name;
}
90
91
CAsnCache_DataLoader(void)92 CAsnCache_DataLoader::CAsnCache_DataLoader(void)
93 : CDataLoader(GetLoaderNameFromArgs())
94 {
95 m_IndexMap.resize(15);
96 }
97
98
CAsnCache_DataLoader(const string & dl_name)99 CAsnCache_DataLoader::CAsnCache_DataLoader(const string& dl_name)
100 : CDataLoader(dl_name)
101 {
102 m_IndexMap.resize(15);
103 }
104
CAsnCache_DataLoader(const string & dl_name,const string & db_path)105 CAsnCache_DataLoader::CAsnCache_DataLoader(const string& dl_name,
106 const string& db_path)
107 : CDataLoader(dl_name),
108 m_DbPath(db_path)
109 {
110 m_IndexMap.resize(15);
111 }
112
113
~CAsnCache_DataLoader()114 CAsnCache_DataLoader::~CAsnCache_DataLoader()
115 {
116 /**
117 size_t total_requests = 0;
118 size_t total_found = 0;
119 ITERATE (TIndexMap, it, m_IndexMap) {
120 LOG_POST(Error << "thread=" << it->first
121 << " requests=" << it->second.requests
122 << " found=" << it->second.found);
123 total_requests += it->second.requests;
124 total_found += it->second.found;
125 }
126 LOG_POST(Error << "total requests: " << total_requests);
127 LOG_POST(Error << "total found: " << total_found);
128 **/
129 }
130
131
132 CAsnCache_DataLoader::TBlobId
GetBlobId(const CSeq_id_Handle & idh)133 CAsnCache_DataLoader::GetBlobId(const CSeq_id_Handle& idh)
134 {
135 SCacheInfo& index = x_GetIndex();
136 CFastMutexGuard LOCK(index.cache_mtx);
137
138 CAsnIndex::SIndexInfo info;
139 TBlobId blob_id;
140 if (index.cache->GetIndexEntry(idh, info)) {
141 blob_id = new CBlobIdSeq_id(idh);
142 }
143 //LOG_POST(Error << "CAsnCache_DataLoader::GetBlobId(): " << idh);
144 return blob_id;
145 }
146
147
CanGetBlobById() const148 bool CAsnCache_DataLoader::CanGetBlobById() const
149 {
150 return true;
151 }
152
153
GetGi(const CSeq_id_Handle & idh)154 TGi CAsnCache_DataLoader::GetGi(const CSeq_id_Handle& idh)
155 {
156 SCacheInfo& index = x_GetIndex();
157 CFastMutexGuard LOCK(index.cache_mtx);
158
159 CAsnIndex::TGi gi = 0;
160 time_t timestamp = 0;
161 if (index.cache->GetIdInfo(idh, gi, timestamp)) {
162 //LOG_POST(Error << "CAsnCache_DataLoader::GetGi(): " << idh << " -> " << gi);
163 return GI_FROM(CAsnIndex::TGi, gi);
164 }
165 return ZERO_GI;
166 }
167
168
GetSequenceLength(const CSeq_id_Handle & idh)169 TSeqPos CAsnCache_DataLoader::GetSequenceLength(const CSeq_id_Handle& idh)
170 {
171 SCacheInfo& index = x_GetIndex();
172 CFastMutexGuard LOCK(index.cache_mtx);
173
174 CAsnIndex::TGi gi = 0;
175 time_t timestamp = 0;
176 Uint4 sequence_length = 0;
177 Uint4 tax_id = 0;
178 CSeq_id_Handle acc;
179 if (index.cache->GetIdInfo(idh, acc, gi,
180 timestamp, sequence_length, tax_id)) {
181 return sequence_length;
182 }
183 return kInvalidSeqPos;
184 }
185
186
187
188
GetIds(const CSeq_id_Handle & idh,TIds & ids)189 void CAsnCache_DataLoader::GetIds(const CSeq_id_Handle& idh,
190 TIds& ids)
191 {
192 ///
193 /// okay, the contract is that we must return something if we know the
194 /// sequence. thus, if the sequence exists in the cache, we must return
195 /// something. If the SeqId index is available, the cache will use it to
196 /// get the ids quickly; otherwise it will use the expensive way, retrieving
197 /// the entire sequence.
198 ///
199 SCacheInfo& index = x_GetIndex();
200 CFastMutexGuard LOCK(index.cache_mtx);
201
202 vector<CSeq_id_Handle> bioseq_ids;
203 bool res = index.cache->GetSeqIds(idh, bioseq_ids, false);
204 ++index.requests;
205 if (res) {
206 ids.swap(bioseq_ids);
207 }
208 }
209
210
GetTaxId(const CSeq_id_Handle & idh)211 TTaxId CAsnCache_DataLoader::GetTaxId(const CSeq_id_Handle& idh)
212 {
213 SCacheInfo& index = x_GetIndex();
214 CFastMutexGuard LOCK(index.cache_mtx);
215
216 CAsnIndex::TGi gi = 0;
217 time_t timestamp = 0;
218 Uint4 sequence_length = 0;
219 Uint4 tax_id = 0;
220 CSeq_id_Handle acc;
221 if (index.cache->GetIdInfo(idh, acc, gi,
222 timestamp, sequence_length, tax_id)) {
223 return TAX_ID_FROM(Uint4, tax_id);
224 }
225 return INVALID_TAX_ID;
226 }
227
228
229 #if NCBI_PRODUCTION_VER > 20110000
230 /// not yet in SC-6.0...
GetGis(const TIds & ids,TLoaded & loaded,TIds & ret)231 void CAsnCache_DataLoader::GetGis(const TIds& ids, TLoaded& loaded, TIds& ret)
232 {
233 SCacheInfo& index = x_GetIndex();
234 CFastMutexGuard LOCK(index.cache_mtx);
235
236 ret.clear();
237 ret.resize(ids.size());
238
239 loaded.clear();
240 loaded.resize(ids.size());
241 for (size_t i = 0; i < ids.size(); ++i) {
242 CAsnIndex::TGi gi = 0;
243 time_t timestamp = 0;
244 if (index.cache->GetIdInfo(ids[i], gi, timestamp)) {
245 ret[i] = CSeq_id_Handle::GetHandle(GI_FROM(CAsnIndex::TGi, gi));
246 loaded[i] = true;
247 }
248 }
249 }
250 #endif
251
252 CAsnCache_DataLoader::TTSE_Lock
GetBlobById(const TBlobId & blob_id)253 CAsnCache_DataLoader::GetBlobById(const TBlobId& blob_id)
254 {
255 CSeq_id_Handle idh =
256 dynamic_cast<const CBlobIdSeq_id&>(*blob_id).GetValue();
257
258 CTSE_LoadLock lock = GetDataSource()->GetTSE_LoadLock(blob_id);
259 if ( !lock.IsLoaded() ) {
260 SCacheInfo& index = x_GetIndex();
261 CFastMutexGuard LOCK(index.cache_mtx);
262
263 CRef<CSeq_entry> entry = index.cache->GetEntry(idh);
264 ++index.requests;
265
266 if (entry) {
267 ++index.found;
268 lock->SetSeq_entry(*entry);
269 lock.SetLoaded();
270 } else {
271 NCBI_THROW(CException, eUnknown,
272 "CAsnCache_DataLoader::GetBlobById(): blob for " +
273 idh.AsString() + " not found");
274 }
275 }
276 return lock;
277 }
278
279
280 CDataLoader::TTSE_LockSet
GetRecords(const CSeq_id_Handle & idh,EChoice choice)281 CAsnCache_DataLoader::GetRecords(const CSeq_id_Handle& idh,
282 EChoice choice)
283 {
284 TTSE_LockSet locks;
285
286 switch ( choice ) {
287 case eBlob:
288 case eBioseq:
289 case eCore:
290 case eBioseqCore:
291 case eSequence:
292 case eAll:
293 {{
294 TBlobId blob_id = GetBlobId(idh);
295 if (blob_id) {
296 locks.insert(GetBlobById(blob_id));
297 }
298 }}
299 break;
300
301 default:
302 break;
303 }
304
305 return locks;
306 }
307
x_GetIndex()308 CAsnCache_DataLoader::SCacheInfo& CAsnCache_DataLoader::x_GetIndex()
309 {
310 if (m_IndexMap.empty()) {
311 NCBI_THROW(CException, eUnknown,
312 "setup failure: no cache objects available");
313 }
314
315 CFastMutexGuard LOCK(m_Mutex);
316
317 // hash to a pool of cache objects based on thread ID
318 int id = CThread::GetSelf();
319 id %= m_IndexMap.size();
320
321 TIndexMap::iterator iter = m_IndexMap.begin() + id;
322 if ( !iter->get() ) {
323 iter->reset(new SCacheInfo);
324 (*iter)->cache.Reset(new CAsnCache(m_DbPath));
325 }
326 return **iter;
327 }
328
329
330 END_SCOPE(objects)
331
332 // ===========================================================================
333
334 USING_SCOPE(objects);
335
DataLoaders_Register_AsnCache(void)336 void DataLoaders_Register_AsnCache(void)
337 {
338 RegisterEntryPoint<CDataLoader>(NCBI_EntryPoint_DataLoader_AsnCache);
339 }
340
341
342 const string kDataLoader_AsnCache_DriverName("asncache");
343
344 class CAsnCache_DataLoaderCF : public CDataLoaderFactory
345 {
346 public:
CAsnCache_DataLoaderCF(void)347 CAsnCache_DataLoaderCF(void)
348 : CDataLoaderFactory(kDataLoader_AsnCache_DriverName) {}
~CAsnCache_DataLoaderCF(void)349 virtual ~CAsnCache_DataLoaderCF(void) {}
350
351 protected:
352 virtual CDataLoader* CreateAndRegister(
353 CObjectManager& om,
354 const TPluginManagerParamTree* params) const;
355 };
356
357
CreateAndRegister(CObjectManager & om,const TPluginManagerParamTree * params) const358 CDataLoader* CAsnCache_DataLoaderCF::CreateAndRegister(
359 CObjectManager& om,
360 const TPluginManagerParamTree* params) const
361 {
362 string db_path =
363 GetParam(GetDriverName(), params,
364 "DbPath", false);
365
366 // IsDefault and Priority arguments may be specified
367 return CAsnCache_DataLoader::RegisterInObjectManager(om, db_path).GetLoader();
368 }
369
370
NCBI_EntryPoint_DataLoader_AsnCache(CPluginManager<CDataLoader>::TDriverInfoList & info_list,CPluginManager<CDataLoader>::EEntryPointRequest method)371 void NCBI_EntryPoint_DataLoader_AsnCache(
372 CPluginManager<CDataLoader>::TDriverInfoList& info_list,
373 CPluginManager<CDataLoader>::EEntryPointRequest method)
374 {
375 CHostEntryPointImpl<CAsnCache_DataLoaderCF>::NCBI_EntryPointImpl(info_list, method);
376 }
377
378
NCBI_EntryPoint_xloader_asncache(CPluginManager<objects::CDataLoader>::TDriverInfoList & info_list,CPluginManager<objects::CDataLoader>::EEntryPointRequest method)379 void NCBI_EntryPoint_xloader_asncache(
380 CPluginManager<objects::CDataLoader>::TDriverInfoList& info_list,
381 CPluginManager<objects::CDataLoader>::EEntryPointRequest method)
382 {
383 NCBI_EntryPoint_DataLoader_AsnCache(info_list, method);
384 }
385
386
387 END_NCBI_SCOPE
388