1 /*  $Id: asn_cache_loader.cpp 610972 2020-06-26 12:58:17Z grichenk $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Mike DiCuccio Cheinan Marks
27  *
28  * File Description:  AsnCache dataloader. Implementations.
29  *
30  */
31 
32 
33 #include <ncbi_pch.hpp>
34 
35 #include <objmgr/impl/handle_range_map.hpp>
36 #include <objmgr/impl/tse_info.hpp>
37 #include <objmgr/impl/tse_loadlock.hpp>
38 #include <objmgr/impl/bioseq_info.hpp>
39 #include <objmgr/impl/data_source.hpp>
40 #include <objmgr/data_loader_factory.hpp>
41 
42 #include <corelib/plugin_manager.hpp>
43 #include <corelib/plugin_manager_impl.hpp>
44 #include <corelib/plugin_manager_store.hpp>
45 
46 #include <objtools/data_loaders/asn_cache/asn_cache_loader.hpp>
47 #include <objtools/data_loaders/asn_cache/asn_cache.hpp>
48 
49 
50 #define NCBI_USE_ERRCODE_X   Objtools_AsnCache_Loader
51 
52 BEGIN_NCBI_SCOPE
53 
BEGIN_SCOPE(objects)54 BEGIN_SCOPE(objects)
55 
56 CAsnCache_DataLoader::SCacheInfo::SCacheInfo()
57     : requests(0)
58     , found(0)
59 {
60 }
61 
62 
~SCacheInfo()63 CAsnCache_DataLoader::SCacheInfo::~SCacheInfo()
64 {
65 }
66 
67 
RegisterInObjectManager(CObjectManager & om,const string & db_path,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)68 CAsnCache_DataLoader::TRegisterLoaderInfo CAsnCache_DataLoader::RegisterInObjectManager(
69     CObjectManager& om,
70     const string& db_path,
71     CObjectManager::EIsDefault is_default,
72     CObjectManager::TPriority priority)
73 {
74     TDbMaker maker(db_path);
75     CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
76     return maker.GetRegisterInfo();
77 }
78 
79 
GetLoaderNameFromArgs(void)80 string CAsnCache_DataLoader::GetLoaderNameFromArgs(void)
81 {
82     return "AsnCache_dataloader";
83 }
84 
85 
/// Loader name for a specific cache path: the fixed prefix
/// "AsnCache_dataloader:" followed by db_path.
string CAsnCache_DataLoader::GetLoaderNameFromArgs(const string& db_path)
{
    string loader_name("AsnCache_dataloader:");
    loader_name += db_path;
    return loader_name;
}
90 
91 
CAsnCache_DataLoader(void)92 CAsnCache_DataLoader::CAsnCache_DataLoader(void)
93     : CDataLoader(GetLoaderNameFromArgs())
94 {
95     m_IndexMap.resize(15);
96 }
97 
98 
CAsnCache_DataLoader(const string & dl_name)99 CAsnCache_DataLoader::CAsnCache_DataLoader(const string& dl_name)
100     : CDataLoader(dl_name)
101 {
102     m_IndexMap.resize(15);
103 }
104 
CAsnCache_DataLoader(const string & dl_name,const string & db_path)105 CAsnCache_DataLoader::CAsnCache_DataLoader(const string& dl_name,
106                                            const string& db_path)
107     : CDataLoader(dl_name),
108       m_DbPath(db_path)
109 {
110     m_IndexMap.resize(15);
111 }
112 
113 
~CAsnCache_DataLoader()114 CAsnCache_DataLoader::~CAsnCache_DataLoader()
115 {
116     /**
117     size_t total_requests = 0;
118     size_t total_found = 0;
119     ITERATE (TIndexMap, it, m_IndexMap) {
120         LOG_POST(Error << "thread=" << it->first
121                  << "  requests=" << it->second.requests
122                  << "  found=" << it->second.found);
123         total_requests += it->second.requests;
124         total_found    += it->second.found;
125     }
126     LOG_POST(Error << "total requests: " << total_requests);
127     LOG_POST(Error << "total found:    " << total_found);
128     **/
129 }
130 
131 
132 CAsnCache_DataLoader::TBlobId
GetBlobId(const CSeq_id_Handle & idh)133 CAsnCache_DataLoader::GetBlobId(const CSeq_id_Handle& idh)
134 {
135     SCacheInfo& index = x_GetIndex();
136     CFastMutexGuard LOCK(index.cache_mtx);
137 
138     CAsnIndex::SIndexInfo info;
139     TBlobId blob_id;
140     if (index.cache->GetIndexEntry(idh, info)) {
141         blob_id = new CBlobIdSeq_id(idh);
142     }
143     //LOG_POST(Error << "CAsnCache_DataLoader::GetBlobId(): " << idh);
144     return blob_id;
145 }
146 
147 
CanGetBlobById() const148 bool CAsnCache_DataLoader::CanGetBlobById() const
149 {
150     return true;
151 }
152 
153 
GetGi(const CSeq_id_Handle & idh)154 TGi CAsnCache_DataLoader::GetGi(const CSeq_id_Handle& idh)
155 {
156     SCacheInfo& index = x_GetIndex();
157     CFastMutexGuard LOCK(index.cache_mtx);
158 
159     CAsnIndex::TGi gi = 0;
160     time_t timestamp = 0;
161     if (index.cache->GetIdInfo(idh, gi, timestamp)) {
162         //LOG_POST(Error << "CAsnCache_DataLoader::GetGi(): " << idh << " -> " << gi);
163         return GI_FROM(CAsnIndex::TGi, gi);
164     }
165     return ZERO_GI;
166 }
167 
168 
GetSequenceLength(const CSeq_id_Handle & idh)169 TSeqPos CAsnCache_DataLoader::GetSequenceLength(const CSeq_id_Handle& idh)
170 {
171     SCacheInfo& index = x_GetIndex();
172     CFastMutexGuard LOCK(index.cache_mtx);
173 
174     CAsnIndex::TGi gi = 0;
175     time_t timestamp = 0;
176     Uint4 sequence_length = 0;
177     Uint4 tax_id = 0;
178     CSeq_id_Handle acc;
179     if (index.cache->GetIdInfo(idh, acc, gi,
180                                timestamp, sequence_length, tax_id)) {
181         return sequence_length;
182     }
183     return kInvalidSeqPos;
184 }
185 
186 
187 
188 
GetIds(const CSeq_id_Handle & idh,TIds & ids)189 void CAsnCache_DataLoader::GetIds(const CSeq_id_Handle& idh,
190                                   TIds& ids)
191 {
192     ///
193     /// okay, the contract is that we must return something if we know the
194     /// sequence.  thus, if the sequence exists in the cache, we must return
195     /// something. If the SeqId index is available, the cache will use it to
196     /// get the ids quickly; otherwise it will use the expensive way, retrieving
197     /// the entire sequence.
198     ///
199     SCacheInfo& index = x_GetIndex();
200     CFastMutexGuard LOCK(index.cache_mtx);
201 
202     vector<CSeq_id_Handle> bioseq_ids;
203     bool res = index.cache->GetSeqIds(idh, bioseq_ids, false);
204     ++index.requests;
205     if (res) {
206         ids.swap(bioseq_ids);
207     }
208 }
209 
210 
GetTaxId(const CSeq_id_Handle & idh)211 TTaxId CAsnCache_DataLoader::GetTaxId(const CSeq_id_Handle& idh)
212 {
213     SCacheInfo& index = x_GetIndex();
214     CFastMutexGuard LOCK(index.cache_mtx);
215 
216     CAsnIndex::TGi gi = 0;
217     time_t timestamp = 0;
218     Uint4 sequence_length = 0;
219     Uint4 tax_id = 0;
220     CSeq_id_Handle acc;
221     if (index.cache->GetIdInfo(idh, acc, gi,
222                                timestamp, sequence_length, tax_id)) {
223         return TAX_ID_FROM(Uint4, tax_id);
224     }
225     return INVALID_TAX_ID;
226 }
227 
228 
#if NCBI_PRODUCTION_VER > 20110000
/// not yet in SC-6.0...
///
/// Bulk GI lookup. 'ret' and 'loaded' are reset and resized to match
/// 'ids'; for each id the cache can resolve, the matching 'ret' entry
/// receives a GI-based handle and the matching 'loaded' entry is set to
/// true. Entries for ids that cannot be resolved keep their
/// freshly-resized default values.
///
/// @param ids     sequence ids to resolve
/// @param loaded  per-id success flags (output)
/// @param ret     per-id GI handles (output)
void CAsnCache_DataLoader::GetGis(const TIds& ids, TLoaded& loaded, TIds& ret)
{
    SCacheInfo& slot = x_GetIndex();
    // One lock acquisition covers the whole batch.
    CFastMutexGuard guard(slot.cache_mtx);

    const size_t count = ids.size();

    ret.clear();
    ret.resize(count);

    loaded.clear();
    loaded.resize(count);

    for (size_t pos = 0;  pos < count;  ++pos) {
        CAsnIndex::TGi raw_gi = 0;
        time_t stamp = 0;
        if ( !slot.cache->GetIdInfo(ids[pos], raw_gi, stamp) ) {
            continue;
        }
        ret[pos] = CSeq_id_Handle::GetHandle(GI_FROM(CAsnIndex::TGi, raw_gi));
        loaded[pos] = true;
    }
}
#endif
251 
252 CAsnCache_DataLoader::TTSE_Lock
GetBlobById(const TBlobId & blob_id)253 CAsnCache_DataLoader::GetBlobById(const TBlobId& blob_id)
254 {
255     CSeq_id_Handle idh =
256         dynamic_cast<const CBlobIdSeq_id&>(*blob_id).GetValue();
257 
258     CTSE_LoadLock lock = GetDataSource()->GetTSE_LoadLock(blob_id);
259     if ( !lock.IsLoaded() ) {
260         SCacheInfo& index = x_GetIndex();
261         CFastMutexGuard LOCK(index.cache_mtx);
262 
263         CRef<CSeq_entry> entry = index.cache->GetEntry(idh);
264         ++index.requests;
265 
266         if (entry) {
267             ++index.found;
268             lock->SetSeq_entry(*entry);
269             lock.SetLoaded();
270         } else {
271             NCBI_THROW(CException, eUnknown,
272                        "CAsnCache_DataLoader::GetBlobById(): blob for " +
273                        idh.AsString() + " not found");
274         }
275     }
276     return lock;
277 }
278 
279 
280 CDataLoader::TTSE_LockSet
GetRecords(const CSeq_id_Handle & idh,EChoice choice)281 CAsnCache_DataLoader::GetRecords(const CSeq_id_Handle& idh,
282                                  EChoice choice)
283 {
284     TTSE_LockSet locks;
285 
286     switch ( choice ) {
287     case eBlob:
288     case eBioseq:
289     case eCore:
290     case eBioseqCore:
291     case eSequence:
292     case eAll:
293         {{
294              TBlobId blob_id = GetBlobId(idh);
295              if (blob_id) {
296                  locks.insert(GetBlobById(blob_id));
297              }
298          }}
299         break;
300 
301     default:
302         break;
303     }
304 
305     return locks;
306 }
307 
x_GetIndex()308 CAsnCache_DataLoader::SCacheInfo& CAsnCache_DataLoader::x_GetIndex()
309 {
310     if (m_IndexMap.empty()) {
311         NCBI_THROW(CException, eUnknown,
312                    "setup failure: no cache objects available");
313     }
314 
315     CFastMutexGuard LOCK(m_Mutex);
316 
317     // hash to a pool of cache objects based on thread ID
318     int id = CThread::GetSelf();
319     id %= m_IndexMap.size();
320 
321     TIndexMap::iterator iter = m_IndexMap.begin() + id;
322     if ( !iter->get() ) {
323         iter->reset(new SCacheInfo);
324         (*iter)->cache.Reset(new CAsnCache(m_DbPath));
325     }
326     return **iter;
327 }
328 
329 
330 END_SCOPE(objects)
331 
332 // ===========================================================================
333 
334 USING_SCOPE(objects);
335 
DataLoaders_Register_AsnCache(void)336 void DataLoaders_Register_AsnCache(void)
337 {
338     RegisterEntryPoint<CDataLoader>(NCBI_EntryPoint_DataLoader_AsnCache);
339 }
340 
341 
342 const string kDataLoader_AsnCache_DriverName("asncache");
343 
344 class CAsnCache_DataLoaderCF : public CDataLoaderFactory
345 {
346 public:
CAsnCache_DataLoaderCF(void)347     CAsnCache_DataLoaderCF(void)
348         : CDataLoaderFactory(kDataLoader_AsnCache_DriverName) {}
~CAsnCache_DataLoaderCF(void)349     virtual ~CAsnCache_DataLoaderCF(void) {}
350 
351 protected:
352     virtual CDataLoader* CreateAndRegister(
353         CObjectManager& om,
354         const TPluginManagerParamTree* params) const;
355 };
356 
357 
CreateAndRegister(CObjectManager & om,const TPluginManagerParamTree * params) const358 CDataLoader* CAsnCache_DataLoaderCF::CreateAndRegister(
359     CObjectManager& om,
360     const TPluginManagerParamTree* params) const
361 {
362     string db_path =
363         GetParam(GetDriverName(), params,
364                  "DbPath", false);
365 
366     // IsDefault and Priority arguments may be specified
367     return CAsnCache_DataLoader::RegisterInObjectManager(om, db_path).GetLoader();
368 }
369 
370 
NCBI_EntryPoint_DataLoader_AsnCache(CPluginManager<CDataLoader>::TDriverInfoList & info_list,CPluginManager<CDataLoader>::EEntryPointRequest method)371 void NCBI_EntryPoint_DataLoader_AsnCache(
372     CPluginManager<CDataLoader>::TDriverInfoList&   info_list,
373     CPluginManager<CDataLoader>::EEntryPointRequest method)
374 {
375     CHostEntryPointImpl<CAsnCache_DataLoaderCF>::NCBI_EntryPointImpl(info_list, method);
376 }
377 
378 
NCBI_EntryPoint_xloader_asncache(CPluginManager<objects::CDataLoader>::TDriverInfoList & info_list,CPluginManager<objects::CDataLoader>::EEntryPointRequest method)379 void NCBI_EntryPoint_xloader_asncache(
380     CPluginManager<objects::CDataLoader>::TDriverInfoList&   info_list,
381     CPluginManager<objects::CDataLoader>::EEntryPointRequest method)
382 {
383     NCBI_EntryPoint_DataLoader_AsnCache(info_list, method);
384 }
385 
386 
387 END_NCBI_SCOPE
388