1 /*  $Id: gbnative.cpp 634758 2021-07-19 12:30:06Z ivanov $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 *  Author: Michael Kimelman, Eugene Vasilchenko
27 *
28 *  File Description: GenBank Data loader
29 *
30 */
31 
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbi_param.hpp>
34 #include <objtools/data_loaders/genbank/gbnative.hpp>
35 #include <objtools/data_loaders/genbank/gbloader_params.h>
36 #include <objtools/data_loaders/genbank/impl/dispatcher.hpp>
37 #include <objtools/data_loaders/genbank/impl/request_result.hpp>
38 #include <objtools/data_loaders/genbank/impl/wgsmaster.hpp>
39 
40 #include <objtools/data_loaders/genbank/reader_interface.hpp>
41 #include <objtools/data_loaders/genbank/writer_interface.hpp>
42 
43 // TODO: remove the following includes
44 #define REGISTER_READER_ENTRY_POINTS 1
45 #ifdef REGISTER_READER_ENTRY_POINTS
46 # include <objtools/data_loaders/genbank/readers.hpp>
47 #endif
48 
49 #include <objtools/data_loaders/genbank/seqref.hpp>
50 #include <objtools/error_codes.hpp>
51 
52 #include <objmgr/objmgr_exception.hpp>
53 
54 #include <objmgr/impl/tse_info.hpp>
55 #include <objmgr/impl/tse_chunk_info.hpp>
56 #include <objmgr/impl/bioseq_info.hpp>
57 #include <objmgr/impl/data_source.hpp>
58 #include <objmgr/data_loader_factory.hpp>
59 #include <objmgr/annot_selector.hpp>
60 
61 #include <objects/seqloc/Seq_id.hpp>
62 
63 #include <corelib/ncbithr.hpp>
64 #include <corelib/ncbiapp.hpp>
65 
66 #include <corelib/plugin_manager_impl.hpp>
67 #include <corelib/plugin_manager_store.hpp>
68 
69 #include <algorithm>
70 
71 
72 #define NCBI_USE_ERRCODE_X   Objtools_GBLoader
73 
74 BEGIN_NCBI_SCOPE
75 BEGIN_SCOPE(objects)
76 
77 //=======================================================================
78 //   GBLoader sub classes
79 //
80 
81 //=======================================================================
82 // GBLoader Public interface
83 //
84 
85 
86 NCBI_PARAM_DEF_EX(string, GENBANK, LOADER_METHOD, "",
87                   eParam_NoThread, GENBANK_LOADER_METHOD);
88 typedef NCBI_PARAM_TYPE(GENBANK, LOADER_METHOD) TGenbankLoaderMethod;
89 
90 NCBI_PARAM_DECL(string, GENBANK, READER_NAME);
91 NCBI_PARAM_DEF_EX(string, GENBANK, READER_NAME, "",
92                   eParam_NoThread, GENBANK_READER_NAME);
93 typedef NCBI_PARAM_TYPE(GENBANK, READER_NAME) TGenbankReaderName;
94 
95 NCBI_PARAM_DECL(string, GENBANK, WRITER_NAME);
96 NCBI_PARAM_DEF_EX(string, GENBANK, WRITER_NAME, "",
97                   eParam_NoThread, GENBANK_WRITER_NAME);
98 typedef NCBI_PARAM_TYPE(GENBANK, WRITER_NAME) TGenbankWriterName;
99 
100 #if defined(HAVE_PUBSEQ_OS)
101 static const char* const DEFAULT_DRV_ORDER = "ID2:PUBSEQOS:ID1";
102 #else
103 static const char* const DEFAULT_DRV_ORDER = "ID2:ID1";
104 #endif
105 
106 #define GBLOADER_NAME "GBLOADER"
107 #define GBLOADER_HUP_NAME "GBLOADER-HUP"
108 
109 #define DEFAULT_ID_GC_SIZE 10000
110 #define DEFAULT_ID_EXPIRATION_TIMEOUT 2*3600 // 2 hours
111 
112 class CGBReaderRequestResult : public CReaderRequestResult
113 {
114     typedef CReaderRequestResult TParent;
115 public:
116     CGBReaderRequestResult(CGBDataLoader_Native* loader,
117                            const CSeq_id_Handle& requested_id);
118     ~CGBReaderRequestResult(void);
119 
GetLoader(void)120     CGBDataLoader_Native& GetLoader(void)
121         {
122             return *m_Loader;
123         }
124     virtual CGBDataLoader_Native* GetLoaderPtr(void);
125 
126     //virtual TConn GetConn(void);
127     //virtual void ReleaseConn(void);
128     virtual CTSE_LoadLock GetTSE_LoadLock(const TKeyBlob& blob_id);
129     virtual CTSE_LoadLock GetTSE_LoadLockIfLoaded(const TKeyBlob& blob_id);
130     virtual void GetLoadedBlob_ids(const CSeq_id_Handle& idh,
131                                    TLoadedBlob_ids& blob_ids) const;
132 
operator CInitMutexPool&(void)133     virtual operator CInitMutexPool&(void) { return GetMutexPool(); }
134 
GetMutexPool(void)135     CInitMutexPool& GetMutexPool(void) { return m_Loader->m_MutexPool; }
136 
137     virtual TExpirationTime GetIdExpirationTimeout(GBL::EExpirationType type) const;
138 
139     virtual bool GetAddWGSMasterDescr(void) const;
140 
141     virtual EGBErrorAction GetPTISErrorAction(void) const;
142 
143     friend class CGBDataLoader_Native;
144 
145 private:
146     CRef<CGBDataLoader_Native> m_Loader;
147 };
148 
149 
ConvertRegInfo(const TGBMaker::TRegisterInfo & info)150 CGBDataLoader::TRegisterLoaderInfo CGBDataLoader_Native::ConvertRegInfo(const TGBMaker::TRegisterInfo& info)
151 {
152     TRegisterLoaderInfo ret;
153     ret.Set(info.GetLoader(), info.IsCreated());
154     return ret;
155 }
156 
157 
RegisterInObjectManager(CObjectManager & om,CReader * reader_ptr,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)158 CGBDataLoader::TRegisterLoaderInfo CGBDataLoader_Native::RegisterInObjectManager(
159     CObjectManager& om,
160     CReader*        reader_ptr,
161     CObjectManager::EIsDefault is_default,
162     CObjectManager::TPriority  priority)
163 {
164     CGBLoaderParams params(reader_ptr);
165     TGBMaker maker(params);
166     CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
167     return ConvertRegInfo(maker.GetRegisterInfo());
168 }
169 
170 
RegisterInObjectManager(CObjectManager & om,const string & reader_name,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)171 CGBDataLoader::TRegisterLoaderInfo CGBDataLoader_Native::RegisterInObjectManager(
172     CObjectManager& om,
173     const string&   reader_name,
174     CObjectManager::EIsDefault is_default,
175     CObjectManager::TPriority  priority)
176 {
177     CGBLoaderParams params(reader_name);
178     TGBMaker maker(params);
179     CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
180     return ConvertRegInfo(maker.GetRegisterInfo());
181 }
182 
183 
RegisterInObjectManager(CObjectManager & om,EIncludeHUP include_hup,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)184 CGBDataLoader::TRegisterLoaderInfo CGBDataLoader_Native::RegisterInObjectManager(
185     CObjectManager& om,
186     EIncludeHUP     include_hup,
187     CObjectManager::EIsDefault is_default,
188     CObjectManager::TPriority  priority)
189 {
190     return RegisterInObjectManager(om, include_hup, NcbiEmptyString, is_default,
191                                    priority);
192 }
193 
194 
RegisterInObjectManager(CObjectManager & om,EIncludeHUP,const string & web_cookie,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)195 CGBDataLoader::TRegisterLoaderInfo CGBDataLoader_Native::RegisterInObjectManager(
196     CObjectManager& om,
197     EIncludeHUP     /*include_hup*/,
198     const string& web_cookie,
199     CObjectManager::EIsDefault is_default,
200     CObjectManager::TPriority  priority)
201 {
202     CGBLoaderParams params("PUBSEQOS2:PUBSEQOS");
203     params.SetHUPIncluded(true, web_cookie);
204     TGBMaker maker(params);
205     CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
206     return ConvertRegInfo(maker.GetRegisterInfo());
207 }
208 
209 
RegisterInObjectManager(CObjectManager & om,const string & reader_name,EIncludeHUP include_hup,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)210 CGBDataLoader::TRegisterLoaderInfo CGBDataLoader_Native::RegisterInObjectManager(
211     CObjectManager& om,
212     const string&   reader_name,
213     EIncludeHUP     include_hup,
214     CObjectManager::EIsDefault is_default,
215     CObjectManager::TPriority  priority)
216 {
217     return RegisterInObjectManager(om, reader_name, include_hup, NcbiEmptyString,
218                                    is_default, priority);
219 }
220 
RegisterInObjectManager(CObjectManager & om,const string & reader_name,EIncludeHUP,const string & web_cookie,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)221 CGBDataLoader::TRegisterLoaderInfo CGBDataLoader_Native::RegisterInObjectManager(
222     CObjectManager& om,
223     const string&   reader_name,
224     EIncludeHUP     /*include_hup*/,
225     const string& web_cookie,
226     CObjectManager::EIsDefault is_default,
227     CObjectManager::TPriority  priority)
228 {
229     CGBLoaderParams params(reader_name);
230     params.SetHUPIncluded(true, web_cookie);
231     TGBMaker maker(params);
232     CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
233     return ConvertRegInfo(maker.GetRegisterInfo());
234 }
235 
236 
RegisterInObjectManager(CObjectManager & om,const TParamTree & param_tree,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)237 CGBDataLoader::TRegisterLoaderInfo CGBDataLoader_Native::RegisterInObjectManager(
238     CObjectManager& om,
239     const TParamTree& param_tree,
240     CObjectManager::EIsDefault is_default,
241     CObjectManager::TPriority  priority)
242 {
243     CGBLoaderParams params(&param_tree);
244     TGBMaker maker(params);
245     CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
246     return ConvertRegInfo(maker.GetRegisterInfo());
247 }
248 
249 
RegisterInObjectManager(CObjectManager & om,const CGBLoaderParams & params,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)250 CGBDataLoader::TRegisterLoaderInfo CGBDataLoader_Native::RegisterInObjectManager(
251     CObjectManager& om,
252     const CGBLoaderParams& params,
253     CObjectManager::EIsDefault is_default,
254     CObjectManager::TPriority  priority)
255 {
256     TGBMaker maker(params);
257     CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
258     return ConvertRegInfo(maker.GetRegisterInfo());
259 }
260 
261 
CGBDataLoader_Native(const string & loader_name,const CGBLoaderParams & params)262 CGBDataLoader_Native::CGBDataLoader_Native(const string& loader_name,
263                              const CGBLoaderParams& params)
264     : CGBDataLoader(loader_name, params)
265 {
266     GBLOG_POST_X(1, "CGBDataLoader_Native");
267     x_CreateDriver(params);
268 }
269 
270 
~CGBDataLoader_Native(void)271 CGBDataLoader_Native::~CGBDataLoader_Native(void)
272 {
273     GBLOG_POST_X(2, "~CGBDataLoader_Native");
274     CloseCache();
275 }
276 
277 
GetDefaultPriority(void) const278 CObjectManager::TPriority CGBDataLoader_Native::GetDefaultPriority(void) const
279 {
280     CObjectManager::TPriority priority = CDataLoader::GetDefaultPriority();
281     if ( HasHUPIncluded() ) {
282         // HUP data loader has lower priority by default
283         priority += 1;
284     }
285     return priority;
286 }
287 
288 
289 CGBDataLoader::TRealBlobId
x_GetRealBlobId(const TBlobId & blob_id) const290 CGBDataLoader_Native::x_GetRealBlobId(const TBlobId& blob_id) const
291 {
292     return dynamic_cast<const CBlob_id&>(*blob_id);
293 }
294 
295 
x_CreateDriver(const CGBLoaderParams & params)296 void CGBDataLoader_Native::x_CreateDriver(const CGBLoaderParams& params)
297 {
298     auto_ptr<TParamTree> app_params;
299     const TParamTree* gb_params = 0;
300     if ( params.GetParamTree() ) {
301         gb_params = GetLoaderParams(params.GetParamTree());
302     }
303     else {
304         CNcbiApplicationGuard app = CNcbiApplication::InstanceGuard();
305         if ( app ) {
306             app_params.reset(CConfig::ConvertRegToTree(app->GetConfig()));
307             gb_params = GetLoaderParams(app_params.get());
308         }
309     }
310 
311     size_t queue_size = DEFAULT_ID_GC_SIZE;
312     if ( gb_params ) {
313         try {
314             string param = GetParam(gb_params, NCBI_GBLOADER_PARAM_ID_GC_SIZE);
315             if ( !param.empty() ) {
316                 queue_size = NStr::StringToUInt(param);
317             }
318         }
319         catch ( CException& /*ignored*/ ) {
320         }
321     }
322 
323     m_IdExpirationTimeout = DEFAULT_ID_EXPIRATION_TIMEOUT;
324     if ( gb_params ) {
325         string param =
326             GetParam(gb_params, NCBI_GBLOADER_PARAM_ID_EXPIRATION_TIMEOUT);
327         if ( !param.empty() ) {
328             try {
329                 Uint4 timeout = NStr::StringToNumeric<Uint4>(param);
330                 if ( timeout > 0 ) {
331                     m_IdExpirationTimeout = timeout;
332                 }
333             }
334             catch ( CException& exc ) {
335                 NCBI_RETHROW_FMT(exc, CLoaderException, eBadConfig,
336                                  "Bad value of parameter "
337                                  NCBI_GBLOADER_PARAM_ID_EXPIRATION_TIMEOUT
338                                  ": \""<<param<<"\"");
339             }
340         }
341     }
342     m_AlwaysLoadExternal = false;
343     if ( gb_params ) {
344         string param =
345             GetParam(gb_params, NCBI_GBLOADER_PARAM_ALWAYS_LOAD_EXTERNAL);
346         if ( !param.empty() ) {
347             try {
348                 m_AlwaysLoadExternal = NStr::StringToBool(param);
349             }
350             catch ( CException& exc ) {
351                 NCBI_RETHROW_FMT(exc, CLoaderException, eBadConfig,
352                                  "Bad value of parameter "
353                                  NCBI_GBLOADER_PARAM_ALWAYS_LOAD_EXTERNAL
354                                  ": \""<<param<<"\"");
355             }
356         }
357     }
358     m_AlwaysLoadNamedAcc = true;
359     if ( gb_params ) {
360         string param =
361             GetParam(gb_params, NCBI_GBLOADER_PARAM_ALWAYS_LOAD_NAMED_ACC);
362         if ( !param.empty() ) {
363             try {
364                 m_AlwaysLoadNamedAcc = NStr::StringToBool(param);
365             }
366             catch ( CException& exc ) {
367                 NCBI_RETHROW_FMT(exc, CLoaderException, eBadConfig,
368                                  "Bad value of parameter "
369                                  NCBI_GBLOADER_PARAM_ALWAYS_LOAD_NAMED_ACC
370                                  ": \""<<param<<"\"");
371             }
372         }
373     }
374     m_AddWGSMasterDescr = true;
375     if ( gb_params ) {
376         string param =
377             GetParam(gb_params, NCBI_GBLOADER_PARAM_ADD_WGS_MASTER);
378         if ( !param.empty() ) {
379             try {
380                 m_AddWGSMasterDescr = NStr::StringToBool(param);
381             }
382             catch ( CException& exc ) {
383                 NCBI_RETHROW_FMT(exc, CLoaderException, eBadConfig,
384                                  "Bad value of parameter "
385                                  NCBI_GBLOADER_PARAM_ADD_WGS_MASTER
386                                  ": \""<<param<<"\"");
387             }
388         }
389     }
390     m_PTISErrorAction = eGBErrorAction_report;
391     if ( gb_params ) {
392         string param =
393             GetParam(gb_params, NCBI_GBLOADER_PARAM_PTIS_ERROR_ACTION);
394         if ( !param.empty() ) {
395             if ( NStr::EqualNocase(param, NCBI_GBLOADER_PARAM_PTIS_ERROR_ACTION_IGNORE) ) {
396                 m_PTISErrorAction = eGBErrorAction_ignore;
397             }
398             else if ( NStr::EqualNocase(param, NCBI_GBLOADER_PARAM_PTIS_ERROR_ACTION_REPORT) ) {
399                 m_PTISErrorAction = eGBErrorAction_report;
400             }
401             else if ( NStr::EqualNocase(param, NCBI_GBLOADER_PARAM_PTIS_ERROR_ACTION_THROW) ) {
402                 m_PTISErrorAction = eGBErrorAction_throw;
403             }
404             else {
405                 NCBI_THROW_FMT(CLoaderException, eBadConfig,
406                                "Bad value of parameter "
407                                NCBI_GBLOADER_PARAM_PTIS_ERROR_ACTION
408                                ": \""<<param<<"\"");
409             }
410         }
411     }
412 
413     m_Dispatcher = new CReadDispatcher;
414     m_InfoManager = new CGBInfoManager(queue_size);
415 
416     // now we create readers & writers
417     if ( params.GetReaderPtr() ) {
418         // explicit reader specified
419         CRef<CReader> reader(params.GetReaderPtr());
420         reader->OpenInitialConnection(false);
421         m_Dispatcher->InsertReader(1, reader);
422         return;
423     }
424 
425     CGBLoaderParams::EPreopenConnection preopen =
426         params.GetPreopenConnection();
427     if ( preopen == CGBLoaderParams::ePreopenByConfig && gb_params ) {
428         try {
429             string param = GetParam(gb_params, NCBI_GBLOADER_PARAM_PREOPEN);
430             if ( !param.empty() ) {
431                 if ( NStr::StringToBool(param) )
432                     preopen = CGBLoaderParams::ePreopenAlways;
433                 else
434                     preopen = CGBLoaderParams::ePreopenNever;
435             }
436         }
437         catch ( CException& /*ignored*/ ) {
438         }
439     }
440 
441     if ( !gb_params ) {
442         app_params.reset(new TParamTree);
443         gb_params = GetLoaderParams(app_params.get());
444     }
445 
446     if ( !params.GetReaderName().empty() ) {
447         string reader_name = params.GetReaderName();
448         NStr::ToLower(reader_name);
449         if ( NStr::StartsWith(reader_name, "pubseqos") )
450             m_WebCookie = params.GetWebCookie();
451 
452         if ( x_CreateReaders(reader_name, gb_params, preopen) ) {
453             if ( reader_name == "cache" ||
454                  NStr::StartsWith(reader_name, "cache;") ) {
455                 x_CreateWriters("cache", gb_params);
456             }
457         }
458     }
459     else {
460         pair<string, string> rw_name = GetReaderWriterName(gb_params);
461         if ( x_CreateReaders(rw_name.first, gb_params, preopen) ) {
462             x_CreateWriters(rw_name.second, gb_params);
463         }
464     }
465 }
466 
467 
468 pair<string, string>
GetReaderWriterName(const TParamTree * params) const469 CGBDataLoader_Native::GetReaderWriterName(const TParamTree* params) const
470 {
471     pair<string, string> ret;
472     ret.first = GetParam(params, NCBI_GBLOADER_PARAM_READER_NAME);
473     if ( ret.first.empty() ) {
474         ret.first = TGenbankReaderName::GetDefault();
475     }
476     ret.second = GetParam(params, NCBI_GBLOADER_PARAM_WRITER_NAME);
477     if ( ret.first.empty() ) {
478         ret.first = TGenbankWriterName::GetDefault();
479     }
480     if ( ret.first.empty() || ret.second.empty() ) {
481         string method = GetParam(params, NCBI_GBLOADER_PARAM_LOADER_METHOD);
482         if ( method.empty() ) {
483             // try config first
484             method = TGenbankLoaderMethod::GetDefault();
485         }
486         if ( method.empty() ) {
487             // fall back default reader list
488             method = DEFAULT_DRV_ORDER;
489         }
490         NStr::ToLower(method);
491         if ( ret.first.empty() ) {
492             ret.first = method;
493         }
494         if ( ret.second.empty() && NStr::StartsWith(method, "cache;") ) {
495             ret.second = "cache";
496         }
497     }
498     NStr::ToLower(ret.first);
499     NStr::ToLower(ret.second);
500     return ret;
501 }
502 
503 
x_CreateReaders(const string & str,const TParamTree * params,CGBLoaderParams::EPreopenConnection preopen)504 bool CGBDataLoader_Native::x_CreateReaders(const string& str,
505                                     const TParamTree* params,
506                                     CGBLoaderParams::EPreopenConnection preopen)
507 {
508     vector<string> str_list;
509     NStr::Split(str, ";", str_list);
510     size_t reader_count = 0;
511     for ( size_t i = 0; i < str_list.size(); ++i ) {
512         CRef<CReader> reader(x_CreateReader(str_list[i], params));
513         if( reader ) {
514             if ( HasHUPIncluded() ) {
515                 reader->SetIncludeHUP(true, m_WebCookie);
516             }
517             if ( preopen != CGBLoaderParams::ePreopenNever ) {
518                 reader->OpenInitialConnection(preopen == CGBLoaderParams::ePreopenAlways);
519             }
520             m_Dispatcher->InsertReader(i, reader);
521             ++reader_count;
522         }
523     }
524     if ( !reader_count ) {
525         NCBI_THROW(CLoaderException, eLoaderFailed,
526                    "no reader available from "+str);
527     }
528     return reader_count > 1 || str_list.size() > 1;
529 }
530 
531 
x_CreateWriters(const string & str,const TParamTree * params)532 void CGBDataLoader_Native::x_CreateWriters(const string& str,
533                                     const TParamTree* params)
534 {
535     vector<string> str_list;
536     NStr::Split(str, ";", str_list);
537     for ( size_t i = 0; i < str_list.size(); ++i ) {
538         if ( HasHUPIncluded() ) {
539             NCBI_THROW(CObjMgrException, eRegisterError,
540                        "HUP GBLoader cannot have cache");
541         }
542         CRef<CWriter> writer(x_CreateWriter(str_list[i], params));
543         if( writer ) {
544             m_Dispatcher->InsertWriter(i, writer);
545         }
546     }
547 }
548 
549 
550 #ifdef REGISTER_READER_ENTRY_POINTS
551 NCBI_PARAM_DECL(bool, GENBANK, REGISTER_READERS);
552 NCBI_PARAM_DEF_EX(bool, GENBANK, REGISTER_READERS, true,
553                   eParam_NoThread, GENBANK_REGISTER_READERS);
554 #endif
555 
x_GetReaderManager(void)556 CRef<CPluginManager<CReader> > CGBDataLoader_Native::x_GetReaderManager(void)
557 {
558     CRef<TReaderManager> manager(CPluginManagerGetter<CReader>::Get());
559     _ASSERT(manager);
560 
561 #ifdef REGISTER_READER_ENTRY_POINTS
562     if ( NCBI_PARAM_TYPE(GENBANK, REGISTER_READERS)::GetDefault() ) {
563         GenBankReaders_Register_Id1();
564         GenBankReaders_Register_Id2();
565         GenBankReaders_Register_Cache();
566 # ifdef HAVE_PUBSEQ_OS
567         //GenBankReaders_Register_Pubseq();
568 # endif
569     }
570 #endif
571 
572     return manager;
573 }
574 
575 
x_GetWriterManager(void)576 CRef<CPluginManager<CWriter> > CGBDataLoader_Native::x_GetWriterManager(void)
577 {
578     CRef<TWriterManager> manager(CPluginManagerGetter<CWriter>::Get());
579     _ASSERT(manager);
580 
581 #ifdef REGISTER_READER_ENTRY_POINTS
582     if ( NCBI_PARAM_TYPE(GENBANK, REGISTER_READERS)::GetDefault() ) {
583         GenBankWriters_Register_Cache();
584     }
585 #endif
586 
587     return manager;
588 }
589 
590 
// True for a non-empty driver list that does not end with ':'.
// Callers treat such a list as mandatory: failing to create a driver
// from it is an error rather than a skipped slot.
static bool s_ForceDriver(const string& name)
{
    if ( name.empty() ) {
        return false;
    }
    const char last_char = *name.rbegin();
    return last_char != ':';
}
595 
596 
x_CreateReader(const string & name,const TParamTree * params)597 CReader* CGBDataLoader_Native::x_CreateReader(const string& name,
598                                        const TParamTree* params)
599 {
600     CRef<TReaderManager> manager = x_GetReaderManager();
601     CReader* ret = manager->CreateInstanceFromList(params, name);
602     if ( !ret ) {
603         if ( s_ForceDriver(name) ) {
604             // reader is required at this slot
605             NCBI_THROW(CLoaderException, eLoaderFailed,
606                        "no reader available from "+name);
607         }
608     }
609     else {
610         ret->InitializeCache(m_CacheManager, params);
611     }
612     return ret;
613 }
614 
615 
x_CreateWriter(const string & name,const TParamTree * params)616 CWriter* CGBDataLoader_Native::x_CreateWriter(const string& name,
617                                        const TParamTree* params)
618 {
619     CRef<TWriterManager> manager = x_GetWriterManager();
620     CWriter* ret = manager->CreateInstanceFromList(params, name);
621     if ( !ret ) {
622         if ( s_ForceDriver(name) ) {
623             // writer is required at this slot
624             NCBI_THROW(CLoaderException, eLoaderFailed,
625                        "no writer available from "+name);
626         }
627     }
628     else {
629         ret->InitializeCache(m_CacheManager, params);
630     }
631     return ret;
632 }
633 
634 
GetBlobId(const CSeq_id_Handle & sih)635 CDataLoader::TBlobId CGBDataLoader_Native::GetBlobId(const CSeq_id_Handle& sih)
636 {
637     if ( CReadDispatcher::CannotProcess(sih) ) {
638         return TBlobId();
639     }
640     CGBReaderRequestResult result(this, sih);
641     CLoadLockBlobIds blobs(result, sih, 0);
642     m_Dispatcher->LoadSeq_idBlob_ids(result, sih, 0);
643     CFixedBlob_ids blob_ids = blobs.GetBlob_ids();
644     ITERATE ( CFixedBlob_ids, it, blob_ids ) {
645         if ( it->Matches(fBlobHasCore, 0) ) {
646             return TBlobId(it->GetBlob_id().GetPointer());
647         }
648     }
649     return TBlobId();
650 }
651 
GetBlobIdFromString(const string & str) const652 CDataLoader::TBlobId CGBDataLoader_Native::GetBlobIdFromString(const string& str) const
653 {
654     return TBlobId(CBlob_id::CreateFromString(str));
655 }
656 
657 
GetIds(const CSeq_id_Handle & idh,TIds & ids)658 void CGBDataLoader_Native::GetIds(const CSeq_id_Handle& idh, TIds& ids)
659 {
660     if ( CReadDispatcher::CannotProcess(idh) ) {
661         return;
662     }
663     CGBReaderRequestResult result(this, idh);
664     CLoadLockSeqIds lock(result, idh);
665     if ( !lock.IsLoaded() ) {
666         m_Dispatcher->LoadSeq_idSeq_ids(result, idh);
667     }
668     ids = lock.GetSeq_ids();
669 }
670 
671 
672 CDataLoader::SAccVerFound
GetAccVerFound(const CSeq_id_Handle & idh)673 CGBDataLoader_Native::GetAccVerFound(const CSeq_id_Handle& idh)
674 {
675     SAccVerFound ret;
676     if ( CReadDispatcher::CannotProcess(idh) ) {
677         // no such sequence
678         return ret;
679     }
680     CGBReaderRequestResult result(this, idh);
681     CLoadLockAcc lock(result, idh);
682     if ( !lock.IsLoadedAccVer() ) {
683         m_Dispatcher->LoadSeq_idAccVer(result, idh);
684     }
685     if ( lock.IsLoadedAccVer() ) {
686         ret = lock.GetAccVer();
687     }
688     return ret;
689 }
690 
691 
692 CDataLoader::SGiFound
GetGiFound(const CSeq_id_Handle & idh)693 CGBDataLoader_Native::GetGiFound(const CSeq_id_Handle& idh)
694 {
695     SGiFound ret;
696     if ( CReadDispatcher::CannotProcess(idh) ) {
697         return ret;
698     }
699     CGBReaderRequestResult result(this, idh);
700     CLoadLockGi lock(result, idh);
701     if ( !lock.IsLoadedGi() ) {
702         m_Dispatcher->LoadSeq_idGi(result, idh);
703     }
704     if ( lock.IsLoadedGi() ) {
705         ret = lock.GetGi();
706     }
707     return ret;
708 }
709 
710 
GetLabel(const CSeq_id_Handle & idh)711 string CGBDataLoader_Native::GetLabel(const CSeq_id_Handle& idh)
712 {
713     if ( CReadDispatcher::CannotProcess(idh) ) {
714         return string();
715     }
716     CGBReaderRequestResult result(this, idh);
717     CLoadLockLabel lock(result, idh);
718     if ( !lock.IsLoadedLabel() ) {
719         m_Dispatcher->LoadSeq_idLabel(result, idh);
720     }
721     return lock.GetLabel();
722 }
723 
724 
GetTaxId(const CSeq_id_Handle & idh)725 TTaxId CGBDataLoader_Native::GetTaxId(const CSeq_id_Handle& idh)
726 {
727     if ( CReadDispatcher::CannotProcess(idh) ) {
728         return INVALID_TAX_ID;
729     }
730     CGBReaderRequestResult result(this, idh);
731     CLoadLockTaxId lock(result, idh);
732     if ( !lock.IsLoadedTaxId() ) {
733         m_Dispatcher->LoadSeq_idTaxId(result, idh);
734     }
735     TTaxId taxid = lock.IsLoadedTaxId()? lock.GetTaxId(): INVALID_TAX_ID;
736     if ( taxid == INVALID_TAX_ID ) {
737         return CDataLoader::GetTaxId(idh);
738     }
739     return taxid;
740 }
741 
742 
GetSequenceState(const CSeq_id_Handle & sih)743 int CGBDataLoader_Native::GetSequenceState(const CSeq_id_Handle& sih)
744 {
745     const int kNotFound = (CBioseq_Handle::fState_not_found |
746                            CBioseq_Handle::fState_no_data);
747 
748     if ( CReadDispatcher::CannotProcess(sih) ) {
749         return kNotFound;
750     }
751     TIds ids(1, sih);
752     TLoaded loaded(1);
753     TSequenceStates states(1);
754     GetSequenceStates(ids, loaded, states);
755     return loaded[0]? states[0]: kNotFound;
756 }
757 
758 
GetAccVers(const TIds & ids,TLoaded & loaded,TIds & ret)759 void CGBDataLoader_Native::GetAccVers(const TIds& ids, TLoaded& loaded, TIds& ret)
760 {
761     for ( size_t i = 0; i < ids.size(); ++i ) {
762         if ( loaded[i] || CReadDispatcher::CannotProcess(ids[i]) ) {
763             continue;
764         }
765         CGBReaderRequestResult result(this, ids[i]);
766         m_Dispatcher->LoadAccVers(result, ids, loaded, ret);
767         return;
768     }
769 }
770 
771 
GetGis(const TIds & ids,TLoaded & loaded,TGis & ret)772 void CGBDataLoader_Native::GetGis(const TIds& ids, TLoaded& loaded, TGis& ret)
773 {
774     for ( size_t i = 0; i < ids.size(); ++i ) {
775         if ( loaded[i] || CReadDispatcher::CannotProcess(ids[i]) ) {
776             continue;
777         }
778         CGBReaderRequestResult result(this, ids[i]);
779         m_Dispatcher->LoadGis(result, ids, loaded, ret);
780         return;
781     }
782 }
783 
784 
GetLabels(const TIds & ids,TLoaded & loaded,TLabels & ret)785 void CGBDataLoader_Native::GetLabels(const TIds& ids, TLoaded& loaded, TLabels& ret)
786 {
787     for ( size_t i = 0; i < ids.size(); ++i ) {
788         if ( loaded[i] || CReadDispatcher::CannotProcess(ids[i]) ) {
789             continue;
790         }
791         CGBReaderRequestResult result(this, ids[i]);
792         m_Dispatcher->LoadLabels(result, ids, loaded, ret);
793         return;
794     }
795 }
796 
797 
GetTaxIds(const TIds & ids,TLoaded & loaded,TTaxIds & ret)798 void CGBDataLoader_Native::GetTaxIds(const TIds& ids, TLoaded& loaded, TTaxIds& ret)
799 {
800     for ( size_t i = 0; i < ids.size(); ++i ) {
801         if ( loaded[i] || CReadDispatcher::CannotProcess(ids[i]) ) {
802             continue;
803         }
804         CGBReaderRequestResult result(this, ids[i]);
805         m_Dispatcher->LoadTaxIds(result, ids, loaded, ret);
806 
807         // the ID2 may accidentally return no taxid for newly loaded sequences
808         // we have to fall back to full sequence retrieval in such cases
809         bool retry = false;
810         for ( size_t i = 0; i < ids.size(); ++i ) {
811             if ( loaded[i] && ret[i] == INVALID_TAX_ID ) {
812                 loaded[i] = false;
813                 retry = true;
814             }
815         }
816         if ( retry ) {
817             // full sequence retrieval is implemented in base CDataLoader class
818             CDataLoader::GetTaxIds(ids, loaded, ret);
819         }
820 
821         return;
822     }
823 }
824 
825 
826 static const bool s_LoadBulkBlobs = true;
827 
828 
GetSequenceLength(const CSeq_id_Handle & sih)829 TSeqPos CGBDataLoader_Native::GetSequenceLength(const CSeq_id_Handle& sih)
830 {
831     if ( CReadDispatcher::CannotProcess(sih) ) {
832         return kInvalidSeqPos;
833     }
834     CGBReaderRequestResult result(this, sih);
835     CLoadLockLength lock(result, sih);
836     if ( !lock.IsLoadedLength() ) {
837         m_Dispatcher->LoadSequenceLength(result, sih);
838     }
839     return lock.IsLoaded()? lock.GetLength(): 0;
840 }
841 
842 
GetSequenceLengths(const TIds & ids,TLoaded & loaded,TSequenceLengths & ret)843 void CGBDataLoader_Native::GetSequenceLengths(const TIds& ids, TLoaded& loaded,
844                                        TSequenceLengths& ret)
845 {
846     for ( size_t i = 0; i < ids.size(); ++i ) {
847         if ( loaded[i] || CReadDispatcher::CannotProcess(ids[i]) ) {
848             continue;
849         }
850         CGBReaderRequestResult result(this, ids[i]);
851         m_Dispatcher->LoadLengths(result, ids, loaded, ret);
852         return;
853     }
854 }
855 
856 
857 CDataLoader::STypeFound
GetSequenceTypeFound(const CSeq_id_Handle & sih)858 CGBDataLoader_Native::GetSequenceTypeFound(const CSeq_id_Handle& sih)
859 {
860     STypeFound ret;
861     if ( CReadDispatcher::CannotProcess(sih) ) {
862         return ret;
863     }
864     CGBReaderRequestResult result(this, sih);
865     CLoadLockType lock(result, sih);
866     if ( !lock.IsLoadedType() ) {
867         m_Dispatcher->LoadSequenceType(result, sih);
868     }
869     if ( lock.IsLoadedType() ) {
870         ret = lock.GetType();
871     }
872     return ret;
873 }
874 
875 
GetSequenceTypes(const TIds & ids,TLoaded & loaded,TSequenceTypes & ret)876 void CGBDataLoader_Native::GetSequenceTypes(const TIds& ids, TLoaded& loaded,
877                                      TSequenceTypes& ret)
878 {
879     for ( size_t i = 0; i < ids.size(); ++i ) {
880         if ( loaded[i] || CReadDispatcher::CannotProcess(ids[i]) ) {
881             continue;
882         }
883         CGBReaderRequestResult result(this, ids[i]);
884         m_Dispatcher->LoadTypes(result, ids, loaded, ret);
885         return;
886     }
887 }
888 
889 
GetSequenceStates(const TIds & ids,TLoaded & loaded,TSequenceStates & ret)890 void CGBDataLoader_Native::GetSequenceStates(const TIds& ids, TLoaded& loaded,
891                                       TSequenceStates& ret)
892 {
893     if ( !s_LoadBulkBlobs ) {
894         CDataLoader::GetSequenceStates(ids, loaded, ret);
895         return;
896     }
897     for ( size_t i = 0; i < ids.size(); ++i ) {
898         if ( loaded[i] || CReadDispatcher::CannotProcess(ids[i]) ) {
899             continue;
900         }
901         CGBReaderRequestResult result(this, ids[i]);
902         m_Dispatcher->LoadStates(result, ids, loaded, ret);
903         return;
904     }
905     /*
906     set<CSeq_id_Handle> load_set;
907     size_t count = ids.size();
908     _ASSERT(ids.size() == loaded.size());
909     _ASSERT(ids.size() == ret.size());
910     CGBReaderRequestResult result(this, ids[0]);
911     for ( size_t i = 0; i < count; ++i ) {
912         if ( loaded[i] || CReadDispatcher::CannotProcess(ids[i]) ) {
913             continue;
914         }
915         CLoadLockBlobIds blobs(result, ids[i], 0);
916         if ( blobs.IsLoaded() ) {
917             continue;
918         }
919         // add into loading set
920         load_set.insert(ids[i]);
921     }
922     if ( !load_set.empty() ) {
923         result.SetRequestedId(*load_set.begin());
924         m_Dispatcher->LoadSeq_idsBlob_ids(result, load_set);
925     }
926     // update sequence states
927     for ( size_t i = 0; i < count; ++i ) {
928         CReadDispatcher::SetBlobState(i, result, ids, loaded, ret);
929     }
930     */
931 }
932 
933 
934 CDataLoader::SHashFound
GetSequenceHashFound(const CSeq_id_Handle & sih)935 CGBDataLoader_Native::GetSequenceHashFound(const CSeq_id_Handle& sih)
936 {
937     SHashFound ret;
938     if ( CReadDispatcher::CannotProcess(sih) ) {
939         return ret;
940     }
941     CGBReaderRequestResult result(this, sih);
942     CLoadLockHash lock(result, sih);
943     if ( !lock.IsLoadedHash() ) {
944         m_Dispatcher->LoadSequenceHash(result, sih);
945     }
946     if ( lock.IsLoadedHash() ) {
947         ret = lock.GetHash();
948     }
949     return ret;
950 }
951 
952 
GetSequenceHashes(const TIds & ids,TLoaded & loaded,TSequenceHashes & ret,THashKnown & known)953 void CGBDataLoader_Native::GetSequenceHashes(const TIds& ids, TLoaded& loaded,
954                                       TSequenceHashes& ret, THashKnown& known)
955 {
956     for ( size_t i = 0; i < ids.size(); ++i ) {
957         if ( loaded[i] || CReadDispatcher::CannotProcess(ids[i]) ) {
958             continue;
959         }
960         CGBReaderRequestResult result(this, ids[i]);
961         m_Dispatcher->LoadHashes(result, ids, loaded, ret, known);
962         return;
963     }
964 }
965 
966 
GetBlobVersion(const TBlobId & id)967 CDataLoader::TBlobVersion CGBDataLoader_Native::GetBlobVersion(const TBlobId& id)
968 {
969     TRealBlobId blob_id = GetRealBlobId(id);
970     CGBReaderRequestResult result(this, CSeq_id_Handle());
971     CLoadLockBlobVersion lock(result, blob_id);
972     if ( !lock.IsLoadedBlobVersion() ) {
973         m_Dispatcher->LoadBlobVersion(result, blob_id);
974     }
975     return lock.GetBlobVersion();
976 }
977 
978 
979 CDataLoader::TTSE_Lock
ResolveConflict(const CSeq_id_Handle & handle,const TTSE_LockSet & tse_set)980 CGBDataLoader_Native::ResolveConflict(const CSeq_id_Handle& handle,
981                                const TTSE_LockSet& tse_set)
982 {
983     TTSE_Lock best;
984     bool         conflict=false;
985 
986     CGBReaderRequestResult result(this, handle);
987 
988     ITERATE(TTSE_LockSet, sit, tse_set) {
989         const CTSE_Info& tse = **sit;
990         TRealBlobId rbid = GetRealBlobId(tse);
991         CLoadLockBlob blob(result, GetRealBlobId(tse));
992         _ASSERT(blob);
993 
994         /*
995         if ( tse.m_SeqIds.find(handle) == tse->m_SeqIds.end() ) {
996             continue;
997         }
998         */
999 
1000         // listed for given TSE
1001         if ( !best ) {
1002             best = *sit; conflict=false;
1003         }
1004         else if( !tse.IsDead() && best->IsDead() ) {
1005             best = *sit; conflict=false;
1006         }
1007         else if( tse.IsDead() && best->IsDead() ) {
1008             conflict=true;
1009         }
1010         else if( tse.IsDead() && !best->IsDead() ) {
1011         }
1012         else {
1013             conflict=true;
1014             //_ASSERT(tse.IsDead() || best->IsDead());
1015         }
1016     }
1017 
1018 /*
1019     if ( !best || conflict ) {
1020         // try harder
1021         best.Reset();
1022         conflict=false;
1023 
1024         CReaderRequestResultBlob_ids blobs = result.GetResultBlob_ids(handle);
1025         _ASSERT(blobs);
1026 
1027         ITERATE ( CLoadInfoBlob_ids, it, *blobs ) {
1028             TBlob_InfoMap::iterator tsep =
1029                 m_Blob_InfoMap.find((*srp)->GetKeyByTSE());
1030             if (tsep == m_Blob_InfoMap.end()) continue;
1031             ITERATE(TTSE_LockSet, sit, tse_set) {
1032                 CConstRef<CTSE_Info> ti = *sit;
1033                 //TTse2TSEinfo::iterator it =
1034                 //    m_Tse2TseInfo.find(&ti->GetSeq_entry());
1035                 //if(it==m_Tse2TseInfo.end()) continue;
1036                 CRef<STSEinfo> tinfo = GetTSEinfo(*ti);
1037                 if ( !tinfo )
1038                     continue;
1039 
1040                 if(tinfo==tsep->second) {
1041                     if ( !best )
1042                         best=ti;
1043                     else if (ti != best)
1044                         conflict=true;
1045                 }
1046             }
1047         }
1048         if(conflict)
1049             best.Reset();
1050     }
1051 */
1052     if ( !best || conflict ) {
1053         _TRACE("CGBDataLoader::ResolveConflict("<<handle.AsString()<<"): "
1054                "conflict");
1055     }
1056     return best;
1057 }
1058 
1059 
HaveCache(TCacheType cache_type)1060 bool CGBDataLoader_Native::HaveCache(TCacheType cache_type)
1061 {
1062     ITERATE(CReaderCacheManager::TCaches, it, m_CacheManager.GetCaches()) {
1063         if ((it->m_Type & cache_type) != 0) {
1064             return true;
1065         }
1066     }
1067     return false;
1068 }
1069 
1070 
PurgeCache(TCacheType cache_type,time_t access_timeout)1071 void CGBDataLoader_Native::PurgeCache(TCacheType            cache_type,
1072                                time_t                access_timeout)
1073 {
1074     ITERATE(CReaderCacheManager::TCaches, it, m_CacheManager.GetCaches()) {
1075         if ((it->m_Type & cache_type) != 0) {
1076             it->m_Cache->Purge(access_timeout);
1077         }
1078     }
1079 }
1080 
1081 
CloseCache(void)1082 void CGBDataLoader_Native::CloseCache(void)
1083 {
1084     // Reset cache for each reader/writer
1085     m_Dispatcher->ResetCaches();
1086     m_CacheManager.GetCaches().clear();
1087 }
1088 
1089 
1090 //=======================================================================
1091 // GBLoader private interface
1092 //
GC(void)1093 void CGBDataLoader_Native::GC(void)
1094 {
1095 }
1096 
1097 
x_MakeContentMask(EChoice choice) const1098 TBlobContentsMask CGBDataLoader_Native::x_MakeContentMask(EChoice choice) const
1099 {
1100     switch(choice) {
1101     case CGBDataLoader_Native::eBlob:
1102     case CGBDataLoader_Native::eBioseq:
1103         // whole bioseq
1104         return fBlobHasAllLocal;
1105     case CGBDataLoader_Native::eCore:
1106     case CGBDataLoader_Native::eBioseqCore:
1107         // everything except bioseqs & annotations
1108         return fBlobHasCore;
1109     case CGBDataLoader_Native::eSequence:
1110         // seq data
1111         return fBlobHasSeqMap | fBlobHasSeqData;
1112     case CGBDataLoader_Native::eFeatures:
1113         // SeqFeatures
1114         return fBlobHasIntFeat;
1115     case CGBDataLoader_Native::eGraph:
1116         // SeqGraph
1117         return fBlobHasIntGraph;
1118     case CGBDataLoader_Native::eAlign:
1119         // SeqGraph
1120         return fBlobHasIntAlign;
1121     case CGBDataLoader_Native::eAnnot:
1122         // all internal annotations
1123         return fBlobHasIntAnnot;
1124     case CGBDataLoader_Native::eExtFeatures:
1125         return fBlobHasExtFeat|fBlobHasNamedFeat;
1126     case CGBDataLoader_Native::eExtGraph:
1127         return fBlobHasExtGraph|fBlobHasNamedGraph;
1128     case CGBDataLoader_Native::eExtAlign:
1129         return fBlobHasExtAlign|fBlobHasNamedAlign;
1130     case CGBDataLoader_Native::eExtAnnot:
1131         // external annotations
1132         return fBlobHasExtAnnot|fBlobHasNamedAnnot;
1133     case CGBDataLoader_Native::eOrphanAnnot:
1134         // orphan annotations
1135         return fBlobHasOrphanAnnot;
1136     case CGBDataLoader_Native::eAll:
1137         // everything
1138         return fBlobHasAll;
1139     default:
1140         return 0;
1141     }
1142 }
1143 
1144 
1145 TBlobContentsMask
x_MakeContentMask(const SRequestDetails & details) const1146 CGBDataLoader_Native::x_MakeContentMask(const SRequestDetails& details) const
1147 {
1148     TBlobContentsMask mask = 0;
1149     if ( details.m_NeedSeqMap.NotEmpty() ) {
1150         mask |= fBlobHasSeqMap;
1151     }
1152     if ( details.m_NeedSeqData.NotEmpty() ) {
1153         mask |= fBlobHasSeqData;
1154     }
1155     if ( details.m_AnnotBlobType != SRequestDetails::fAnnotBlobNone ) {
1156         TBlobContentsMask annots = 0;
1157         switch ( DetailsToChoice(details.m_NeedAnnots) ) {
1158         case eFeatures:
1159             annots |= fBlobHasIntFeat;
1160             break;
1161         case eAlign:
1162             annots |= fBlobHasIntAlign;
1163             break;
1164         case eGraph:
1165             annots |= fBlobHasIntGraph;
1166             break;
1167         case eAnnot:
1168             annots |= fBlobHasIntAnnot;
1169             break;
1170         default:
1171             break;
1172         }
1173         if ( details.m_AnnotBlobType & SRequestDetails::fAnnotBlobInternal ) {
1174             mask |= annots;
1175         }
1176         if ( details.m_AnnotBlobType & SRequestDetails::fAnnotBlobExternal ) {
1177             mask |= (annots << 1);
1178         }
1179         if ( details.m_AnnotBlobType & SRequestDetails::fAnnotBlobOrphan ) {
1180             mask |= (annots << 2);
1181         }
1182     }
1183     return mask;
1184 }
1185 
1186 
1187 CDataLoader::TTSE_LockSet
GetRecords(const CSeq_id_Handle & sih,const EChoice choice)1188 CGBDataLoader_Native::GetRecords(const CSeq_id_Handle& sih, const EChoice choice)
1189 {
1190     return x_GetRecords(sih, x_MakeContentMask(choice), 0);
1191 }
1192 
1193 
1194 CDataLoader::TTSE_LockSet
GetDetailedRecords(const CSeq_id_Handle & sih,const SRequestDetails & details)1195 CGBDataLoader_Native::GetDetailedRecords(const CSeq_id_Handle& sih,
1196                                   const SRequestDetails& details)
1197 {
1198     return x_GetRecords(sih, x_MakeContentMask(details), 0);
1199 }
1200 
1201 
CanGetBlobById(void) const1202 bool CGBDataLoader_Native::CanGetBlobById(void) const
1203 {
1204     return true;
1205 }
1206 
1207 
1208 CDataLoader::TTSE_Lock
GetBlobById(const TBlobId & id)1209 CGBDataLoader_Native::GetBlobById(const TBlobId& id)
1210 {
1211     TRealBlobId blob_id = GetRealBlobId(id);
1212 
1213     CGBReaderRequestResult result(this, CSeq_id_Handle());
1214     CLoadLockBlob blob(result, blob_id);
1215     if ( !blob.IsLoadedBlob() ) {
1216         m_Dispatcher->LoadBlob(result, blob_id);
1217     }
1218     _ASSERT(blob.IsLoadedBlob());
1219     return blob.GetTSE_LoadLock();
1220 }
1221 
1222 
1223 namespace {
1224     struct SBetterId
1225     {
GetScore__anon2624e5540111::SBetterId1226         int GetScore(const CSeq_id_Handle& id1) const
1227             {
1228                 if ( id1.IsGi() ) {
1229                     return 100;
1230                 }
1231                 if ( !id1 ) {
1232                     return -1;
1233                 }
1234                 CConstRef<CSeq_id> seq_id = id1.GetSeqId();
1235                 const CTextseq_id* text_id = seq_id->GetTextseq_Id();
1236                 if ( text_id ) {
1237                     int score;
1238                     if ( text_id->IsSetAccession() ) {
1239                         if ( text_id->IsSetVersion() ) {
1240                             score = 99;
1241                         }
1242                         else {
1243                             score = 50;
1244                         }
1245                     }
1246                     else {
1247                         score = 0;
1248                     }
1249                     return score;
1250                 }
1251                 if ( seq_id->IsGeneral() ) {
1252                     return 10;
1253                 }
1254                 if ( seq_id->IsLocal() ) {
1255                     return 0;
1256                 }
1257                 return 1;
1258             }
operator ()__anon2624e5540111::SBetterId1259         bool operator()(const CSeq_id_Handle& id1,
1260                         const CSeq_id_Handle& id2) const
1261             {
1262                 int score1 = GetScore(id1);
1263                 int score2 = GetScore(id2);
1264                 if ( score1 != score2 ) {
1265                     return score1 > score2;
1266                 }
1267                 return id1 < id2;
1268             }
1269     };
1270 }
1271 
1272 
1273 CDataLoader::TTSE_LockSet
GetExternalRecords(const CBioseq_Info & bioseq)1274 CGBDataLoader_Native::GetExternalRecords(const CBioseq_Info& bioseq)
1275 {
1276     TTSE_LockSet ret;
1277     TIds ids = bioseq.GetId();
1278     sort(ids.begin(), ids.end(), SBetterId());
1279     ITERATE ( TIds, it, ids ) {
1280         if ( GetBlobId(*it) ) {
1281             // correct id is found
1282             TTSE_LockSet ret2 = GetRecords(*it, eExtAnnot);
1283             ret.swap(ret2);
1284             break;
1285         }
1286         else if ( it->Which() == CSeq_id::e_Gi ) {
1287             // gi is not found, do not try any other Seq-id
1288             break;
1289         }
1290     }
1291     return ret;
1292 }
1293 
1294 
1295 CDataLoader::TTSE_LockSet
GetExternalAnnotRecordsNA(const CSeq_id_Handle & idh,const SAnnotSelector * sel,TProcessedNAs * processed_nas)1296 CGBDataLoader_Native::GetExternalAnnotRecordsNA(const CSeq_id_Handle& idh,
1297                                          const SAnnotSelector* sel,
1298                                          TProcessedNAs* processed_nas)
1299 {
1300     return x_GetRecords(idh, fBlobHasExtAnnot|fBlobHasNamedAnnot, sel, processed_nas);
1301 }
1302 
1303 
1304 CDataLoader::TTSE_LockSet
GetExternalAnnotRecordsNA(const CBioseq_Info & bioseq,const SAnnotSelector * sel,TProcessedNAs * processed_nas)1305 CGBDataLoader_Native::GetExternalAnnotRecordsNA(const CBioseq_Info& bioseq,
1306                                          const SAnnotSelector* sel,
1307                                          TProcessedNAs* processed_nas)
1308 {
1309     TTSE_LockSet ret;
1310     TIds ids = bioseq.GetId();
1311     sort(ids.begin(), ids.end(), SBetterId());
1312     ITERATE ( TIds, it, ids ) {
1313         if ( GetBlobId(*it) ) {
1314             // correct id is found
1315             TTSE_LockSet ret2 = GetExternalAnnotRecordsNA(*it, sel, processed_nas);
1316             ret.swap(ret2);
1317             break;
1318         }
1319         else if ( it->Which() == CSeq_id::e_Gi ) {
1320             // gi is not found, do not try any other Seq-id
1321             break;
1322         }
1323     }
1324     return ret;
1325 }
1326 
1327 
1328 CDataLoader::TTSE_LockSet
GetOrphanAnnotRecordsNA(const CSeq_id_Handle & idh,const SAnnotSelector * sel,TProcessedNAs * processed_nas)1329 CGBDataLoader_Native::GetOrphanAnnotRecordsNA(const CSeq_id_Handle& idh,
1330                                        const SAnnotSelector* sel,
1331                                        TProcessedNAs* processed_nas)
1332 {
1333     bool load_external = m_AlwaysLoadExternal;
1334     bool load_namedacc =
1335         m_AlwaysLoadNamedAcc && IsRequestedAnyNA(sel);
1336     if ( load_external || load_namedacc ) {
1337         TBlobContentsMask mask = 0;
1338         if ( load_external ) {
1339             mask |= fBlobHasExtAnnot;
1340         }
1341         if ( load_namedacc ) {
1342             mask |= fBlobHasNamedAnnot;
1343         }
1344         return x_GetRecords(idh, mask, sel, processed_nas);
1345     }
1346     else {
1347         return CDataLoader::GetOrphanAnnotRecordsNA(idh, sel, processed_nas);
1348     }
1349 }
1350 
1351 
1352 CDataLoader::TTSE_LockSet
x_GetRecords(const CSeq_id_Handle & sih,TBlobContentsMask mask,const SAnnotSelector * sel,TProcessedNAs * processed_nas)1353 CGBDataLoader_Native::x_GetRecords(const CSeq_id_Handle& sih,
1354                             TBlobContentsMask mask,
1355                             const SAnnotSelector* sel,
1356                             TProcessedNAs* processed_nas)
1357 {
1358     TTSE_LockSet locks;
1359 
1360     if ( mask == 0 || CReadDispatcher::CannotProcess(sih) ) {
1361         return locks;
1362     }
1363 
1364     if ( (mask & ~fBlobHasOrphanAnnot) == 0 ) {
1365         // no orphan annotations in GenBank
1366         return locks;
1367     }
1368 
1369     CGBReaderRequestResult result(this, sih);
1370     m_Dispatcher->LoadBlobs(result, sih, mask, sel);
1371     CLoadLockBlobIds blobs(result, sih, sel);
1372     if ( !blobs.IsLoaded() ) {
1373         return locks;
1374     }
1375     _ASSERT(blobs.IsLoaded());
1376 
1377     CFixedBlob_ids blob_ids = blobs.GetBlob_ids();
1378     if ( (blob_ids.GetState() & CBioseq_Handle::fState_no_data) != 0 ) {
1379         if ( (mask & fBlobHasAllLocal) == 0 ||
1380              blob_ids.GetState() == CBioseq_Handle::fState_no_data ) {
1381             // only external annotatsions are requested,
1382             // or default state - return empty lock set
1383             return locks;
1384         }
1385         NCBI_THROW2(CBlobStateException, eBlobStateError,
1386                     "blob state error for "+sih.AsString(),
1387                     blob_ids.GetState());
1388     }
1389 
1390     ITERATE ( CFixedBlob_ids, it, blob_ids ) {
1391         const CBlob_Info& info = *it;
1392         const CBlob_id& blob_id = *info.GetBlob_id();
1393         if ( info.Matches(mask, sel) ) {
1394             CLoadLockBlob blob(result, blob_id);
1395             if ( !blob.IsLoadedBlob() ) {
1396                 continue;
1397             }
1398             CTSE_LoadLock& lock = blob.GetTSE_LoadLock();
1399             _ASSERT(lock);
1400             if ( lock->GetBlobState() & CBioseq_Handle::fState_no_data ) {
1401                 NCBI_THROW2(CBlobStateException, eBlobStateError,
1402                             "blob state error for "+blob_id.ToString(),
1403                             lock->GetBlobState());
1404             }
1405             if ( processed_nas ) {
1406                 if ( auto annot_info = info.GetAnnotInfo() ) {
1407                     for ( auto& acc : annot_info->GetNamedAnnotNames() ) {
1408                         CDataLoader::SetProcessedNA(acc, processed_nas);
1409                     }
1410                 }
1411             }
1412             locks.insert(lock);
1413         }
1414     }
1415     result.SaveLocksTo(locks);
1416     return locks;
1417 }
1418 
1419 
1420 CGBDataLoader::TNamedAnnotNames
GetNamedAnnotAccessions(const CSeq_id_Handle & sih)1421 CGBDataLoader_Native::GetNamedAnnotAccessions(const CSeq_id_Handle& sih)
1422 {
1423     TNamedAnnotNames names;
1424 
1425     CGBReaderRequestResult result(this, sih);
1426     SAnnotSelector sel;
1427     sel.IncludeNamedAnnotAccession("NA*");
1428     CLoadLockBlobIds blobs(result, sih, &sel);
1429     m_Dispatcher->LoadSeq_idBlob_ids(result, sih, &sel);
1430     _ASSERT(blobs.IsLoaded());
1431 
1432     CFixedBlob_ids blob_ids = blobs.GetBlob_ids();
1433     if ( (blob_ids.GetState() & CBioseq_Handle::fState_no_data) != 0) {
1434         if ( blob_ids.GetState() == CBioseq_Handle::fState_no_data ) {
1435             // default state - return empty name set
1436             return names;
1437         }
1438         NCBI_THROW2(CBlobStateException, eBlobStateError,
1439                     "blob state error for "+sih.AsString(),
1440                     blob_ids.GetState());
1441     }
1442 
1443     ITERATE ( CFixedBlob_ids, it, blob_ids ) {
1444         const CBlob_Info& info = *it;
1445         if ( !info.IsSetAnnotInfo() ) {
1446             continue;
1447         }
1448         CConstRef<CBlob_Annot_Info> annot_info = info.GetAnnotInfo();
1449         ITERATE( CBlob_Annot_Info::TNamedAnnotNames, jt,
1450                  annot_info->GetNamedAnnotNames()) {
1451             names.insert(*jt);
1452         }
1453     }
1454     return names;
1455 }
1456 
1457 
1458 CGBDataLoader::TNamedAnnotNames
GetNamedAnnotAccessions(const CSeq_id_Handle & sih,const string & named_acc)1459 CGBDataLoader_Native::GetNamedAnnotAccessions(const CSeq_id_Handle& sih,
1460                                        const string& named_acc)
1461 {
1462     TNamedAnnotNames names;
1463 
1464     CGBReaderRequestResult result(this, sih);
1465     SAnnotSelector sel;
1466     if ( !ExtractZoomLevel(named_acc, 0, 0) ) {
1467         sel.IncludeNamedAnnotAccession(CombineWithZoomLevel(named_acc, -1));
1468     }
1469     else {
1470         sel.IncludeNamedAnnotAccession(named_acc);
1471     }
1472     CLoadLockBlobIds blobs(result, sih, &sel);
1473     m_Dispatcher->LoadSeq_idBlob_ids(result, sih, &sel);
1474     _ASSERT(blobs.IsLoaded());
1475 
1476     CFixedBlob_ids blob_ids = blobs.GetBlob_ids();
1477     if ( (blob_ids.GetState() & CBioseq_Handle::fState_no_data) != 0 ) {
1478         if ( blob_ids.GetState() == CBioseq_Handle::fState_no_data ) {
1479             // default state - return empty name set
1480             return names;
1481         }
1482         NCBI_THROW2(CBlobStateException, eBlobStateError,
1483                     "blob state error for "+sih.AsString(),
1484                     blob_ids.GetState());
1485     }
1486 
1487     ITERATE ( CFixedBlob_ids, it, blob_ids ) {
1488         const CBlob_Info& info = *it;
1489         if ( !info.IsSetAnnotInfo() ) {
1490             continue;
1491         }
1492         CConstRef<CBlob_Annot_Info> annot_info = info.GetAnnotInfo();
1493         ITERATE ( CBlob_Annot_Info::TNamedAnnotNames, jt,
1494                   annot_info->GetNamedAnnotNames() ) {
1495             names.insert(*jt);
1496         }
1497     }
1498     return names;
1499 }
1500 
1501 
GetChunk(TChunk chunk)1502 void CGBDataLoader_Native::GetChunk(TChunk chunk)
1503 {
1504     CReader::TChunkId id = chunk->GetChunkId();
1505     if ( id == kMasterWGS_ChunkId ) {
1506         CWGSMasterSupport::LoadWGSMaster(this, chunk);
1507     }
1508     else {
1509         CGBReaderRequestResult result(this, CSeq_id_Handle());
1510         m_Dispatcher->LoadChunk(result,
1511                                 GetRealBlobId(chunk->GetBlobId()),
1512                                 id);
1513     }
1514 }
1515 
1516 
GetChunks(const TChunkSet & chunks)1517 void CGBDataLoader_Native::GetChunks(const TChunkSet& chunks)
1518 {
1519     typedef map<TBlobId, CReader::TChunkIds> TChunkIdMap;
1520     TChunkIdMap chunk_ids;
1521     ITERATE(TChunkSet, it, chunks) {
1522         CReader::TChunkId id = (*it)->GetChunkId();
1523         if ( id == kMasterWGS_ChunkId ) {
1524             CWGSMasterSupport::LoadWGSMaster(this, *it);
1525         }
1526         else {
1527             chunk_ids[(*it)->GetBlobId()].push_back(id);
1528         }
1529     }
1530     ITERATE(TChunkIdMap, it, chunk_ids) {
1531         CGBReaderRequestResult result(this, CSeq_id_Handle());
1532         m_Dispatcher->LoadChunks(result,
1533                                  GetRealBlobId(it->first),
1534                                  it->second);
1535     }
1536 }
1537 
1538 
GetBlobs(TTSE_LockSets & tse_sets)1539 void CGBDataLoader_Native::GetBlobs(TTSE_LockSets& tse_sets)
1540 {
1541     CGBReaderRequestResult result(this, CSeq_id_Handle());
1542     TBlobContentsMask mask = fBlobHasCore;
1543     CReadDispatcher::TIds ids;
1544     ITERATE(TTSE_LockSets, tse_set, tse_sets) {
1545         const CSeq_id_Handle& id = tse_set->first;
1546         if ( CReadDispatcher::CannotProcess(id) ) {
1547             continue;
1548         }
1549         ids.push_back(id);
1550     }
1551     m_Dispatcher->LoadBlobSet(result, ids);
1552 
1553     NON_CONST_ITERATE(TTSE_LockSets, tse_set, tse_sets) {
1554         const CSeq_id_Handle& id = tse_set->first;
1555         if ( CReadDispatcher::CannotProcess(id) ) {
1556             continue;
1557         }
1558         CLoadLockBlobIds blob_ids_lock(result, id, 0);
1559         CFixedBlob_ids blob_ids = blob_ids_lock.GetBlob_ids();
1560         ITERATE ( CFixedBlob_ids, it, blob_ids ) {
1561             const CBlob_Info& info = *it;
1562             const CBlob_id& blob_id = *info.GetBlob_id();
1563             if ( info.Matches(mask, 0) ) {
1564                 CLoadLockBlob blob(result, blob_id);
1565                 _ASSERT(blob.IsLoadedBlob());
1566                 /*
1567                 if ((blob.GetBlobState() & CBioseq_Handle::fState_no_data) != 0) {
1568                     // Ignore bad blobs
1569                     continue;
1570                 }
1571                 */
1572                 tse_set->second.insert(blob.GetTSE_LoadLock());
1573             }
1574         }
1575     }
1576 }
1577 
#if 0
// Disabled code (excluded from compilation by the surrounding #if 0).
// Scope guard around a CTimer: starts the timer in the constructor and
// stops it in the destructor, but only if the timer reported
// NeedCalibration() at construction time.
class CTimerGuard
{
    CTimer *t;
    bool    calibrating;
public:
    CTimerGuard(CTimer& x)
        : t(&x), calibrating(x.NeedCalibration())
        {
            if ( calibrating ) {
                t->Start();
            }
        }
    ~CTimerGuard(void)
        {
            if ( calibrating ) {
                t->Stop();
            }
        }
};
#endif
1599 
1600 
// No-op: the TSE info argument is ignored. The statements below are
// a disabled earlier implementation that erased the blob from a load map.
void CGBDataLoader_Native::DropTSE(CRef<CTSE_Info> /* tse_info */)
{
    //TWriteLockGuard guard(m_LoadMap_Lock);
    //m_LoadMapBlob.erase(GetBlob_id(*tse_info));
}
1606 
1607 
1608 CGBReaderRequestResult::
CGBReaderRequestResult(CGBDataLoader_Native * loader,const CSeq_id_Handle & requested_id)1609 CGBReaderRequestResult(CGBDataLoader_Native* loader,
1610                        const CSeq_id_Handle& requested_id)
1611     : CReaderRequestResult(requested_id,
1612                            loader->GetDispatcher(),
1613                            loader->GetInfoManager()),
1614       m_Loader(loader)
1615 {
1616 }
1617 
1618 
~CGBReaderRequestResult(void)1619 CGBReaderRequestResult::~CGBReaderRequestResult(void)
1620 {
1621 }
1622 
1623 
GetLoaderPtr(void)1624 CGBDataLoader_Native* CGBReaderRequestResult::GetLoaderPtr(void)
1625 {
1626     return m_Loader;
1627 }
1628 
1629 
GetTSE_LoadLock(const TKeyBlob & blob_id)1630 CTSE_LoadLock CGBReaderRequestResult::GetTSE_LoadLock(const TKeyBlob& blob_id)
1631 {
1632     CGBDataLoader::TBlobId id(new TKeyBlob(blob_id));
1633     return GetLoader().GetDataSource()->GetTSE_LoadLock(id);
1634 }
1635 
1636 
GetTSE_LoadLockIfLoaded(const TKeyBlob & blob_id)1637 CTSE_LoadLock CGBReaderRequestResult::GetTSE_LoadLockIfLoaded(const TKeyBlob& blob_id)
1638 {
1639     CGBDataLoader::TBlobId id(new TKeyBlob(blob_id));
1640     return GetLoader().GetDataSource()->GetTSE_LoadLockIfLoaded(id);
1641 }
1642 
1643 
GetLoadedBlob_ids(const CSeq_id_Handle & idh,TLoadedBlob_ids & blob_ids) const1644 void CGBReaderRequestResult::GetLoadedBlob_ids(const CSeq_id_Handle& idh,
1645                                                TLoadedBlob_ids& blob_ids) const
1646 {
1647     CDataSource::TLoadedBlob_ids blob_ids2;
1648     m_Loader->GetDataSource()->GetLoadedBlob_ids(idh,
1649                                                  CDataSource::fLoaded_bioseqs,
1650                                                  blob_ids2);
1651     ITERATE(CDataSource::TLoadedBlob_ids, id, blob_ids2) {
1652         blob_ids.push_back(m_Loader->GetRealBlobId(*id));
1653     }
1654 }
1655 
1656 
1657 CGBDataLoader::TExpirationTimeout
GetIdExpirationTimeout(GBL::EExpirationType type) const1658 CGBReaderRequestResult::GetIdExpirationTimeout(GBL::EExpirationType type) const
1659 {
1660     if ( type == GBL::eExpire_normal ) {
1661         return m_Loader->GetIdExpirationTimeout();
1662     }
1663     else {
1664         return CReaderRequestResult::GetIdExpirationTimeout(type);
1665     }
1666 }
1667 
1668 
1669 bool
GetAddWGSMasterDescr(void) const1670 CGBReaderRequestResult::GetAddWGSMasterDescr(void) const
1671 {
1672     return m_Loader->GetAddWGSMasterDescr();
1673 }
1674 
1675 
GetPTISErrorAction(void) const1676 EGBErrorAction CGBReaderRequestResult::GetPTISErrorAction(void) const
1677 {
1678     return m_Loader->GetPTISErrorAction();
1679 }
1680 
1681 
1682 /*
1683 bool CGBDataLoader::LessBlobId(const TBlobId& id1, const TBlobId& id2) const
1684 {
1685     const CBlob_id& bid1 = dynamic_cast<const CBlob_id&>(*id1);
1686     const CBlob_id& bid2 = dynamic_cast<const CBlob_id&>(*id2);
1687     return bid1 < bid2;
1688 }
1689 
1690 
1691 string CGBDataLoader::BlobIdToString(const TBlobId& id) const
1692 {
1693     const CBlob_id& bid = dynamic_cast<const CBlob_id&>(*id);
1694     return bid.ToString();
1695 }
1696 */
1697 
RegisterCache(ICache & cache,ECacheType cache_type)1698 void CGBReaderCacheManager::RegisterCache(ICache& cache,
1699                                           ECacheType cache_type)
1700 {
1701     SReaderCacheInfo info(cache, cache_type);
1702     //!!! Make sure the cache is not registered yet!
1703     m_Caches.push_back(info);
1704 }
1705 
1706 
FindCache(ECacheType cache_type,const TCacheParams * params)1707 ICache* CGBReaderCacheManager::FindCache(ECacheType cache_type,
1708                                          const TCacheParams* params)
1709 {
1710     NON_CONST_ITERATE(TCaches, it, m_Caches) {
1711         if ((it->m_Type & cache_type) == 0) {
1712             continue;
1713         }
1714         if ( it->m_Cache->SameCacheParams(params) ) {
1715             return it->m_Cache.get();
1716         }
1717     }
1718     return 0;
1719 }
1720 
1721 
1722 END_SCOPE(objects)
1723 
1724 // ===========================================================================
1725 
1726 USING_SCOPE(objects);
1727 
1728 END_NCBI_SCOPE
1729