1 /*  $Id: psg_loader.cpp 624399 2021-01-28 20:09:32Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Eugene Vasilchenko, Aleksey Grichenko
27  *
28  * File Description: PSG data loader
29  *
30  * ===========================================================================
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 
36 #include <objects/general/general__.hpp>
37 #include <objects/seqloc/Seq_id.hpp>
38 #include <objects/seq/seq__.hpp>
39 #include <objects/seqres/seqres__.hpp>
40 
41 #include <objmgr/impl/data_source.hpp>
42 #include <objmgr/impl/tse_loadlock.hpp>
43 #include <objmgr/impl/tse_chunk_info.hpp>
44 #include <objmgr/data_loader_factory.hpp>
45 #include <corelib/plugin_manager_impl.hpp>
46 #include <corelib/plugin_manager_store.hpp>
47 
48 #include <objtools/data_loaders/genbank/psg_loader.hpp>
49 #include <objtools/data_loaders/genbank/impl/psg_loader_impl.hpp>
50 
51 #if defined(HAVE_PSG_LOADER)
52 
53 BEGIN_NCBI_SCOPE
54 BEGIN_SCOPE(objects)
55 
56 class CDataLoader;
57 
58 /////////////////////////////////////////////////////////////////////////////
59 // CPSGDataLoader
60 /////////////////////////////////////////////////////////////////////////////
61 
62 
// Loader name strings.
// NOTE(review): neither macro is referenced anywhere else in this file;
// confirm whether they are still needed before relying on them.
#define PSGLOADER_NAME "GBLOADER"
#define PSGLOADER_HUP_NAME "GBLOADER-HUP"

// Driver name handed to the CDataLoaderFactory base class by
// CPSG_DataLoaderCF (see its constructor later in this file).
const char kDataLoader_PSG_DriverName[] = "psg";
67 
RegisterInObjectManager(CObjectManager & om,const CGBLoaderParams & params,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)68 CPSGDataLoader::TRegisterLoaderInfo CPSGDataLoader::RegisterInObjectManager(
69     CObjectManager& om,
70     const CGBLoaderParams& params,
71     CObjectManager::EIsDefault is_default,
72     CObjectManager::TPriority priority)
73 {
74     TMaker maker(params);
75     CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
76     return ConvertRegInfo(maker.GetRegisterInfo());
77 }
78 
79 
RegisterInObjectManager(CObjectManager & om,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)80 CPSGDataLoader::TRegisterLoaderInfo CPSGDataLoader::RegisterInObjectManager(
81     CObjectManager& om,
82     CObjectManager::EIsDefault is_default,
83     CObjectManager::TPriority priority)
84 {
85     CGBLoaderParams params;
86     TMaker maker(params);
87     CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
88     return ConvertRegInfo(maker.GetRegisterInfo());
89 }
90 
91 
RegisterInObjectManager(CObjectManager & om,const TParamTree & param_tree,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)92 CPSGDataLoader::TRegisterLoaderInfo CPSGDataLoader::RegisterInObjectManager(
93     CObjectManager& om,
94     const TParamTree& param_tree,
95     CObjectManager::EIsDefault is_default,
96     CObjectManager::TPriority priority)
97 {
98     CGBLoaderParams params(&param_tree);
99     TMaker maker(params);
100     CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
101     return ConvertRegInfo(maker.GetRegisterInfo());
102 }
103 
104 
CPSGDataLoader(const string & loader_name,const CGBLoaderParams & params)105 CPSGDataLoader::CPSGDataLoader(const string& loader_name,
106                                const CGBLoaderParams& params)
107     : CGBDataLoader(loader_name, params)
108 {
109     m_Impl.Reset(new CPSGDataLoader_Impl(params));
110 }
111 
112 
~CPSGDataLoader(void)113 CPSGDataLoader::~CPSGDataLoader(void)
114 {
115 }
116 
117 
GetBlobId(const CSeq_id_Handle & idh)118 CDataLoader::TBlobId CPSGDataLoader::GetBlobId(const CSeq_id_Handle& idh)
119 {
120     return TBlobId(m_Impl->GetBlobId(idh).GetPointerOrNull());
121 }
122 
123 
124 CDataLoader::TBlobId
GetBlobIdFromString(const string & str) const125 CPSGDataLoader::GetBlobIdFromString(const string& str) const
126 {
127     return TBlobId(new CPsgBlobId(str));
128 }
129 
130 
CanGetBlobById(void) const131 bool CPSGDataLoader::CanGetBlobById(void) const
132 {
133     return true;
134 }
135 
136 
137 CDataLoader::TTSE_LockSet
GetRecords(const CSeq_id_Handle & idh,EChoice choice)138 CPSGDataLoader::GetRecords(const CSeq_id_Handle& idh,
139                            EChoice choice)
140 {
141     return m_Impl->GetRecords(GetDataSource(), idh, choice);
142 }
143 
144 
GetOrphanAnnotRecordsNA(const CSeq_id_Handle & idh,const SAnnotSelector * sel,TProcessedNAs * processed_nas)145 CPSGDataLoader::TTSE_LockSet CPSGDataLoader::GetOrphanAnnotRecordsNA(const CSeq_id_Handle& idh,
146     const SAnnotSelector* sel,
147     TProcessedNAs* processed_nas)
148 {
149     return CDataLoader::TTSE_LockSet();
150 }
151 
152 
GetExternalAnnotRecordsNA(const CSeq_id_Handle & idh,const SAnnotSelector * sel,TProcessedNAs * processed_nas)153 CPSGDataLoader::TTSE_LockSet CPSGDataLoader::GetExternalAnnotRecordsNA(const CSeq_id_Handle& idh,
154     const SAnnotSelector* sel,
155     TProcessedNAs* processed_nas)
156 {
157     return m_Impl->GetAnnotRecordsNA(GetDataSource(), idh, sel, processed_nas);
158 }
159 
160 
GetExternalAnnotRecordsNA(const CBioseq_Info & bioseq,const SAnnotSelector * sel,TProcessedNAs * processed_nas)161 CPSGDataLoader::TTSE_LockSet CPSGDataLoader::GetExternalAnnotRecordsNA(const CBioseq_Info& bioseq,
162     const SAnnotSelector* sel,
163     TProcessedNAs* processed_nas)
164 {
165     TTSE_LockSet ret;
166     ITERATE(CBioseq_Info::TId, it, bioseq.GetId()) {
167         TTSE_LockSet ret2 = m_Impl->GetAnnotRecordsNA(GetDataSource(), *it, sel, processed_nas);
168         if (!ret2.empty()) {
169             ret.swap(ret2);
170             break;
171         }
172     }
173     return ret;
174 }
175 
176 
GetChunk(TChunk chunk)177 void CPSGDataLoader::GetChunk(TChunk chunk)
178 {
179     m_Impl->LoadChunk(GetDataSource(), *chunk);
180 }
181 
182 
GetChunks(const TChunkSet & chunks)183 void CPSGDataLoader::GetChunks(const TChunkSet& chunks)
184 {
185     m_Impl->LoadChunks(GetDataSource(), chunks);
186 }
187 
188 
GetBlobs(TTSE_LockSets & tse_sets)189 void CPSGDataLoader::GetBlobs(TTSE_LockSets& tse_sets)
190 {
191     m_Impl->GetBlobs(GetDataSource(), tse_sets);
192 }
193 
194 
195 CDataLoader::TTSE_Lock
GetBlobById(const TBlobId & blob_id)196 CPSGDataLoader::GetBlobById(const TBlobId& blob_id)
197 {
198     return m_Impl->GetBlobById(GetDataSource(),
199                                dynamic_cast<const CPsgBlobId&>(*blob_id));
200 }
201 
202 
GetIds(const CSeq_id_Handle & idh,TIds & ids)203 void CPSGDataLoader::GetIds(const CSeq_id_Handle& idh, TIds& ids)
204 {
205     m_Impl->GetIds(idh, ids);
206 }
207 
208 
209 CDataLoader::SGiFound
GetGiFound(const CSeq_id_Handle & idh)210 CPSGDataLoader::GetGiFound(const CSeq_id_Handle& idh)
211 {
212     return m_Impl->GetGi(idh);
213 }
214 
215 
216 CDataLoader::SAccVerFound
GetAccVerFound(const CSeq_id_Handle & idh)217 CPSGDataLoader::GetAccVerFound(const CSeq_id_Handle& idh)
218 {
219     return m_Impl->GetAccVer(idh);
220 }
221 
222 
GetTaxId(const CSeq_id_Handle & idh)223 TTaxId CPSGDataLoader::GetTaxId(const CSeq_id_Handle& idh)
224 {
225     auto taxid = m_Impl->GetTaxId(idh);
226     return taxid != INVALID_TAX_ID ? taxid : CDataLoader::GetTaxId(idh);
227 }
228 
229 
GetSequenceLength(const CSeq_id_Handle & idh)230 TSeqPos CPSGDataLoader::GetSequenceLength(const CSeq_id_Handle& idh)
231 {
232     return m_Impl->GetSequenceLength(idh);
233 }
234 
235 
236 CDataLoader::SHashFound
GetSequenceHashFound(const CSeq_id_Handle & idh)237 CPSGDataLoader::GetSequenceHashFound(const CSeq_id_Handle& idh)
238 {
239     return m_Impl->GetSequenceHash(idh);
240 }
241 
242 
243 CDataLoader::STypeFound
GetSequenceTypeFound(const CSeq_id_Handle & idh)244 CPSGDataLoader::GetSequenceTypeFound(const CSeq_id_Handle& idh)
245 {
246     return m_Impl->GetSequenceType(idh);
247 }
248 
249 
GetSequenceState(const CSeq_id_Handle & idh)250 int CPSGDataLoader::GetSequenceState(const CSeq_id_Handle& idh)
251 {
252     return m_Impl->GetSequenceState(idh);
253 }
254 
255 
DropTSE(CRef<CTSE_Info> tse_info)256 void CPSGDataLoader::DropTSE(CRef<CTSE_Info> tse_info)
257 {
258     m_Impl->DropTSE(dynamic_cast<const CPsgBlobId&>(*tse_info->GetBlobId()));
259 }
260 
261 
GetAccVers(const TIds & ids,TLoaded & loaded,TIds & ret)262 void CPSGDataLoader::GetAccVers(const TIds& ids, TLoaded& loaded, TIds& ret)
263 {
264     m_Impl->GetAccVers(ids, loaded, ret);
265 }
266 
267 
GetGis(const TIds & ids,TLoaded & loaded,TGis & ret)268 void CPSGDataLoader::GetGis(const TIds& ids, TLoaded& loaded, TGis& ret)
269 {
270     m_Impl->GetGis(ids, loaded, ret);
271 }
272 
273 
274 CGBDataLoader::TNamedAnnotNames
GetNamedAnnotAccessions(const CSeq_id_Handle & sih)275 CPSGDataLoader::GetNamedAnnotAccessions(const CSeq_id_Handle& sih)
276 {
277     TNamedAnnotNames names;
278 
279     /*
280     CGBReaderRequestResult result(this, sih);
281     SAnnotSelector sel;
282     sel.IncludeNamedAnnotAccession("NA*");
283     CLoadLockBlobIds blobs(result, sih, &sel);
284     m_Dispatcher->LoadSeq_idBlob_ids(result, sih, &sel);
285     _ASSERT(blobs.IsLoaded());
286 
287     CFixedBlob_ids blob_ids = blobs.GetBlob_ids();
288     if ((blob_ids.GetState() & CBioseq_Handle::fState_no_data) != 0) {
289         if (blob_ids.GetState() == CBioseq_Handle::fState_no_data) {
290             // default state - return empty name set
291             return names;
292         }
293         NCBI_THROW2(CBlobStateException, eBlobStateError,
294             "blob state error for " + sih.AsString(),
295             blob_ids.GetState());
296     }
297 
298     ITERATE(CFixedBlob_ids, it, blob_ids) {
299         const CBlob_Info& info = *it;
300         if (!info.IsSetAnnotInfo()) {
301             continue;
302         }
303         CConstRef<CBlob_Annot_Info> annot_info = info.GetAnnotInfo();
304         ITERATE(CBlob_Annot_Info::TNamedAnnotNames, jt,
305             annot_info->GetNamedAnnotNames()) {
306             names.insert(*jt);
307         }
308     }
309     */
310 
311     return names;
312 }
313 
314 
315 CGBDataLoader::TNamedAnnotNames
GetNamedAnnotAccessions(const CSeq_id_Handle & sih,const string & named_acc)316 CPSGDataLoader::GetNamedAnnotAccessions(const CSeq_id_Handle& sih,
317     const string& named_acc)
318 {
319     TNamedAnnotNames names;
320 
321     /*
322     CGBReaderRequestResult result(this, sih);
323     SAnnotSelector sel;
324     if (!ExtractZoomLevel(named_acc, 0, 0)) {
325         sel.IncludeNamedAnnotAccession(CombineWithZoomLevel(named_acc, -1));
326     }
327     else {
328         sel.IncludeNamedAnnotAccession(named_acc);
329     }
330     CLoadLockBlobIds blobs(result, sih, &sel);
331     m_Dispatcher->LoadSeq_idBlob_ids(result, sih, &sel);
332     _ASSERT(blobs.IsLoaded());
333 
334     CFixedBlob_ids blob_ids = blobs.GetBlob_ids();
335     if ((blob_ids.GetState() & CBioseq_Handle::fState_no_data) != 0) {
336         if (blob_ids.GetState() == CBioseq_Handle::fState_no_data) {
337             // default state - return empty name set
338             return names;
339         }
340         NCBI_THROW2(CBlobStateException, eBlobStateError,
341             "blob state error for " + sih.AsString(),
342             blob_ids.GetState());
343     }
344 
345     ITERATE(CFixedBlob_ids, it, blob_ids) {
346         const CBlob_Info& info = *it;
347         if (!info.IsSetAnnotInfo()) {
348             continue;
349         }
350         CConstRef<CBlob_Annot_Info> annot_info = info.GetAnnotInfo();
351         ITERATE(CBlob_Annot_Info::TNamedAnnotNames, jt,
352             annot_info->GetNamedAnnotNames()) {
353             names.insert(*jt);
354         }
355     }
356     */
357 
358     return names;
359 }
360 
361 
ConvertRegInfo(const TMaker::TRegisterInfo & info)362 CGBDataLoader::TRegisterLoaderInfo CPSGDataLoader::ConvertRegInfo(const TMaker::TRegisterInfo& info)
363 {
364     TRegisterLoaderInfo ret;
365     ret.Set(info.GetLoader(), info.IsCreated());
366     return ret;
367 }
368 
369 
370 END_SCOPE(objects)
371 
372 // ===========================================================================
373 
374 USING_SCOPE(objects);
375 
376 class CPSG_DataLoaderCF : public CDataLoaderFactory
377 {
378 public:
CPSG_DataLoaderCF(void)379     CPSG_DataLoaderCF(void)
380         : CDataLoaderFactory(objects::kDataLoader_PSG_DriverName) {}
~CPSG_DataLoaderCF(void)381     virtual ~CPSG_DataLoaderCF(void) {}
382 
383 protected:
384     virtual CDataLoader* CreateAndRegister(
385         CObjectManager& om,
386         const TPluginManagerParamTree* params) const;
387 };
388 
389 
CreateAndRegister(CObjectManager & om,const TPluginManagerParamTree * params) const390 CDataLoader* CPSG_DataLoaderCF::CreateAndRegister(
391     CObjectManager& om,
392     const TPluginManagerParamTree* params) const
393 {
394     if ( !ValidParams(params) ) {
395         // Use constructor without arguments
396         return CPSGDataLoader::RegisterInObjectManager(om).GetLoader();
397     }
398     // IsDefault and Priority arguments may be specified
399     return CPSGDataLoader::RegisterInObjectManager(
400         om,
401         GetIsDefault(params),
402         GetPriority(params)).GetLoader();
403 }
404 
405 END_NCBI_SCOPE
406 
407 #endif // HAVE_PSG_LOADER
408